KVM: selftests: Use canonical $(ARCH) paths for KVM selftests directories
author Sean Christopherson <seanjc@google.com>
Thu, 28 Nov 2024 00:55:46 +0000 (16:55 -0800)
committer Sean Christopherson <seanjc@google.com>
Wed, 18 Dec 2024 22:15:04 +0000 (14:15 -0800)
Use the kernel's canonical $(ARCH) paths instead of the raw target triple
for KVM selftests directories.  KVM selftests are quite nearly the only
place in the entire kernel that uses the target triple for directories,
tools/testing/selftests/drivers/s390x being the lone holdout.

Using the kernel's preferred nomenclature eliminates the minor, but
annoying, friction of having to translate between the kernel's arch names
and KVM's selftests directories, e.g. for pattern matching, opening files,
running selftests, etc.

Opportunistically delete file comments that reference the full path of the
file, as they are obviously prone to becoming stale and serve no known
purpose.

Reviewed-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Acked-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Acked-by: Andrew Jones <ajones@ventanamicro.com>
Link: https://lore.kernel.org/r/20241128005547.4077116-16-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
299 files changed:
MAINTAINERS
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/Makefile.kvm
tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c [deleted file]
tools/testing/selftests/kvm/aarch64/arch_timer.c [deleted file]
tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c [deleted file]
tools/testing/selftests/kvm/aarch64/debug-exceptions.c [deleted file]
tools/testing/selftests/kvm/aarch64/get-reg-list.c [deleted file]
tools/testing/selftests/kvm/aarch64/hypercalls.c [deleted file]
tools/testing/selftests/kvm/aarch64/mmio_abort.c [deleted file]
tools/testing/selftests/kvm/aarch64/no-vgic-v3.c [deleted file]
tools/testing/selftests/kvm/aarch64/page_fault_test.c [deleted file]
tools/testing/selftests/kvm/aarch64/psci_test.c [deleted file]
tools/testing/selftests/kvm/aarch64/set_id_regs.c [deleted file]
tools/testing/selftests/kvm/aarch64/smccc_filter.c [deleted file]
tools/testing/selftests/kvm/aarch64/vcpu_width_config.c [deleted file]
tools/testing/selftests/kvm/aarch64/vgic_init.c [deleted file]
tools/testing/selftests/kvm/aarch64/vgic_irq.c [deleted file]
tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c [deleted file]
tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c [deleted file]
tools/testing/selftests/kvm/arm64/aarch32_id_regs.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/arch_timer.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/debug-exceptions.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/get-reg-list.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/hypercalls.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/mmio_abort.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/no-vgic-v3.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/page_fault_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/psci_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/set_id_regs.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/smccc_filter.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/vcpu_width_config.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/vgic_init.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/vgic_irq.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/vpmu_counter_access.c [new file with mode: 0644]
tools/testing/selftests/kvm/dirty_log_perf_test.c
tools/testing/selftests/kvm/include/aarch64/arch_timer.h [deleted file]
tools/testing/selftests/kvm/include/aarch64/delay.h [deleted file]
tools/testing/selftests/kvm/include/aarch64/gic.h [deleted file]
tools/testing/selftests/kvm/include/aarch64/gic_v3.h [deleted file]
tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h [deleted file]
tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h [deleted file]
tools/testing/selftests/kvm/include/aarch64/processor.h [deleted file]
tools/testing/selftests/kvm/include/aarch64/spinlock.h [deleted file]
tools/testing/selftests/kvm/include/aarch64/ucall.h [deleted file]
tools/testing/selftests/kvm/include/aarch64/vgic.h [deleted file]
tools/testing/selftests/kvm/include/arm64/arch_timer.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/arm64/delay.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/arm64/gic.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/arm64/gic_v3.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/arm64/gic_v3_its.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/arm64/processor.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/arm64/spinlock.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/arm64/ucall.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/arm64/vgic.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/s390/debug_print.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/s390/diag318_test_handler.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/s390/facility.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/s390/kvm_util_arch.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/s390/processor.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/s390/sie.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/s390/ucall.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/s390x/debug_print.h [deleted file]
tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h [deleted file]
tools/testing/selftests/kvm/include/s390x/facility.h [deleted file]
tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h [deleted file]
tools/testing/selftests/kvm/include/s390x/processor.h [deleted file]
tools/testing/selftests/kvm/include/s390x/sie.h [deleted file]
tools/testing/selftests/kvm/include/s390x/ucall.h [deleted file]
tools/testing/selftests/kvm/include/x86/apic.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/evmcs.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/hyperv.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/kvm_util_arch.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/mce.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/pmu.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/processor.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/sev.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/svm.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/svm_util.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/ucall.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/vmx.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86_64/apic.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/evmcs.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/hyperv.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/mce.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/pmu.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/processor.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/sev.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/svm.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/svm_util.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/ucall.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/vmx.h [deleted file]
tools/testing/selftests/kvm/lib/aarch64/gic.c [deleted file]
tools/testing/selftests/kvm/lib/aarch64/gic_private.h [deleted file]
tools/testing/selftests/kvm/lib/aarch64/gic_v3.c [deleted file]
tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c [deleted file]
tools/testing/selftests/kvm/lib/aarch64/handlers.S [deleted file]
tools/testing/selftests/kvm/lib/aarch64/processor.c [deleted file]
tools/testing/selftests/kvm/lib/aarch64/spinlock.c [deleted file]
tools/testing/selftests/kvm/lib/aarch64/ucall.c [deleted file]
tools/testing/selftests/kvm/lib/aarch64/vgic.c [deleted file]
tools/testing/selftests/kvm/lib/arm64/gic.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/arm64/gic_private.h [new file with mode: 0644]
tools/testing/selftests/kvm/lib/arm64/gic_v3.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/arm64/handlers.S [new file with mode: 0644]
tools/testing/selftests/kvm/lib/arm64/processor.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/arm64/spinlock.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/arm64/ucall.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/arm64/vgic.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/s390/facility.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/s390/processor.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/s390/ucall.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c [deleted file]
tools/testing/selftests/kvm/lib/s390x/facility.c [deleted file]
tools/testing/selftests/kvm/lib/s390x/processor.c [deleted file]
tools/testing/selftests/kvm/lib/s390x/ucall.c [deleted file]
tools/testing/selftests/kvm/lib/x86/apic.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86/handlers.S [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86/hyperv.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86/memstress.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86/pmu.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86/processor.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86/sev.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86/svm.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86/ucall.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86/vmx.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86_64/apic.c [deleted file]
tools/testing/selftests/kvm/lib/x86_64/handlers.S [deleted file]
tools/testing/selftests/kvm/lib/x86_64/hyperv.c [deleted file]
tools/testing/selftests/kvm/lib/x86_64/memstress.c [deleted file]
tools/testing/selftests/kvm/lib/x86_64/pmu.c [deleted file]
tools/testing/selftests/kvm/lib/x86_64/processor.c [deleted file]
tools/testing/selftests/kvm/lib/x86_64/sev.c [deleted file]
tools/testing/selftests/kvm/lib/x86_64/svm.c [deleted file]
tools/testing/selftests/kvm/lib/x86_64/ucall.c [deleted file]
tools/testing/selftests/kvm/lib/x86_64/vmx.c [deleted file]
tools/testing/selftests/kvm/s390/cmma_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390/config [new file with mode: 0644]
tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390/debug_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390/memop.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390/resets.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390/shared_zeropage_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390/sync_regs_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390/tprot.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390/ucontrol_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390x/cmma_test.c [deleted file]
tools/testing/selftests/kvm/s390x/config [deleted file]
tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c [deleted file]
tools/testing/selftests/kvm/s390x/debug_test.c [deleted file]
tools/testing/selftests/kvm/s390x/memop.c [deleted file]
tools/testing/selftests/kvm/s390x/resets.c [deleted file]
tools/testing/selftests/kvm/s390x/shared_zeropage_test.c [deleted file]
tools/testing/selftests/kvm/s390x/sync_regs_test.c [deleted file]
tools/testing/selftests/kvm/s390x/tprot.c [deleted file]
tools/testing/selftests/kvm/s390x/ucontrol_test.c [deleted file]
tools/testing/selftests/kvm/set_memory_region_test.c
tools/testing/selftests/kvm/x86/amx_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/apic_bus_clock_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/cpuid_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/debug_regs.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/feature_msrs_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/fix_hypercall_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/flds_emulation.h [new file with mode: 0644]
tools/testing/selftests/kvm/x86/hwcr_msr_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/hyperv_clock.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/hyperv_cpuid.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/hyperv_evmcs.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/hyperv_features.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/hyperv_ipi.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/hyperv_svm_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/kvm_clock_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/kvm_pv_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/monitor_mwait_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/nested_exceptions_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/nx_huge_pages_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh [new file with mode: 0755]
tools/testing/selftests/kvm/x86/platform_info_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/pmu_counters_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/pmu_event_filter_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/private_mem_conversions_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/recalc_apic_map_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/set_boot_cpu_id.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/set_sregs_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/sev_init2_tests.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/sev_migrate_tests.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/sev_smoke_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/smm_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/state_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/svm_int_ctl_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/svm_vmcall_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/sync_regs_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/triple_fault_event_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/tsc_msrs_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/tsc_scaling_sync.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/ucna_injection_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/userspace_io_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_apic_access_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_msrs_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/xapic_ipi_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/xapic_state_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/xen_shinfo_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/xen_vmcall_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/xss_msr_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86_64/amx_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/cpuid_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/debug_regs.c [deleted file]
tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/feature_msrs_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/flds_emulation.h [deleted file]
tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/hyperv_clock.c [deleted file]
tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c [deleted file]
tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c [deleted file]
tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c [deleted file]
tools/testing/selftests/kvm/x86_64/hyperv_features.c [deleted file]
tools/testing/selftests/kvm/x86_64/hyperv_ipi.c [deleted file]
tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c [deleted file]
tools/testing/selftests/kvm/x86_64/kvm_clock_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/kvm_pv_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh [deleted file]
tools/testing/selftests/kvm/x86_64/platform_info_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/pmu_counters_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c [deleted file]
tools/testing/selftests/kvm/x86_64/set_sregs_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/sev_init2_tests.c [deleted file]
tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c [deleted file]
tools/testing/selftests/kvm/x86_64/sev_smoke_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/smm_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/state_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/sync_regs_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c [deleted file]
tools/testing/selftests/kvm/x86_64/ucna_injection_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/userspace_io_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/xapic_state_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/xss_msr_test.c [deleted file]

index 17daa9ee9384509c1ef3f2a3825a4594eab88741..2dba81d2fa6f210e78f3ccfc564049733c20e393 100644 (file)
@@ -12605,8 +12605,8 @@ F:      arch/arm64/include/asm/kvm*
 F:     arch/arm64/include/uapi/asm/kvm*
 F:     arch/arm64/kvm/
 F:     include/kvm/arm_*
-F:     tools/testing/selftests/kvm/*/aarch64/
-F:     tools/testing/selftests/kvm/aarch64/
+F:     tools/testing/selftests/kvm/*/arm64/
+F:     tools/testing/selftests/kvm/arm64/
 
 KERNEL VIRTUAL MACHINE FOR LOONGARCH (KVM/LoongArch)
 M:     Tianrui Zhao <zhaotianrui@loongson.cn>
@@ -12677,8 +12677,8 @@ F:      arch/s390/kvm/
 F:     arch/s390/mm/gmap.c
 F:     drivers/s390/char/uvdevice.c
 F:     tools/testing/selftests/drivers/s390x/uvdevice/
-F:     tools/testing/selftests/kvm/*/s390x/
-F:     tools/testing/selftests/kvm/s390x/
+F:     tools/testing/selftests/kvm/*/s390/
+F:     tools/testing/selftests/kvm/s390/
 
 KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86)
 M:     Sean Christopherson <seanjc@google.com>
@@ -12695,8 +12695,8 @@ F:      arch/x86/include/uapi/asm/svm.h
 F:     arch/x86/include/uapi/asm/vmx.h
 F:     arch/x86/kvm/
 F:     arch/x86/kvm/*/
-F:     tools/testing/selftests/kvm/*/x86_64/
-F:     tools/testing/selftests/kvm/x86_64/
+F:     tools/testing/selftests/kvm/*/x86/
+F:     tools/testing/selftests/kvm/x86/
 
 KERNFS
 M:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
index 7b33464bf8cc5c88287be667b3f766906a643283..9bc2eba1af1c4ce744843b6bba4e18f0c77756a9 100644 (file)
@@ -4,16 +4,12 @@ include $(top_srcdir)/scripts/subarch.include
 ARCH            ?= $(SUBARCH)
 
 ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64))
-ifeq ($(ARCH),x86)
-       ARCH_DIR := x86_64
-else ifeq ($(ARCH),arm64)
-       ARCH_DIR := aarch64
-else ifeq ($(ARCH),s390)
-       ARCH_DIR := s390x
+# Top-level selftests allows ARCH=x86_64 :-(
+ifeq ($(ARCH),x86_64)
+       ARCH_DIR := x86
 else
        ARCH_DIR := $(ARCH)
 endif
-
 include Makefile.kvm
 else
 # Empty targets for unsupported architectures
index e988a72f8c20cd5cbda3646341ac6e6a1e1a2f00..9888dd6bb483ceb4189d81c740e334125c442020 100644 (file)
@@ -18,177 +18,177 @@ LIBKVM += lib/userfaultfd_util.c
 
 LIBKVM_STRING += lib/string_override.c
 
-LIBKVM_x86_64 += lib/x86_64/apic.c
-LIBKVM_x86_64 += lib/x86_64/handlers.S
-LIBKVM_x86_64 += lib/x86_64/hyperv.c
-LIBKVM_x86_64 += lib/x86_64/memstress.c
-LIBKVM_x86_64 += lib/x86_64/pmu.c
-LIBKVM_x86_64 += lib/x86_64/processor.c
-LIBKVM_x86_64 += lib/x86_64/sev.c
-LIBKVM_x86_64 += lib/x86_64/svm.c
-LIBKVM_x86_64 += lib/x86_64/ucall.c
-LIBKVM_x86_64 += lib/x86_64/vmx.c
-
-LIBKVM_aarch64 += lib/aarch64/gic.c
-LIBKVM_aarch64 += lib/aarch64/gic_v3.c
-LIBKVM_aarch64 += lib/aarch64/gic_v3_its.c
-LIBKVM_aarch64 += lib/aarch64/handlers.S
-LIBKVM_aarch64 += lib/aarch64/processor.c
-LIBKVM_aarch64 += lib/aarch64/spinlock.c
-LIBKVM_aarch64 += lib/aarch64/ucall.c
-LIBKVM_aarch64 += lib/aarch64/vgic.c
-
-LIBKVM_s390x += lib/s390x/diag318_test_handler.c
-LIBKVM_s390x += lib/s390x/processor.c
-LIBKVM_s390x += lib/s390x/ucall.c
-LIBKVM_s390x += lib/s390x/facility.c
+LIBKVM_x86 += lib/x86/apic.c
+LIBKVM_x86 += lib/x86/handlers.S
+LIBKVM_x86 += lib/x86/hyperv.c
+LIBKVM_x86 += lib/x86/memstress.c
+LIBKVM_x86 += lib/x86/pmu.c
+LIBKVM_x86 += lib/x86/processor.c
+LIBKVM_x86 += lib/x86/sev.c
+LIBKVM_x86 += lib/x86/svm.c
+LIBKVM_x86 += lib/x86/ucall.c
+LIBKVM_x86 += lib/x86/vmx.c
+
+LIBKVM_arm64 += lib/arm64/gic.c
+LIBKVM_arm64 += lib/arm64/gic_v3.c
+LIBKVM_arm64 += lib/arm64/gic_v3_its.c
+LIBKVM_arm64 += lib/arm64/handlers.S
+LIBKVM_arm64 += lib/arm64/processor.c
+LIBKVM_arm64 += lib/arm64/spinlock.c
+LIBKVM_arm64 += lib/arm64/ucall.c
+LIBKVM_arm64 += lib/arm64/vgic.c
+
+LIBKVM_s390 += lib/s390/diag318_test_handler.c
+LIBKVM_s390 += lib/s390/processor.c
+LIBKVM_s390 += lib/s390/ucall.c
+LIBKVM_s390 += lib/s390/facility.c
 
 LIBKVM_riscv += lib/riscv/handlers.S
 LIBKVM_riscv += lib/riscv/processor.c
 LIBKVM_riscv += lib/riscv/ucall.c
 
 # Non-compiled test targets
-TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh
+TEST_PROGS_x86 += x86/nx_huge_pages_test.sh
 
 # Compiled test targets
-TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
-TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
-TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test
-TEST_GEN_PROGS_x86_64 += x86_64/feature_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test
-TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test
-TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_extended_hypercalls
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush
-TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test
-TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
-TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test
-TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test
-TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
-TEST_GEN_PROGS_x86_64 += x86_64/pmu_counters_test
-TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
-TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test
-TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test
-TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
-TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
-TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test
-TEST_GEN_PROGS_x86_64 += x86_64/smm_test
-TEST_GEN_PROGS_x86_64 += x86_64/state_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test
-TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync
-TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
-TEST_GEN_PROGS_x86_64 += x86_64/ucna_injection_test
-TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test
-TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
-TEST_GEN_PROGS_x86_64 += x86_64/apic_bus_clock_test
-TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test
-TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test
-TEST_GEN_PROGS_x86_64 += x86_64/xcr0_cpuid_test
-TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
-TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
-TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test
-TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
-TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
-TEST_GEN_PROGS_x86_64 += x86_64/sev_init2_tests
-TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
-TEST_GEN_PROGS_x86_64 += x86_64/sev_smoke_test
-TEST_GEN_PROGS_x86_64 += x86_64/amx_test
-TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test
-TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test
-TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test
-TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
-TEST_GEN_PROGS_x86_64 += coalesced_io_test
-TEST_GEN_PROGS_x86_64 += demand_paging_test
-TEST_GEN_PROGS_x86_64 += dirty_log_test
-TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
-TEST_GEN_PROGS_x86_64 += guest_memfd_test
-TEST_GEN_PROGS_x86_64 += guest_print_test
-TEST_GEN_PROGS_x86_64 += hardware_disable_test
-TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
-TEST_GEN_PROGS_x86_64 += kvm_page_table_test
-TEST_GEN_PROGS_x86_64 += mmu_stress_test
-TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
-TEST_GEN_PROGS_x86_64 += memslot_perf_test
-TEST_GEN_PROGS_x86_64 += rseq_test
-TEST_GEN_PROGS_x86_64 += set_memory_region_test
-TEST_GEN_PROGS_x86_64 += steal_time
-TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test
-TEST_GEN_PROGS_x86_64 += system_counter_offset_test
-TEST_GEN_PROGS_x86_64 += pre_fault_memory_test
+TEST_GEN_PROGS_x86 = x86/cpuid_test
+TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test
+TEST_GEN_PROGS_x86 += x86/feature_msrs_test
+TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test
+TEST_GEN_PROGS_x86 += x86/fix_hypercall_test
+TEST_GEN_PROGS_x86 += x86/hwcr_msr_test
+TEST_GEN_PROGS_x86 += x86/hyperv_clock
+TEST_GEN_PROGS_x86 += x86/hyperv_cpuid
+TEST_GEN_PROGS_x86 += x86/hyperv_evmcs
+TEST_GEN_PROGS_x86 += x86/hyperv_extended_hypercalls
+TEST_GEN_PROGS_x86 += x86/hyperv_features
+TEST_GEN_PROGS_x86 += x86/hyperv_ipi
+TEST_GEN_PROGS_x86 += x86/hyperv_svm_test
+TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush
+TEST_GEN_PROGS_x86 += x86/kvm_clock_test
+TEST_GEN_PROGS_x86 += x86/kvm_pv_test
+TEST_GEN_PROGS_x86 += x86/monitor_mwait_test
+TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
+TEST_GEN_PROGS_x86 += x86/platform_info_test
+TEST_GEN_PROGS_x86 += x86/pmu_counters_test
+TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test
+TEST_GEN_PROGS_x86 += x86/private_mem_conversions_test
+TEST_GEN_PROGS_x86 += x86/private_mem_kvm_exits_test
+TEST_GEN_PROGS_x86 += x86/set_boot_cpu_id
+TEST_GEN_PROGS_x86 += x86/set_sregs_test
+TEST_GEN_PROGS_x86 += x86/smaller_maxphyaddr_emulation_test
+TEST_GEN_PROGS_x86 += x86/smm_test
+TEST_GEN_PROGS_x86 += x86/state_test
+TEST_GEN_PROGS_x86 += x86/vmx_preemption_timer_test
+TEST_GEN_PROGS_x86 += x86/svm_vmcall_test
+TEST_GEN_PROGS_x86 += x86/svm_int_ctl_test
+TEST_GEN_PROGS_x86 += x86/svm_nested_shutdown_test
+TEST_GEN_PROGS_x86 += x86/svm_nested_soft_inject_test
+TEST_GEN_PROGS_x86 += x86/tsc_scaling_sync
+TEST_GEN_PROGS_x86 += x86/sync_regs_test
+TEST_GEN_PROGS_x86 += x86/ucna_injection_test
+TEST_GEN_PROGS_x86 += x86/userspace_io_test
+TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test
+TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test
+TEST_GEN_PROGS_x86 += x86/vmx_close_while_nested_test
+TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test
+TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state
+TEST_GEN_PROGS_x86 += x86/vmx_msrs_test
+TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state
+TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test
+TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86 += x86/vmx_nested_tsc_scaling_test
+TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test
+TEST_GEN_PROGS_x86 += x86/xapic_ipi_test
+TEST_GEN_PROGS_x86 += x86/xapic_state_test
+TEST_GEN_PROGS_x86 += x86/xcr0_cpuid_test
+TEST_GEN_PROGS_x86 += x86/xss_msr_test
+TEST_GEN_PROGS_x86 += x86/debug_regs
+TEST_GEN_PROGS_x86 += x86/tsc_msrs_test
+TEST_GEN_PROGS_x86 += x86/vmx_pmu_caps_test
+TEST_GEN_PROGS_x86 += x86/xen_shinfo_test
+TEST_GEN_PROGS_x86 += x86/xen_vmcall_test
+TEST_GEN_PROGS_x86 += x86/sev_init2_tests
+TEST_GEN_PROGS_x86 += x86/sev_migrate_tests
+TEST_GEN_PROGS_x86 += x86/sev_smoke_test
+TEST_GEN_PROGS_x86 += x86/amx_test
+TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test
+TEST_GEN_PROGS_x86 += x86/triple_fault_event_test
+TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test
+TEST_GEN_PROGS_x86 += access_tracking_perf_test
+TEST_GEN_PROGS_x86 += coalesced_io_test
+TEST_GEN_PROGS_x86 += demand_paging_test
+TEST_GEN_PROGS_x86 += dirty_log_test
+TEST_GEN_PROGS_x86 += dirty_log_perf_test
+TEST_GEN_PROGS_x86 += guest_memfd_test
+TEST_GEN_PROGS_x86 += guest_print_test
+TEST_GEN_PROGS_x86 += hardware_disable_test
+TEST_GEN_PROGS_x86 += kvm_create_max_vcpus
+TEST_GEN_PROGS_x86 += kvm_page_table_test
+TEST_GEN_PROGS_x86 += memslot_modification_stress_test
+TEST_GEN_PROGS_x86 += memslot_perf_test
+TEST_GEN_PROGS_x86 += mmu_stress_test
+TEST_GEN_PROGS_x86 += rseq_test
+TEST_GEN_PROGS_x86 += set_memory_region_test
+TEST_GEN_PROGS_x86 += steal_time
+TEST_GEN_PROGS_x86 += kvm_binary_stats_test
+TEST_GEN_PROGS_x86 += system_counter_offset_test
+TEST_GEN_PROGS_x86 += pre_fault_memory_test
 
 # Compiled outputs used by test targets
-TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test
-
-TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs
-TEST_GEN_PROGS_aarch64 += aarch64/arch_timer_edge_cases
-TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
-TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
-TEST_GEN_PROGS_aarch64 += aarch64/mmio_abort
-TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
-TEST_GEN_PROGS_aarch64 += aarch64/psci_test
-TEST_GEN_PROGS_aarch64 += aarch64/set_id_regs
-TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter
-TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
-TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
-TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
-TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress
-TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access
-TEST_GEN_PROGS_aarch64 += aarch64/no-vgic-v3
-TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
-TEST_GEN_PROGS_aarch64 += arch_timer
-TEST_GEN_PROGS_aarch64 += coalesced_io_test
-TEST_GEN_PROGS_aarch64 += demand_paging_test
-TEST_GEN_PROGS_aarch64 += dirty_log_test
-TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
-TEST_GEN_PROGS_aarch64 += guest_print_test
-TEST_GEN_PROGS_aarch64 += get-reg-list
-TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
-TEST_GEN_PROGS_aarch64 += kvm_page_table_test
-TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test
-TEST_GEN_PROGS_aarch64 += memslot_perf_test
-TEST_GEN_PROGS_aarch64 += mmu_stress_test
-TEST_GEN_PROGS_aarch64 += rseq_test
-TEST_GEN_PROGS_aarch64 += set_memory_region_test
-TEST_GEN_PROGS_aarch64 += steal_time
-TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test
-
-TEST_GEN_PROGS_s390x = s390x/memop
-TEST_GEN_PROGS_s390x += s390x/resets
-TEST_GEN_PROGS_s390x += s390x/sync_regs_test
-TEST_GEN_PROGS_s390x += s390x/tprot
-TEST_GEN_PROGS_s390x += s390x/cmma_test
-TEST_GEN_PROGS_s390x += s390x/debug_test
-TEST_GEN_PROGS_s390x += s390x/cpumodel_subfuncs_test
-TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test
-TEST_GEN_PROGS_s390x += s390x/ucontrol_test
-TEST_GEN_PROGS_s390x += demand_paging_test
-TEST_GEN_PROGS_s390x += dirty_log_test
-TEST_GEN_PROGS_s390x += guest_print_test
-TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
-TEST_GEN_PROGS_s390x += kvm_page_table_test
-TEST_GEN_PROGS_s390x += rseq_test
-TEST_GEN_PROGS_s390x += set_memory_region_test
-TEST_GEN_PROGS_s390x += kvm_binary_stats_test
+TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test
+
+TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
+TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
+TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
+TEST_GEN_PROGS_arm64 += arm64/hypercalls
+TEST_GEN_PROGS_arm64 += arm64/mmio_abort
+TEST_GEN_PROGS_arm64 += arm64/page_fault_test
+TEST_GEN_PROGS_arm64 += arm64/psci_test
+TEST_GEN_PROGS_arm64 += arm64/set_id_regs
+TEST_GEN_PROGS_arm64 += arm64/smccc_filter
+TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config
+TEST_GEN_PROGS_arm64 += arm64/vgic_init
+TEST_GEN_PROGS_arm64 += arm64/vgic_irq
+TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress
+TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access
+TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3
+TEST_GEN_PROGS_arm64 += access_tracking_perf_test
+TEST_GEN_PROGS_arm64 += arch_timer
+TEST_GEN_PROGS_arm64 += coalesced_io_test
+TEST_GEN_PROGS_arm64 += demand_paging_test
+TEST_GEN_PROGS_arm64 += dirty_log_test
+TEST_GEN_PROGS_arm64 += dirty_log_perf_test
+TEST_GEN_PROGS_arm64 += guest_print_test
+TEST_GEN_PROGS_arm64 += get-reg-list
+TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_arm64 += kvm_page_table_test
+TEST_GEN_PROGS_arm64 += memslot_modification_stress_test
+TEST_GEN_PROGS_arm64 += memslot_perf_test
+TEST_GEN_PROGS_arm64 += mmu_stress_test
+TEST_GEN_PROGS_arm64 += rseq_test
+TEST_GEN_PROGS_arm64 += set_memory_region_test
+TEST_GEN_PROGS_arm64 += steal_time
+TEST_GEN_PROGS_arm64 += kvm_binary_stats_test
+
+TEST_GEN_PROGS_s390 = s390/memop
+TEST_GEN_PROGS_s390 += s390/resets
+TEST_GEN_PROGS_s390 += s390/sync_regs_test
+TEST_GEN_PROGS_s390 += s390/tprot
+TEST_GEN_PROGS_s390 += s390/cmma_test
+TEST_GEN_PROGS_s390 += s390/debug_test
+TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test
+TEST_GEN_PROGS_s390 += s390/shared_zeropage_test
+TEST_GEN_PROGS_s390 += s390/ucontrol_test
+TEST_GEN_PROGS_s390 += demand_paging_test
+TEST_GEN_PROGS_s390 += dirty_log_test
+TEST_GEN_PROGS_s390 += guest_print_test
+TEST_GEN_PROGS_s390 += kvm_create_max_vcpus
+TEST_GEN_PROGS_s390 += kvm_page_table_test
+TEST_GEN_PROGS_s390 += rseq_test
+TEST_GEN_PROGS_s390 += set_memory_region_test
+TEST_GEN_PROGS_s390 += kvm_binary_stats_test
 
 TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
 TEST_GEN_PROGS_riscv += riscv/ebreak_test
@@ -222,11 +222,7 @@ include ../lib.mk
 INSTALL_HDR_PATH = $(top_srcdir)/usr
 LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
 LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
-ifeq ($(ARCH),x86_64)
-LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include
-else
-LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
-endif
+LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH_DIR)/include
 CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
        -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
        -fno-builtin-memcmp -fno-builtin-memcpy \
diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
deleted file mode 100644 (file)
index 447d61c..0000000
+++ /dev/null
@@ -1,167 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * aarch32_id_regs - Test for ID register behavior on AArch64-only systems
- *
- * Copyright (c) 2022 Google LLC.
- *
- * Test that KVM handles the AArch64 views of the AArch32 ID registers as RAZ
- * and WI from userspace.
- */
-
-#include <stdint.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "test_util.h"
-#include <linux/bitfield.h>
-
-#define BAD_ID_REG_VAL 0x1badc0deul
-
-#define GUEST_ASSERT_REG_RAZ(reg)      GUEST_ASSERT_EQ(read_sysreg_s(reg), 0)
-
-static void guest_main(void)
-{
-       GUEST_ASSERT_REG_RAZ(SYS_ID_PFR0_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_PFR1_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_DFR0_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_AFR0_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR0_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR1_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR2_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR3_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR0_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR1_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR2_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR3_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR4_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR5_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR4_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR6_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_MVFR0_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_MVFR1_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_MVFR2_EL1);
-       GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 3));
-       GUEST_ASSERT_REG_RAZ(SYS_ID_PFR2_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_DFR1_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR5_EL1);
-       GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 7));
-
-       GUEST_DONE();
-}
-
-static void test_guest_raz(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       vcpu_run(vcpu);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-       case UCALL_DONE:
-               break;
-       default:
-               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
-       }
-}
-
-static uint64_t raz_wi_reg_ids[] = {
-       KVM_ARM64_SYS_REG(SYS_ID_PFR0_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_PFR1_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_DFR0_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_MMFR0_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_MMFR1_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_MMFR2_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_MMFR3_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_ISAR0_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_ISAR1_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_ISAR2_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_ISAR3_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_ISAR4_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_ISAR5_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_MMFR4_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_ISAR6_EL1),
-       KVM_ARM64_SYS_REG(SYS_MVFR0_EL1),
-       KVM_ARM64_SYS_REG(SYS_MVFR1_EL1),
-       KVM_ARM64_SYS_REG(SYS_MVFR2_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_PFR2_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_MMFR5_EL1),
-};
-
-static void test_user_raz_wi(struct kvm_vcpu *vcpu)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(raz_wi_reg_ids); i++) {
-               uint64_t reg_id = raz_wi_reg_ids[i];
-               uint64_t val;
-
-               val = vcpu_get_reg(vcpu, reg_id);
-               TEST_ASSERT_EQ(val, 0);
-
-               /*
-                * Expect the ioctl to succeed with no effect on the register
-                * value.
-                */
-               vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
-
-               val = vcpu_get_reg(vcpu, reg_id);
-               TEST_ASSERT_EQ(val, 0);
-       }
-}
-
-static uint64_t raz_invariant_reg_ids[] = {
-       KVM_ARM64_SYS_REG(SYS_ID_AFR0_EL1),
-       KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 3)),
-       KVM_ARM64_SYS_REG(SYS_ID_DFR1_EL1),
-       KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 7)),
-};
-
-static void test_user_raz_invariant(struct kvm_vcpu *vcpu)
-{
-       int i, r;
-
-       for (i = 0; i < ARRAY_SIZE(raz_invariant_reg_ids); i++) {
-               uint64_t reg_id = raz_invariant_reg_ids[i];
-               uint64_t val;
-
-               val = vcpu_get_reg(vcpu, reg_id);
-               TEST_ASSERT_EQ(val, 0);
-
-               r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
-               TEST_ASSERT(r < 0 && errno == EINVAL,
-                           "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
-
-               val = vcpu_get_reg(vcpu, reg_id);
-               TEST_ASSERT_EQ(val, 0);
-       }
-}
-
-
-
-static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu)
-{
-       uint64_t val, el0;
-
-       val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
-
-       el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
-       return el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY;
-}
-
-int main(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
-
-       TEST_REQUIRE(vcpu_aarch64_only(vcpu));
-
-       test_user_raz_wi(vcpu);
-       test_user_raz_invariant(vcpu);
-       test_guest_raz(vcpu);
-
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
deleted file mode 100644 (file)
index eeba1cc..0000000
+++ /dev/null
@@ -1,220 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * The test validates both the virtual and physical timer IRQs using
- * CVAL and TVAL registers.
- *
- * Copyright (c) 2021, Google LLC.
- */
-#include "arch_timer.h"
-#include "delay.h"
-#include "gic.h"
-#include "processor.h"
-#include "timer_test.h"
-#include "ucall_common.h"
-#include "vgic.h"
-
-enum guest_stage {
-       GUEST_STAGE_VTIMER_CVAL = 1,
-       GUEST_STAGE_VTIMER_TVAL,
-       GUEST_STAGE_PTIMER_CVAL,
-       GUEST_STAGE_PTIMER_TVAL,
-       GUEST_STAGE_MAX,
-};
-
-static int vtimer_irq, ptimer_irq;
-
-static void
-guest_configure_timer_action(struct test_vcpu_shared_data *shared_data)
-{
-       switch (shared_data->guest_stage) {
-       case GUEST_STAGE_VTIMER_CVAL:
-               timer_set_next_cval_ms(VIRTUAL, test_args.timer_period_ms);
-               shared_data->xcnt = timer_get_cntct(VIRTUAL);
-               timer_set_ctl(VIRTUAL, CTL_ENABLE);
-               break;
-       case GUEST_STAGE_VTIMER_TVAL:
-               timer_set_next_tval_ms(VIRTUAL, test_args.timer_period_ms);
-               shared_data->xcnt = timer_get_cntct(VIRTUAL);
-               timer_set_ctl(VIRTUAL, CTL_ENABLE);
-               break;
-       case GUEST_STAGE_PTIMER_CVAL:
-               timer_set_next_cval_ms(PHYSICAL, test_args.timer_period_ms);
-               shared_data->xcnt = timer_get_cntct(PHYSICAL);
-               timer_set_ctl(PHYSICAL, CTL_ENABLE);
-               break;
-       case GUEST_STAGE_PTIMER_TVAL:
-               timer_set_next_tval_ms(PHYSICAL, test_args.timer_period_ms);
-               shared_data->xcnt = timer_get_cntct(PHYSICAL);
-               timer_set_ctl(PHYSICAL, CTL_ENABLE);
-               break;
-       default:
-               GUEST_ASSERT(0);
-       }
-}
-
-static void guest_validate_irq(unsigned int intid,
-                               struct test_vcpu_shared_data *shared_data)
-{
-       enum guest_stage stage = shared_data->guest_stage;
-       uint64_t xcnt = 0, xcnt_diff_us, cval = 0;
-       unsigned long xctl = 0;
-       unsigned int timer_irq = 0;
-       unsigned int accessor;
-
-       if (intid == IAR_SPURIOUS)
-               return;
-
-       switch (stage) {
-       case GUEST_STAGE_VTIMER_CVAL:
-       case GUEST_STAGE_VTIMER_TVAL:
-               accessor = VIRTUAL;
-               timer_irq = vtimer_irq;
-               break;
-       case GUEST_STAGE_PTIMER_CVAL:
-       case GUEST_STAGE_PTIMER_TVAL:
-               accessor = PHYSICAL;
-               timer_irq = ptimer_irq;
-               break;
-       default:
-               GUEST_ASSERT(0);
-               return;
-       }
-
-       xctl = timer_get_ctl(accessor);
-       if ((xctl & CTL_IMASK) || !(xctl & CTL_ENABLE))
-               return;
-
-       timer_set_ctl(accessor, CTL_IMASK);
-       xcnt = timer_get_cntct(accessor);
-       cval = timer_get_cval(accessor);
-
-       xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
-
-       /* Make sure we are dealing with the correct timer IRQ */
-       GUEST_ASSERT_EQ(intid, timer_irq);
-
-       /* Basic 'timer condition met' check */
-       __GUEST_ASSERT(xcnt >= cval,
-                      "xcnt = 0x%lx, cval = 0x%lx, xcnt_diff_us = 0x%lx",
-                      xcnt, cval, xcnt_diff_us);
-       __GUEST_ASSERT(xctl & CTL_ISTATUS, "xctl = 0x%lx", xctl);
-
-       WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
-}
-
-static void guest_irq_handler(struct ex_regs *regs)
-{
-       unsigned int intid = gic_get_and_ack_irq();
-       uint32_t cpu = guest_get_vcpuid();
-       struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
-
-       guest_validate_irq(intid, shared_data);
-
-       gic_set_eoi(intid);
-}
-
-static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
-                               enum guest_stage stage)
-{
-       uint32_t irq_iter, config_iter;
-
-       shared_data->guest_stage = stage;
-       shared_data->nr_iter = 0;
-
-       for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
-               /* Setup the next interrupt */
-               guest_configure_timer_action(shared_data);
-
-               /* Setup a timeout for the interrupt to arrive */
-               udelay(msecs_to_usecs(test_args.timer_period_ms) +
-                       test_args.timer_err_margin_us);
-
-               irq_iter = READ_ONCE(shared_data->nr_iter);
-               __GUEST_ASSERT(config_iter + 1 == irq_iter,
-                               "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
-                               "  Guest timer interrupt was not triggered within the specified\n"
-                               "  interval, try to increase the error margin by [-e] option.\n",
-                               config_iter + 1, irq_iter);
-       }
-}
-
-static void guest_code(void)
-{
-       uint32_t cpu = guest_get_vcpuid();
-       struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
-
-       local_irq_disable();
-
-       gic_init(GIC_V3, test_args.nr_vcpus);
-
-       timer_set_ctl(VIRTUAL, CTL_IMASK);
-       timer_set_ctl(PHYSICAL, CTL_IMASK);
-
-       gic_irq_enable(vtimer_irq);
-       gic_irq_enable(ptimer_irq);
-       local_irq_enable();
-
-       guest_run_stage(shared_data, GUEST_STAGE_VTIMER_CVAL);
-       guest_run_stage(shared_data, GUEST_STAGE_VTIMER_TVAL);
-       guest_run_stage(shared_data, GUEST_STAGE_PTIMER_CVAL);
-       guest_run_stage(shared_data, GUEST_STAGE_PTIMER_TVAL);
-
-       GUEST_DONE();
-}
-
-static void test_init_timer_irq(struct kvm_vm *vm)
-{
-       /* Timer initid should be same for all the vCPUs, so query only vCPU-0 */
-       vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
-                            KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
-       vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
-                            KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
-
-       sync_global_to_guest(vm, ptimer_irq);
-       sync_global_to_guest(vm, vtimer_irq);
-
-       pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
-}
-
-static int gic_fd;
-
-struct kvm_vm *test_vm_create(void)
-{
-       struct kvm_vm *vm;
-       unsigned int i;
-       int nr_vcpus = test_args.nr_vcpus;
-
-       vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
-
-       vm_init_descriptor_tables(vm);
-       vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
-
-       if (!test_args.reserved) {
-               if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET)) {
-                       struct kvm_arm_counter_offset offset = {
-                               .counter_offset = test_args.counter_offset,
-                               .reserved = 0,
-                       };
-                       vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &offset);
-               } else
-                       TEST_FAIL("no support for global offset");
-       }
-
-       for (i = 0; i < nr_vcpus; i++)
-               vcpu_init_descriptor_tables(vcpus[i]);
-
-       test_init_timer_irq(vm);
-       gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);
-       __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3");
-
-       /* Make all the test's cmdline args visible to the guest */
-       sync_global_to_guest(vm, test_args);
-
-       return vm;
-}
-
-void test_vm_cleanup(struct kvm_vm *vm)
-{
-       close(gic_fd);
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c
deleted file mode 100644 (file)
index a36a7e2..0000000
+++ /dev/null
@@ -1,1062 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * arch_timer_edge_cases.c - Tests the aarch64 timer IRQ functionality.
- *
- * The test validates some edge cases related to the arch-timer:
- * - timers above the max TVAL value.
- * - timers in the past
- * - moving counters ahead and behind pending timers.
- * - reprograming timers.
- * - timers fired multiple times.
- * - masking/unmasking using the timer control mask.
- *
- * Copyright (c) 2021, Google LLC.
- */
-
-#define _GNU_SOURCE
-
-#include <pthread.h>
-#include <sys/sysinfo.h>
-
-#include "arch_timer.h"
-#include "gic.h"
-#include "vgic.h"
-
-static const uint64_t CVAL_MAX = ~0ULL;
-/* tval is a signed 32-bit int. */
-static const int32_t TVAL_MAX = INT32_MAX;
-static const int32_t TVAL_MIN = INT32_MIN;
-
-/* After how much time we say there is no IRQ. */
-static const uint32_t TIMEOUT_NO_IRQ_US = 50000;
-
-/* A nice counter value to use as the starting one for most tests. */
-static const uint64_t DEF_CNT = (CVAL_MAX / 2);
-
-/* Number of runs. */
-static const uint32_t NR_TEST_ITERS_DEF = 5;
-
-/* Default wait test time in ms. */
-static const uint32_t WAIT_TEST_MS = 10;
-
-/* Default "long" wait test time in ms. */
-static const uint32_t LONG_WAIT_TEST_MS = 100;
-
-/* Shared with IRQ handler. */
-struct test_vcpu_shared_data {
-       atomic_t handled;
-       atomic_t spurious;
-} shared_data;
-
-struct test_args {
-       /* Virtual or physical timer and counter tests. */
-       enum arch_timer timer;
-       /* Delay used for most timer tests. */
-       uint64_t wait_ms;
-       /* Delay used in the test_long_timer_delays test. */
-       uint64_t long_wait_ms;
-       /* Number of iterations. */
-       int iterations;
-       /* Whether to test the physical timer. */
-       bool test_physical;
-       /* Whether to test the virtual timer. */
-       bool test_virtual;
-};
-
-struct test_args test_args = {
-       .wait_ms = WAIT_TEST_MS,
-       .long_wait_ms = LONG_WAIT_TEST_MS,
-       .iterations = NR_TEST_ITERS_DEF,
-       .test_physical = true,
-       .test_virtual = true,
-};
-
-static int vtimer_irq, ptimer_irq;
-
-enum sync_cmd {
-       SET_COUNTER_VALUE,
-       USERSPACE_USLEEP,
-       USERSPACE_SCHED_YIELD,
-       USERSPACE_MIGRATE_SELF,
-       NO_USERSPACE_CMD,
-};
-
-typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec);
-
-static void sleep_poll(enum arch_timer timer, uint64_t usec);
-static void sleep_sched_poll(enum arch_timer timer, uint64_t usec);
-static void sleep_in_userspace(enum arch_timer timer, uint64_t usec);
-static void sleep_migrate(enum arch_timer timer, uint64_t usec);
-
-sleep_method_t sleep_method[] = {
-       sleep_poll,
-       sleep_sched_poll,
-       sleep_migrate,
-       sleep_in_userspace,
-};
-
-typedef void (*irq_wait_method_t)(void);
-
-static void wait_for_non_spurious_irq(void);
-static void wait_poll_for_irq(void);
-static void wait_sched_poll_for_irq(void);
-static void wait_migrate_poll_for_irq(void);
-
-irq_wait_method_t irq_wait_method[] = {
-       wait_for_non_spurious_irq,
-       wait_poll_for_irq,
-       wait_sched_poll_for_irq,
-       wait_migrate_poll_for_irq,
-};
-
-enum timer_view {
-       TIMER_CVAL,
-       TIMER_TVAL,
-};
-
-static void assert_irqs_handled(uint32_t n)
-{
-       int h = atomic_read(&shared_data.handled);
-
-       __GUEST_ASSERT(h == n, "Handled %d IRQs but expected %d", h, n);
-}
-
-static void userspace_cmd(uint64_t cmd)
-{
-       GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0);
-}
-
-static void userspace_migrate_vcpu(void)
-{
-       userspace_cmd(USERSPACE_MIGRATE_SELF);
-}
-
-static void userspace_sleep(uint64_t usecs)
-{
-       GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0);
-}
-
-static void set_counter(enum arch_timer timer, uint64_t counter)
-{
-       GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0);
-}
-
-static void guest_irq_handler(struct ex_regs *regs)
-{
-       unsigned int intid = gic_get_and_ack_irq();
-       enum arch_timer timer;
-       uint64_t cnt, cval;
-       uint32_t ctl;
-       bool timer_condition, istatus;
-
-       if (intid == IAR_SPURIOUS) {
-               atomic_inc(&shared_data.spurious);
-               goto out;
-       }
-
-       if (intid == ptimer_irq)
-               timer = PHYSICAL;
-       else if (intid == vtimer_irq)
-               timer = VIRTUAL;
-       else
-               goto out;
-
-       ctl = timer_get_ctl(timer);
-       cval = timer_get_cval(timer);
-       cnt = timer_get_cntct(timer);
-       timer_condition = cnt >= cval;
-       istatus = (ctl & CTL_ISTATUS) && (ctl & CTL_ENABLE);
-       GUEST_ASSERT_EQ(timer_condition, istatus);
-
-       /* Disable and mask the timer. */
-       timer_set_ctl(timer, CTL_IMASK);
-
-       atomic_inc(&shared_data.handled);
-
-out:
-       gic_set_eoi(intid);
-}
-
-static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles,
-                        uint32_t ctl)
-{
-       atomic_set(&shared_data.handled, 0);
-       atomic_set(&shared_data.spurious, 0);
-       timer_set_cval(timer, cval_cycles);
-       timer_set_ctl(timer, ctl);
-}
-
-static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles,
-                        uint32_t ctl)
-{
-       atomic_set(&shared_data.handled, 0);
-       atomic_set(&shared_data.spurious, 0);
-       timer_set_ctl(timer, ctl);
-       timer_set_tval(timer, tval_cycles);
-}
-
-static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl,
-                        enum timer_view tv)
-{
-       switch (tv) {
-       case TIMER_CVAL:
-               set_cval_irq(timer, xval, ctl);
-               break;
-       case TIMER_TVAL:
-               set_tval_irq(timer, xval, ctl);
-               break;
-       default:
-               GUEST_FAIL("Could not get timer %d", timer);
-       }
-}
-
-/*
- * Note that this can theoretically hang forever, so we rely on having
- * a timeout mechanism in the "runner", like:
- * tools/testing/selftests/kselftest/runner.sh.
- */
-static void wait_for_non_spurious_irq(void)
-{
-       int h;
-
-       local_irq_disable();
-
-       for (h = atomic_read(&shared_data.handled); h == atomic_read(&shared_data.handled);) {
-               wfi();
-               local_irq_enable();
-               isb(); /* handle IRQ */
-               local_irq_disable();
-       }
-}
-
-/*
- * Wait for a non-spurious IRQ by polling in the guest or in
- * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD).
- *
- * Note that this can theoretically hang forever, so we rely on having
- * a timeout mechanism in the "runner", like:
- * tools/testing/selftests/kselftest/runner.sh.
- */
-static void poll_for_non_spurious_irq(enum sync_cmd usp_cmd)
-{
-       int h;
-
-       local_irq_disable();
-
-       h = atomic_read(&shared_data.handled);
-
-       local_irq_enable();
-       while (h == atomic_read(&shared_data.handled)) {
-               if (usp_cmd == NO_USERSPACE_CMD)
-                       cpu_relax();
-               else
-                       userspace_cmd(usp_cmd);
-       }
-       local_irq_disable();
-}
-
-static void wait_poll_for_irq(void)
-{
-       poll_for_non_spurious_irq(NO_USERSPACE_CMD);
-}
-
-static void wait_sched_poll_for_irq(void)
-{
-       poll_for_non_spurious_irq(USERSPACE_SCHED_YIELD);
-}
-
-static void wait_migrate_poll_for_irq(void)
-{
-       poll_for_non_spurious_irq(USERSPACE_MIGRATE_SELF);
-}
-
-/*
- * Sleep for usec microseconds by polling in the guest or in
- * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD).
- */
-static void guest_poll(enum arch_timer test_timer, uint64_t usec,
-                      enum sync_cmd usp_cmd)
-{
-       uint64_t cycles = usec_to_cycles(usec);
-       /* Whichever timer we are testing with, sleep with the other. */
-       enum arch_timer sleep_timer = 1 - test_timer;
-       uint64_t start = timer_get_cntct(sleep_timer);
-
-       while ((timer_get_cntct(sleep_timer) - start) < cycles) {
-               if (usp_cmd == NO_USERSPACE_CMD)
-                       cpu_relax();
-               else
-                       userspace_cmd(usp_cmd);
-       }
-}
-
-static void sleep_poll(enum arch_timer timer, uint64_t usec)
-{
-       guest_poll(timer, usec, NO_USERSPACE_CMD);
-}
-
-static void sleep_sched_poll(enum arch_timer timer, uint64_t usec)
-{
-       guest_poll(timer, usec, USERSPACE_SCHED_YIELD);
-}
-
-static void sleep_migrate(enum arch_timer timer, uint64_t usec)
-{
-       guest_poll(timer, usec, USERSPACE_MIGRATE_SELF);
-}
-
-static void sleep_in_userspace(enum arch_timer timer, uint64_t usec)
-{
-       userspace_sleep(usec);
-}
-
-/*
- * Reset the timer state to sane values: a counter value well away from the
- * edge, and the timer masked and disabled via the control register.
- */
-static void reset_timer_state(enum arch_timer timer, uint64_t cnt)
-{
-       set_counter(timer, cnt);
-       timer_set_ctl(timer, CTL_IMASK);
-}
-
-static void test_timer_xval(enum arch_timer timer, uint64_t xval,
-                           enum timer_view tv, irq_wait_method_t wm, bool reset_state,
-                           uint64_t reset_cnt)
-{
-       local_irq_disable();
-
-       if (reset_state)
-               reset_timer_state(timer, reset_cnt);
-
-       set_xval_irq(timer, xval, CTL_ENABLE, tv);
-
-       /* This method re-enables IRQs to handle the one we're looking for. */
-       wm();
-
-       assert_irqs_handled(1);
-       local_irq_enable();
-}
-
-/*
- * The test_timer_* functions will program the timer, wait for it, and assert
- * the firing of the correct IRQ.
- *
- * These functions don't have a timeout and return as soon as they receive an
- * IRQ. They can hang (forever), so we rely on having a timeout mechanism in
- * the "runner", like: tools/testing/selftests/kselftest/runner.sh.
- */
-
-static void test_timer_cval(enum arch_timer timer, uint64_t cval,
-                           irq_wait_method_t wm, bool reset_state,
-                           uint64_t reset_cnt)
-{
-       test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt);
-}
-
-static void test_timer_tval(enum arch_timer timer, int32_t tval,
-                           irq_wait_method_t wm, bool reset_state,
-                           uint64_t reset_cnt)
-{
-       test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state,
-                       reset_cnt);
-}
-
-static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval,
-                                  uint64_t usec, enum timer_view timer_view,
-                                  sleep_method_t guest_sleep)
-{
-       local_irq_disable();
-
-       set_xval_irq(timer, xval, CTL_ENABLE | CTL_IMASK, timer_view);
-       guest_sleep(timer, usec);
-
-       local_irq_enable();
-       isb();
-
-       /* Assume success (no IRQ) after waiting usec microseconds */
-       assert_irqs_handled(0);
-}
-
-static void test_cval_no_irq(enum arch_timer timer, uint64_t cval,
-                            uint64_t usec, sleep_method_t wm)
-{
-       test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm);
-}
-
-static void test_tval_no_irq(enum arch_timer timer, int32_t tval, uint64_t usec,
-                            sleep_method_t wm)
-{
-       /* tval will be cast to an int32_t in test_xval_check_no_irq */
-       test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm);
-}
-
-/* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */
-static void test_timer_control_mask_then_unmask(enum arch_timer timer)
-{
-       reset_timer_state(timer, DEF_CNT);
-       set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
-
-       /* Unmask the timer, and then get an IRQ. */
-       local_irq_disable();
-       timer_set_ctl(timer, CTL_ENABLE);
-       /* This method re-enables IRQs to handle the one we're looking for. */
-       wait_for_non_spurious_irq();
-
-       assert_irqs_handled(1);
-       local_irq_enable();
-}
-
-/* Check that timer control masks actually mask a timer being fired. */
-static void test_timer_control_masks(enum arch_timer timer)
-{
-       reset_timer_state(timer, DEF_CNT);
-
-       /* Local IRQs are not masked at this point. */
-
-       set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
-
-       /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
-       sleep_poll(timer, TIMEOUT_NO_IRQ_US);
-
-       assert_irqs_handled(0);
-       timer_set_ctl(timer, CTL_IMASK);
-}
-
-static void test_fire_a_timer_multiple_times(enum arch_timer timer,
-                                            irq_wait_method_t wm, int num)
-{
-       int i;
-
-       local_irq_disable();
-       reset_timer_state(timer, DEF_CNT);
-
-       set_tval_irq(timer, 0, CTL_ENABLE);
-
-       for (i = 1; i <= num; i++) {
-               /* This method re-enables IRQs to handle the one we're looking for. */
-               wm();
-
-               /*
-                * The IRQ handler masked and disabled the timer.
-                * Enable and unmask it again.
-                */
-               timer_set_ctl(timer, CTL_ENABLE);
-
-               assert_irqs_handled(i);
-       }
-
-       local_irq_enable();
-}
-
-static void test_timers_fired_multiple_times(enum arch_timer timer)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++)
-               test_fire_a_timer_multiple_times(timer, irq_wait_method[i], 10);
-}
-
-/*
- * Set a timer for tval=delta_1_ms, then reprogram it to
- * tval=delta_2_ms. Check that the timer fires. There is no
- * timeout for the wait: we use the wfi instruction.
- */
-static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm,
-                                    int32_t delta_1_ms, int32_t delta_2_ms)
-{
-       local_irq_disable();
-       reset_timer_state(timer, DEF_CNT);
-
-       /* Program the timer to DEF_CNT + delta_1_ms. */
-       set_tval_irq(timer, msec_to_cycles(delta_1_ms), CTL_ENABLE);
-
-       /* Reprogram the timer to DEF_CNT + delta_2_ms. */
-       timer_set_tval(timer, msec_to_cycles(delta_2_ms));
-
-       /* This method re-enables IRQs to handle the one we're looking for. */
-       wm();
-
-       /* The IRQ should arrive at DEF_CNT + delta_2_ms (or after). */
-       GUEST_ASSERT(timer_get_cntct(timer) >=
-                    DEF_CNT + msec_to_cycles(delta_2_ms));
-
-       local_irq_enable();
-       assert_irqs_handled(1);
-}
-
-static void test_reprogram_timers(enum arch_timer timer)
-{
-       int i;
-       uint64_t base_wait = test_args.wait_ms;
-
-       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
-               /*
-                * Ensure reprogramming works whether going from a
-                * longer time to a shorter or vice versa.
-                */
-               test_reprogramming_timer(timer, irq_wait_method[i], 2 * base_wait,
-                                        base_wait);
-               test_reprogramming_timer(timer, irq_wait_method[i], base_wait,
-                                        2 * base_wait);
-       }
-}
-
-static void test_basic_functionality(enum arch_timer timer)
-{
-       int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms);
-       uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms);
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
-               irq_wait_method_t wm = irq_wait_method[i];
-
-               test_timer_cval(timer, cval, wm, true, DEF_CNT);
-               test_timer_tval(timer, tval, wm, true, DEF_CNT);
-       }
-}
-
-/*
- * This test checks basic timer behavior without actually firing timers, e.g.
- * the relationship between cval and tval, and tval down-counting.
- */
-static void timers_sanity_checks(enum arch_timer timer, bool use_sched)
-{
-       reset_timer_state(timer, DEF_CNT);
-
-       local_irq_disable();
-
-       /* cval in the past */
-       timer_set_cval(timer,
-                      timer_get_cntct(timer) -
-                      msec_to_cycles(test_args.wait_ms));
-       if (use_sched)
-               userspace_migrate_vcpu();
-       GUEST_ASSERT(timer_get_tval(timer) < 0);
-
-       /* tval in the past */
-       timer_set_tval(timer, -1);
-       if (use_sched)
-               userspace_migrate_vcpu();
-       GUEST_ASSERT(timer_get_cval(timer) < timer_get_cntct(timer));
-
-       /*
-        * tval larger than TVAL_MAX. This requires programming with
-        * timer_set_cval instead so the value is expressible.
-        */
-       timer_set_cval(timer,
-                      timer_get_cntct(timer) + TVAL_MAX +
-                      msec_to_cycles(test_args.wait_ms));
-       if (use_sched)
-               userspace_migrate_vcpu();
-       GUEST_ASSERT(timer_get_tval(timer) <= 0);
-
-       /*
-        * tval larger than 2 * TVAL_MAX.
-        * A delta of twice TVAL_MAX wraps the 32-bit TVAL completely.
-        */
-       timer_set_cval(timer,
-                      timer_get_cntct(timer) + 2ULL * TVAL_MAX +
-                      msec_to_cycles(test_args.wait_ms));
-       if (use_sched)
-               userspace_migrate_vcpu();
-       GUEST_ASSERT(timer_get_tval(timer) <=
-                      msec_to_cycles(test_args.wait_ms));
-
-       /* Negative tval that rolls over from 0. */
-       set_counter(timer, msec_to_cycles(1));
-       timer_set_tval(timer, -1 * msec_to_cycles(test_args.wait_ms));
-       if (use_sched)
-               userspace_migrate_vcpu();
-       GUEST_ASSERT(timer_get_cval(timer) >= (CVAL_MAX - msec_to_cycles(test_args.wait_ms)));
-
-       /* tval should keep down-counting from 0 to -1. */
-       timer_set_tval(timer, 0);
-       sleep_poll(timer, 1);
-       GUEST_ASSERT(timer_get_tval(timer) < 0);
-
-       local_irq_enable();
-
-       /* Mask and disable any pending timer. */
-       timer_set_ctl(timer, CTL_IMASK);
-}
-
-static void test_timers_sanity_checks(enum arch_timer timer)
-{
-       timers_sanity_checks(timer, false);
-       /* Check how KVM saves/restores these edge-case values. */
-       timers_sanity_checks(timer, true);
-}
-
-static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t wm)
-{
-       local_irq_disable();
-       reset_timer_state(timer, DEF_CNT);
-
-       set_cval_irq(timer,
-                    (uint64_t) TVAL_MAX +
-                    msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE);
-
-       set_counter(timer, TVAL_MAX);
-
-       /* This method re-enables IRQs to handle the one we're looking for. */
-       wm();
-
-       assert_irqs_handled(1);
-       local_irq_enable();
-}
-
-/* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */
-static void test_timers_above_tval_max(enum arch_timer timer)
-{
-       uint64_t cval;
-       int i;
-
-       /*
-        * Test that the system is not implementing cval in terms of
-        * tval.  If that was the case, setting a cval to "cval = now
-        * + TVAL_MAX + wait_ms" would wrap to "cval = now +
-        * wait_ms", and the timer would fire immediately. Test that it
-        * doesn't.
-        */
-       for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
-               reset_timer_state(timer, DEF_CNT);
-               cval = timer_get_cntct(timer) + TVAL_MAX +
-                       msec_to_cycles(test_args.wait_ms);
-               test_cval_no_irq(timer, cval,
-                                msecs_to_usecs(test_args.wait_ms) +
-                                TIMEOUT_NO_IRQ_US, sleep_method[i]);
-       }
-
-       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
-               /* Get the IRQ by moving the counter forward. */
-               test_set_cnt_after_tval_max(timer, irq_wait_method[i]);
-       }
-}
-
-/*
- * Template function to be used by the test_move_counter_ahead_* tests.  It
- * sets the counter to cnt_1, programs the [c|t]val, sets the counter to
- * cnt_2, and then waits for an IRQ.
- */
-static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1,
-                                   uint64_t xval, uint64_t cnt_2,
-                                   irq_wait_method_t wm, enum timer_view tv)
-{
-       local_irq_disable();
-
-       set_counter(timer, cnt_1);
-       timer_set_ctl(timer, CTL_IMASK);
-
-       set_xval_irq(timer, xval, CTL_ENABLE, tv);
-       set_counter(timer, cnt_2);
-       /* This method re-enables IRQs to handle the one we're looking for. */
-       wm();
-
-       assert_irqs_handled(1);
-       local_irq_enable();
-}
-
-/*
- * Template function to be used by the test_move_counter_ahead_* tests.  It
- * sets the counter to cnt_1, programs the [c|t]val, sets the counter to
- * cnt_2, and then sleeps while checking that no IRQ fires.
- */
-static void test_set_cnt_after_xval_no_irq(enum arch_timer timer,
-                                          uint64_t cnt_1, uint64_t xval,
-                                          uint64_t cnt_2,
-                                          sleep_method_t guest_sleep,
-                                          enum timer_view tv)
-{
-       local_irq_disable();
-
-       set_counter(timer, cnt_1);
-       timer_set_ctl(timer, CTL_IMASK);
-
-       set_xval_irq(timer, xval, CTL_ENABLE, tv);
-       set_counter(timer, cnt_2);
-       guest_sleep(timer, TIMEOUT_NO_IRQ_US);
-
-       local_irq_enable();
-       isb();
-
-       /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
-       assert_irqs_handled(0);
-       timer_set_ctl(timer, CTL_IMASK);
-}
-
-static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1,
-                                   int32_t tval, uint64_t cnt_2,
-                                   irq_wait_method_t wm)
-{
-       test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL);
-}
-
-static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1,
-                                   uint64_t cval, uint64_t cnt_2,
-                                   irq_wait_method_t wm)
-{
-       test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL);
-}
-
-static void test_set_cnt_after_tval_no_irq(enum arch_timer timer,
-                                          uint64_t cnt_1, int32_t tval,
-                                          uint64_t cnt_2, sleep_method_t wm)
-{
-       test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm,
-                                      TIMER_TVAL);
-}
-
-static void test_set_cnt_after_cval_no_irq(enum arch_timer timer,
-                                          uint64_t cnt_1, uint64_t cval,
-                                          uint64_t cnt_2, sleep_method_t wm)
-{
-       test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm,
-                                      TIMER_CVAL);
-}
-
-/* Set a timer and then move the counter ahead of it. */
-static void test_move_counters_ahead_of_timers(enum arch_timer timer)
-{
-       int i;
-       int32_t tval;
-
-       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
-               irq_wait_method_t wm = irq_wait_method[i];
-
-               test_set_cnt_after_cval(timer, 0, DEF_CNT, DEF_CNT + 1, wm);
-               test_set_cnt_after_cval(timer, CVAL_MAX, 1, 2, wm);
-
-               /* Move counter ahead of negative tval. */
-               test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm);
-               test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm);
-               tval = TVAL_MAX;
-               test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1,
-                                       wm);
-       }
-
-       for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
-               sleep_method_t sm = sleep_method[i];
-
-               test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm);
-       }
-}
-
-/*
- * Program a timer, mask it, and then change the tval or counter to cancel it.
- * Unmask it and check that nothing fires.
- */
-static void test_move_counters_behind_timers(enum arch_timer timer)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
-               sleep_method_t sm = sleep_method[i];
-
-               test_set_cnt_after_cval_no_irq(timer, DEF_CNT, DEF_CNT - 1, 0,
-                                              sm);
-               test_set_cnt_after_tval_no_irq(timer, DEF_CNT, -1, 0, sm);
-       }
-}
-
-static void test_timers_in_the_past(enum arch_timer timer)
-{
-       int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms);
-       uint64_t cval;
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
-               irq_wait_method_t wm = irq_wait_method[i];
-
-               /* Set a timer wait_ms in the past. */
-               cval = DEF_CNT - msec_to_cycles(test_args.wait_ms);
-               test_timer_cval(timer, cval, wm, true, DEF_CNT);
-               test_timer_tval(timer, tval, wm, true, DEF_CNT);
-
-               /* Set a timer to counter=0 (in the past) */
-               test_timer_cval(timer, 0, wm, true, DEF_CNT);
-
-               /* Set a timer for tval=0 (now) */
-               test_timer_tval(timer, 0, wm, true, DEF_CNT);
-
-               /* Set a timer to as far in the past as possible */
-               test_timer_tval(timer, TVAL_MIN, wm, true, DEF_CNT);
-       }
-
-       /*
-        * Set the counter to wait_ms, and a tval to -wait_ms. There should be no
-        * IRQ as that tval means cval=CVAL_MAX-wait_ms.
-        */
-       for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
-               sleep_method_t sm = sleep_method[i];
-
-               set_counter(timer, msec_to_cycles(test_args.wait_ms));
-               test_tval_no_irq(timer, tval, TIMEOUT_NO_IRQ_US, sm);
-       }
-}
-
-static void test_long_timer_delays(enum arch_timer timer)
-{
-       int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms);
-       uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms);
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
-               irq_wait_method_t wm = irq_wait_method[i];
-
-               test_timer_cval(timer, cval, wm, true, DEF_CNT);
-               test_timer_tval(timer, tval, wm, true, DEF_CNT);
-       }
-}
-
-static void guest_run_iteration(enum arch_timer timer)
-{
-       test_basic_functionality(timer);
-       test_timers_sanity_checks(timer);
-
-       test_timers_above_tval_max(timer);
-       test_timers_in_the_past(timer);
-
-       test_move_counters_ahead_of_timers(timer);
-       test_move_counters_behind_timers(timer);
-       test_reprogram_timers(timer);
-
-       test_timers_fired_multiple_times(timer);
-
-       test_timer_control_mask_then_unmask(timer);
-       test_timer_control_masks(timer);
-}
-
-static void guest_code(enum arch_timer timer)
-{
-       int i;
-
-       local_irq_disable();
-
-       gic_init(GIC_V3, 1);
-
-       timer_set_ctl(VIRTUAL, CTL_IMASK);
-       timer_set_ctl(PHYSICAL, CTL_IMASK);
-
-       gic_irq_enable(vtimer_irq);
-       gic_irq_enable(ptimer_irq);
-       local_irq_enable();
-
-       for (i = 0; i < test_args.iterations; i++) {
-               GUEST_SYNC(i);
-               guest_run_iteration(timer);
-       }
-
-       test_long_timer_delays(timer);
-       GUEST_DONE();
-}
-
-static uint32_t next_pcpu(void)
-{
-       uint32_t max = get_nprocs();
-       uint32_t cur = sched_getcpu();
-       uint32_t next = cur;
-       cpu_set_t cpuset;
-
-       TEST_ASSERT(max > 1, "Need at least two physical cpus");
-
-       sched_getaffinity(0, sizeof(cpuset), &cpuset);
-
-       do {
-               next = (next + 1) % CPU_SETSIZE;
-       } while (!CPU_ISSET(next, &cpuset));
-
-       return next;
-}
-
-static void migrate_self(uint32_t new_pcpu)
-{
-       int ret;
-       cpu_set_t cpuset;
-       pthread_t thread;
-
-       thread = pthread_self();
-
-       CPU_ZERO(&cpuset);
-       CPU_SET(new_pcpu, &cpuset);
-
-       pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu);
-
-       ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
-
-       TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n",
-                   new_pcpu, ret);
-}
-
-static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt,
-                          enum arch_timer timer)
-{
-       if (timer == PHYSICAL)
-               vcpu_set_reg(vcpu, KVM_REG_ARM_PTIMER_CNT, cnt);
-       else
-               vcpu_set_reg(vcpu, KVM_REG_ARM_TIMER_CNT, cnt);
-}
-
-static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
-{
-       enum sync_cmd cmd = uc->args[1];
-       uint64_t val = uc->args[2];
-       enum arch_timer timer = uc->args[3];
-
-       switch (cmd) {
-       case SET_COUNTER_VALUE:
-               kvm_set_cntxct(vcpu, val, timer);
-               break;
-       case USERSPACE_USLEEP:
-               usleep(val);
-               break;
-       case USERSPACE_SCHED_YIELD:
-               sched_yield();
-               break;
-       case USERSPACE_MIGRATE_SELF:
-               migrate_self(next_pcpu());
-               break;
-       default:
-               break;
-       }
-}
-
-static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       /* Start on CPU 0 */
-       migrate_self(0);
-
-       while (true) {
-               vcpu_run(vcpu);
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       handle_sync(vcpu, &uc);
-                       break;
-               case UCALL_DONE:
-                       goto out;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       goto out;
-               default:
-                       TEST_FAIL("Unexpected guest exit\n");
-               }
-       }
-
- out:
-       return;
-}
-
-static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
-{
-       vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
-                            KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
-       vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
-                            KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
-
-       sync_global_to_guest(vm, ptimer_irq);
-       sync_global_to_guest(vm, vtimer_irq);
-
-       pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
-}
-
-static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
-                          enum arch_timer timer)
-{
-       *vm = vm_create_with_one_vcpu(vcpu, guest_code);
-       TEST_ASSERT(*vm, "Failed to create the test VM\n");
-
-       vm_init_descriptor_tables(*vm);
-       vm_install_exception_handler(*vm, VECTOR_IRQ_CURRENT,
-                                    guest_irq_handler);
-
-       vcpu_init_descriptor_tables(*vcpu);
-       vcpu_args_set(*vcpu, 1, timer);
-
-       test_init_timer_irq(*vm, *vcpu);
-       vgic_v3_setup(*vm, 1, 64);
-       sync_global_to_guest(*vm, test_args);
-}
-
-static void test_print_help(char *name)
-{
-       pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v] [-w wait_ms]\n",
-               name);
-       pr_info("\t-i: Number of iterations (default: %u)\n",
-               NR_TEST_ITERS_DEF);
-       pr_info("\t-b: Test both physical and virtual timers (default: true)\n");
-       pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n",
-               LONG_WAIT_TEST_MS);
-       pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n",
-               WAIT_TEST_MS);
-       pr_info("\t-p: Test physical timer (default: true)\n");
-       pr_info("\t-v: Test virtual timer (default: true)\n");
-       pr_info("\t-h: Print this help message\n");
-}
-
-static bool parse_args(int argc, char *argv[])
-{
-       int opt;
-
-       while ((opt = getopt(argc, argv, "bhi:l:pvw:")) != -1) {
-               switch (opt) {
-               case 'b':
-                       test_args.test_physical = true;
-                       test_args.test_virtual = true;
-                       break;
-               case 'i':
-                       test_args.iterations =
-                           atoi_positive("Number of iterations", optarg);
-                       break;
-               case 'l':
-                       test_args.long_wait_ms =
-                           atoi_positive("Long wait time", optarg);
-                       break;
-               case 'p':
-                       test_args.test_physical = true;
-                       test_args.test_virtual = false;
-                       break;
-               case 'v':
-                       test_args.test_virtual = true;
-                       test_args.test_physical = false;
-                       break;
-               case 'w':
-                       test_args.wait_ms = atoi_positive("Wait time", optarg);
-                       break;
-               case 'h':
-               default:
-                       goto err;
-               }
-       }
-
-       return true;
-
- err:
-       test_print_help(argv[0]);
-       return false;
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       /* Tell stdout not to buffer its content */
-       setbuf(stdout, NULL);
-
-       if (!parse_args(argc, argv))
-               exit(KSFT_SKIP);
-
-       if (test_args.test_virtual) {
-               test_vm_create(&vm, &vcpu, VIRTUAL);
-               test_run(vm, vcpu);
-               kvm_vm_free(vm);
-       }
-
-       if (test_args.test_physical) {
-               test_vm_create(&vm, &vcpu, PHYSICAL);
-               test_run(vm, vcpu);
-               kvm_vm_free(vm);
-       }
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
deleted file mode 100644 (file)
index c7fb55c..0000000
+++ /dev/null
@@ -1,607 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <test_util.h>
-#include <kvm_util.h>
-#include <processor.h>
-#include <linux/bitfield.h>
-
-#define MDSCR_KDE      (1 << 13)
-#define MDSCR_MDE      (1 << 15)
-#define MDSCR_SS       (1 << 0)
-
-#define DBGBCR_LEN8    (0xff << 5)
-#define DBGBCR_EXEC    (0x0 << 3)
-#define DBGBCR_EL1     (0x1 << 1)
-#define DBGBCR_E       (0x1 << 0)
-#define DBGBCR_LBN_SHIFT       16
-#define DBGBCR_BT_SHIFT                20
-#define DBGBCR_BT_ADDR_LINK_CTX        (0x1 << DBGBCR_BT_SHIFT)
-#define DBGBCR_BT_CTX_LINK     (0x3 << DBGBCR_BT_SHIFT)
-
-#define DBGWCR_LEN8    (0xff << 5)
-#define DBGWCR_RD      (0x1 << 3)
-#define DBGWCR_WR      (0x2 << 3)
-#define DBGWCR_EL1     (0x1 << 1)
-#define DBGWCR_E       (0x1 << 0)
-#define DBGWCR_LBN_SHIFT       16
-#define DBGWCR_WT_SHIFT                20
-#define DBGWCR_WT_LINK         (0x1 << DBGWCR_WT_SHIFT)
-
-#define SPSR_D         (1 << 9)
-#define SPSR_SS                (1 << 21)
-
-extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start, hw_bp_ctx;
-extern unsigned char iter_ss_begin, iter_ss_end;
-static volatile uint64_t sw_bp_addr, hw_bp_addr;
-static volatile uint64_t wp_addr, wp_data_addr;
-static volatile uint64_t svc_addr;
-static volatile uint64_t ss_addr[4], ss_idx;
-#define  PC(v)  ((uint64_t)&(v))
-
-#define GEN_DEBUG_WRITE_REG(reg_name)                  \
-static void write_##reg_name(int num, uint64_t val)    \
-{                                                      \
-       switch (num) {                                  \
-       case 0:                                         \
-               write_sysreg(val, reg_name##0_el1);     \
-               break;                                  \
-       case 1:                                         \
-               write_sysreg(val, reg_name##1_el1);     \
-               break;                                  \
-       case 2:                                         \
-               write_sysreg(val, reg_name##2_el1);     \
-               break;                                  \
-       case 3:                                         \
-               write_sysreg(val, reg_name##3_el1);     \
-               break;                                  \
-       case 4:                                         \
-               write_sysreg(val, reg_name##4_el1);     \
-               break;                                  \
-       case 5:                                         \
-               write_sysreg(val, reg_name##5_el1);     \
-               break;                                  \
-       case 6:                                         \
-               write_sysreg(val, reg_name##6_el1);     \
-               break;                                  \
-       case 7:                                         \
-               write_sysreg(val, reg_name##7_el1);     \
-               break;                                  \
-       case 8:                                         \
-               write_sysreg(val, reg_name##8_el1);     \
-               break;                                  \
-       case 9:                                         \
-               write_sysreg(val, reg_name##9_el1);     \
-               break;                                  \
-       case 10:                                        \
-               write_sysreg(val, reg_name##10_el1);    \
-               break;                                  \
-       case 11:                                        \
-               write_sysreg(val, reg_name##11_el1);    \
-               break;                                  \
-       case 12:                                        \
-               write_sysreg(val, reg_name##12_el1);    \
-               break;                                  \
-       case 13:                                        \
-               write_sysreg(val, reg_name##13_el1);    \
-               break;                                  \
-       case 14:                                        \
-               write_sysreg(val, reg_name##14_el1);    \
-               break;                                  \
-       case 15:                                        \
-               write_sysreg(val, reg_name##15_el1);    \
-               break;                                  \
-       default:                                        \
-               GUEST_ASSERT(0);                        \
-       }                                               \
-}
-
-/* Define write_dbgbcr()/write_dbgbvr()/write_dbgwcr()/write_dbgwvr() */
-GEN_DEBUG_WRITE_REG(dbgbcr)
-GEN_DEBUG_WRITE_REG(dbgbvr)
-GEN_DEBUG_WRITE_REG(dbgwcr)
-GEN_DEBUG_WRITE_REG(dbgwvr)
-
-static void reset_debug_state(void)
-{
-       uint8_t brps, wrps, i;
-       uint64_t dfr0;
-
-       asm volatile("msr daifset, #8");
-
-       write_sysreg(0, osdlr_el1);
-       write_sysreg(0, oslar_el1);
-       isb();
-
-       write_sysreg(0, mdscr_el1);
-       write_sysreg(0, contextidr_el1);
-
-       /* Reset all bcr/bvr/wcr/wvr registers */
-       dfr0 = read_sysreg(id_aa64dfr0_el1);
-       brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0);
-       for (i = 0; i <= brps; i++) {
-               write_dbgbcr(i, 0);
-               write_dbgbvr(i, 0);
-       }
-       wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0);
-       for (i = 0; i <= wrps; i++) {
-               write_dbgwcr(i, 0);
-               write_dbgwvr(i, 0);
-       }
-
-       isb();
-}
-
-static void enable_os_lock(void)
-{
-       write_sysreg(1, oslar_el1);
-       isb();
-
-       GUEST_ASSERT(read_sysreg(oslsr_el1) & 2);
-}
-
-static void enable_monitor_debug_exceptions(void)
-{
-       uint32_t mdscr;
-
-       asm volatile("msr daifclr, #8");
-
-       mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
-       write_sysreg(mdscr, mdscr_el1);
-       isb();
-}
-
-static void install_wp(uint8_t wpn, uint64_t addr)
-{
-       uint32_t wcr;
-
-       wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
-       write_dbgwcr(wpn, wcr);
-       write_dbgwvr(wpn, addr);
-
-       isb();
-
-       enable_monitor_debug_exceptions();
-}
-
-static void install_hw_bp(uint8_t bpn, uint64_t addr)
-{
-       uint32_t bcr;
-
-       bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
-       write_dbgbcr(bpn, bcr);
-       write_dbgbvr(bpn, addr);
-       isb();
-
-       enable_monitor_debug_exceptions();
-}
-
-static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr,
-                          uint64_t ctx)
-{
-       uint32_t wcr;
-       uint64_t ctx_bcr;
-
-       /* Setup a context-aware breakpoint for Linked Context ID Match */
-       ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
-                 DBGBCR_BT_CTX_LINK;
-       write_dbgbcr(ctx_bp, ctx_bcr);
-       write_dbgbvr(ctx_bp, ctx);
-
-       /* Setup a linked watchpoint (linked to the context-aware breakpoint) */
-       wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E |
-             DBGWCR_WT_LINK | ((uint32_t)ctx_bp << DBGWCR_LBN_SHIFT);
-       write_dbgwcr(addr_wp, wcr);
-       write_dbgwvr(addr_wp, addr);
-       isb();
-
-       enable_monitor_debug_exceptions();
-}
-
-void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr,
-                      uint64_t ctx)
-{
-       uint32_t addr_bcr, ctx_bcr;
-
-       /* Setup a context-aware breakpoint for Linked Context ID Match */
-       ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
-                 DBGBCR_BT_CTX_LINK;
-       write_dbgbcr(ctx_bp, ctx_bcr);
-       write_dbgbvr(ctx_bp, ctx);
-
-       /*
-        * Setup a normal breakpoint for Linked Address Match, and link it
-        * to the context-aware breakpoint.
-        */
-       addr_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
-                  DBGBCR_BT_ADDR_LINK_CTX |
-                  ((uint32_t)ctx_bp << DBGBCR_LBN_SHIFT);
-       write_dbgbcr(addr_bp, addr_bcr);
-       write_dbgbvr(addr_bp, addr);
-       isb();
-
-       enable_monitor_debug_exceptions();
-}
-
-static void install_ss(void)
-{
-       uint32_t mdscr;
-
-       asm volatile("msr daifclr, #8");
-
-       mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS;
-       write_sysreg(mdscr, mdscr_el1);
-       isb();
-}
-
-static volatile char write_data;
-
-static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
-{
-       uint64_t ctx = 0xabcdef;        /* a random context number */
-
-       /* Software-breakpoint */
-       reset_debug_state();
-       asm volatile("sw_bp: brk #0");
-       GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp));
-
-       /* Hardware-breakpoint */
-       reset_debug_state();
-       install_hw_bp(bpn, PC(hw_bp));
-       asm volatile("hw_bp: nop");
-       GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp));
-
-       /* Hardware-breakpoint + svc */
-       reset_debug_state();
-       install_hw_bp(bpn, PC(bp_svc));
-       asm volatile("bp_svc: svc #0");
-       GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc));
-       GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4);
-
-       /* Hardware-breakpoint + software-breakpoint */
-       reset_debug_state();
-       install_hw_bp(bpn, PC(bp_brk));
-       asm volatile("bp_brk: brk #0");
-       GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk));
-       GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk));
-
-       /* Watchpoint */
-       reset_debug_state();
-       install_wp(wpn, PC(write_data));
-       write_data = 'x';
-       GUEST_ASSERT_EQ(write_data, 'x');
-       GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
-
-       /* Single-step */
-       reset_debug_state();
-       install_ss();
-       ss_idx = 0;
-       asm volatile("ss_start:\n"
-                    "mrs x0, esr_el1\n"
-                    "add x0, x0, #1\n"
-                    "msr daifset, #8\n"
-                    : : : "x0");
-       GUEST_ASSERT_EQ(ss_addr[0], PC(ss_start));
-       GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4);
-       GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8);
-
-       /* OS Lock does not block software-breakpoint */
-       reset_debug_state();
-       enable_os_lock();
-       sw_bp_addr = 0;
-       asm volatile("sw_bp2: brk #0");
-       GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp2));
-
-       /* OS Lock blocking hardware-breakpoint */
-       reset_debug_state();
-       enable_os_lock();
-       install_hw_bp(bpn, PC(hw_bp2));
-       hw_bp_addr = 0;
-       asm volatile("hw_bp2: nop");
-       GUEST_ASSERT_EQ(hw_bp_addr, 0);
-
-       /* OS Lock blocking watchpoint */
-       reset_debug_state();
-       enable_os_lock();
-       write_data = '\0';
-       wp_data_addr = 0;
-       install_wp(wpn, PC(write_data));
-       write_data = 'x';
-       GUEST_ASSERT_EQ(write_data, 'x');
-       GUEST_ASSERT_EQ(wp_data_addr, 0);
-
-       /* OS Lock blocking single-step */
-       reset_debug_state();
-       enable_os_lock();
-       ss_addr[0] = 0;
-       install_ss();
-       ss_idx = 0;
-       asm volatile("mrs x0, esr_el1\n\t"
-                    "add x0, x0, #1\n\t"
-                    "msr daifset, #8\n\t"
-                    : : : "x0");
-       GUEST_ASSERT_EQ(ss_addr[0], 0);
-
-       /* Linked hardware-breakpoint */
-       hw_bp_addr = 0;
-       reset_debug_state();
-       install_hw_bp_ctx(bpn, ctx_bpn, PC(hw_bp_ctx), ctx);
-       /* Set context id */
-       write_sysreg(ctx, contextidr_el1);
-       isb();
-       asm volatile("hw_bp_ctx: nop");
-       write_sysreg(0, contextidr_el1);
-       GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp_ctx));
-
-       /* Linked watchpoint */
-       reset_debug_state();
-       install_wp_ctx(wpn, ctx_bpn, PC(write_data), ctx);
-       /* Set context id */
-       write_sysreg(ctx, contextidr_el1);
-       isb();
-       write_data = 'x';
-       GUEST_ASSERT_EQ(write_data, 'x');
-       GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
-
-       GUEST_DONE();
-}
-
-static void guest_sw_bp_handler(struct ex_regs *regs)
-{
-       sw_bp_addr = regs->pc;
-       regs->pc += 4;
-}
-
-static void guest_hw_bp_handler(struct ex_regs *regs)
-{
-       hw_bp_addr = regs->pc;
-       regs->pstate |= SPSR_D;
-}
-
-static void guest_wp_handler(struct ex_regs *regs)
-{
-       wp_data_addr = read_sysreg(far_el1);
-       wp_addr = regs->pc;
-       regs->pstate |= SPSR_D;
-}
-
-static void guest_ss_handler(struct ex_regs *regs)
-{
-       __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%lu'", ss_idx);
-       ss_addr[ss_idx++] = regs->pc;
-       regs->pstate |= SPSR_SS;
-}
-
-static void guest_svc_handler(struct ex_regs *regs)
-{
-       svc_addr = regs->pc;
-}
-
-static void guest_code_ss(int test_cnt)
-{
-       uint64_t i;
-       uint64_t bvr, wvr, w_bvr, w_wvr;
-
-       for (i = 0; i < test_cnt; i++) {
-               /* Bits [1:0] of dbg{b,w}vr are RES0 */
-               w_bvr = i << 2;
-               w_wvr = i << 2;
-
-               /*
-                * Enable Single Step execution.  Note!  This _must_ be a bare
-                * ucall as the ucall() path uses atomic operations to manage
-                * the ucall structures, and the built-in "atomics" are usually
-                * implemented via exclusive access instructions.  The exlusive
-                * implemented via exclusive access instructions.  The exclusive
-                * during a LDREX=>STREX sequence will prevent forward progress
-                * and hang the guest/test.
-                */
-               GUEST_UCALL_NONE();
-
-               /*
-                * Userspace will verify that the pc is as expected during
-                * single-step execution between iter_ss_begin and iter_ss_end.
-                */
-               asm volatile("iter_ss_begin:nop\n");
-
-               write_sysreg(w_bvr, dbgbvr0_el1);
-               write_sysreg(w_wvr, dbgwvr0_el1);
-               bvr = read_sysreg(dbgbvr0_el1);
-               wvr = read_sysreg(dbgwvr0_el1);
-
-               /* Userspace disables Single Step when the end is nigh. */
-               asm volatile("iter_ss_end:\n");
-
-               GUEST_ASSERT_EQ(bvr, w_bvr);
-               GUEST_ASSERT_EQ(wvr, w_wvr);
-       }
-       GUEST_DONE();
-}
-
-static int debug_version(uint64_t id_aa64dfr0)
-{
-       return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0);
-}
-
-static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       vm_init_descriptor_tables(vm);
-       vcpu_init_descriptor_tables(vcpu);
-
-       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
-                               ESR_ELx_EC_BRK64, guest_sw_bp_handler);
-       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
-                               ESR_ELx_EC_BREAKPT_CUR, guest_hw_bp_handler);
-       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
-                               ESR_ELx_EC_WATCHPT_CUR, guest_wp_handler);
-       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
-                               ESR_ELx_EC_SOFTSTP_CUR, guest_ss_handler);
-       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
-                               ESR_ELx_EC_SVC64, guest_svc_handler);
-
-       /* Specify bpn/wpn/ctx_bpn to be tested */
-       vcpu_args_set(vcpu, 3, bpn, wpn, ctx_bpn);
-       pr_debug("Use bpn#%d, wpn#%d and ctx_bpn#%d\n", bpn, wpn, ctx_bpn);
-
-       vcpu_run(vcpu);
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-       case UCALL_DONE:
-               goto done;
-       default:
-               TEST_FAIL("Unknown ucall %lu", uc.cmd);
-       }
-
-done:
-       kvm_vm_free(vm);
-}
-
-void test_single_step_from_userspace(int test_cnt)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       struct kvm_run *run;
-       uint64_t pc, cmd;
-       uint64_t test_pc = 0;
-       bool ss_enable = false;
-       struct kvm_guest_debug debug = {};
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code_ss);
-       run = vcpu->run;
-       vcpu_args_set(vcpu, 1, test_cnt);
-
-       while (1) {
-               vcpu_run(vcpu);
-               if (run->exit_reason != KVM_EXIT_DEBUG) {
-                       cmd = get_ucall(vcpu, &uc);
-                       if (cmd == UCALL_ABORT) {
-                               REPORT_GUEST_ASSERT(uc);
-                               /* NOT REACHED */
-                       } else if (cmd == UCALL_DONE) {
-                               break;
-                       }
-
-                       TEST_ASSERT(cmd == UCALL_NONE,
-                                   "Unexpected ucall cmd 0x%lx", cmd);
-
-                       debug.control = KVM_GUESTDBG_ENABLE |
-                                       KVM_GUESTDBG_SINGLESTEP;
-                       ss_enable = true;
-                       vcpu_guest_debug_set(vcpu, &debug);
-                       continue;
-               }
-
-               TEST_ASSERT(ss_enable, "Unexpected KVM_EXIT_DEBUG");
-
-               /* Check if the current pc is expected. */
-               pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
-               TEST_ASSERT(!test_pc || pc == test_pc,
-                           "Unexpected pc 0x%lx (expected 0x%lx)",
-                           pc, test_pc);
-
-               if ((pc + 4) == (uint64_t)&iter_ss_end) {
-                       test_pc = 0;
-                       debug.control = KVM_GUESTDBG_ENABLE;
-                       ss_enable = false;
-                       vcpu_guest_debug_set(vcpu, &debug);
-                       continue;
-               }
-
-               /*
-                * If the current pc is between iter_ss_begin and
-                * iter_ss_end, the pc for the next KVM_EXIT_DEBUG should
-                * be the current pc + 4.
-                */
-               if ((pc >= (uint64_t)&iter_ss_begin) &&
-                   (pc < (uint64_t)&iter_ss_end))
-                       test_pc = pc + 4;
-               else
-                       test_pc = 0;
-       }
-
-       kvm_vm_free(vm);
-}
-
-/*
- * Run debug testing using the various breakpoint#, watchpoint# and
- * context-aware breakpoint# with the given ID_AA64DFR0_EL1 configuration.
- */
-void test_guest_debug_exceptions_all(uint64_t aa64dfr0)
-{
-       uint8_t brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base;
-       int b, w, c;
-
-       /* Number of breakpoints */
-       brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1;
-       __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required");
-
-       /* Number of watchpoints */
-       wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1;
-
-       /* Number of context aware breakpoints */
-       ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1;
-
-       pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__,
-                brp_num, wrp_num, ctx_brp_num);
-
-       /* Number of normal (non-context aware) breakpoints */
-       normal_brp_num = brp_num - ctx_brp_num;
-
-       /* Lowest context aware breakpoint number */
-       ctx_brp_base = normal_brp_num;
-
-       /* Run tests with all supported breakpoints/watchpoints */
-       for (c = ctx_brp_base; c < ctx_brp_base + ctx_brp_num; c++) {
-               for (b = 0; b < normal_brp_num; b++) {
-                       for (w = 0; w < wrp_num; w++)
-                               test_guest_debug_exceptions(b, w, c);
-               }
-       }
-}
-
-static void help(char *name)
-{
-       puts("");
-       printf("Usage: %s [-h] [-i iterations of the single step test]\n", name);
-       puts("");
-       exit(0);
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       int opt;
-       int ss_iteration = 10000;
-       uint64_t aa64dfr0;
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       aa64dfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
-       __TEST_REQUIRE(debug_version(aa64dfr0) >= 6,
-                      "Armv8 debug architecture not supported.");
-       kvm_vm_free(vm);
-
-       while ((opt = getopt(argc, argv, "i:")) != -1) {
-               switch (opt) {
-               case 'i':
-                       ss_iteration = atoi_positive("Number of iterations", optarg);
-                       break;
-               case 'h':
-               default:
-                       help(argv[0]);
-                       break;
-               }
-       }
-
-       test_guest_debug_exceptions_all(aa64dfr0);
-       test_single_step_from_userspace(ss_iteration);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
deleted file mode 100644 (file)
index d43fb3f..0000000
+++ /dev/null
@@ -1,771 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Check for KVM_GET_REG_LIST regressions.
- *
- * Copyright (C) 2020, Red Hat, Inc.
- *
- * While the blessed list should be created from the oldest possible
- * kernel, we can't go older than v5.2, because that's the first
- * release which includes df205b5c6328 ("KVM: arm64: Filter out invalid
- * core register IDs in KVM_GET_REG_LIST"). Without that commit the core
- * registers won't match expectations.
- */
-#include <stdio.h>
-#include "kvm_util.h"
-#include "test_util.h"
-#include "processor.h"
-
-struct feature_id_reg {
-       __u64 reg;
-       __u64 id_reg;
-       __u64 feat_shift;
-       __u64 feat_min;
-};
-
-static struct feature_id_reg feat_id_regs[] = {
-       {
-               ARM64_SYS_REG(3, 0, 2, 0, 3),   /* TCR2_EL1 */
-               ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
-               0,
-               1
-       },
-       {
-               ARM64_SYS_REG(3, 0, 10, 2, 2),  /* PIRE0_EL1 */
-               ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
-               8,
-               1
-       },
-       {
-               ARM64_SYS_REG(3, 0, 10, 2, 3),  /* PIR_EL1 */
-               ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
-               8,
-               1
-       },
-       {
-               ARM64_SYS_REG(3, 0, 10, 2, 4),  /* POR_EL1 */
-               ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
-               16,
-               1
-       },
-       {
-               ARM64_SYS_REG(3, 3, 10, 2, 4),  /* POR_EL0 */
-               ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
-               16,
-               1
-       }
-};
-
-bool filter_reg(__u64 reg)
-{
-       /*
-        * DEMUX register presence depends on the host's CLIDR_EL1.
-        * This means there's no set of them that we can bless.
-        */
-       if ((reg & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
-               return true;
-
-       return false;
-}
-
-static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg)
-{
-       int i, ret;
-       __u64 data, feat_val;
-
-       for (i = 0; i < ARRAY_SIZE(feat_id_regs); i++) {
-               if (feat_id_regs[i].reg == reg) {
-                       ret = __vcpu_get_reg(vcpu, feat_id_regs[i].id_reg, &data);
-                       if (ret < 0)
-                               return false;
-
-                       feat_val = ((data >> feat_id_regs[i].feat_shift) & 0xf);
-                       return feat_val >= feat_id_regs[i].feat_min;
-               }
-       }
-
-       return true;
-}
-
-bool check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg)
-{
-       return check_supported_feat_reg(vcpu, reg);
-}
-
-bool check_reject_set(int err)
-{
-       return err == EPERM;
-}
-
-void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
-{
-       struct vcpu_reg_sublist *s;
-       int feature;
-
-       for_each_sublist(c, s) {
-               if (s->finalize) {
-                       feature = s->feature;
-                       vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature);
-               }
-       }
-}
-
-#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
-
-#define CORE_REGS_XX_NR_WORDS  2
-#define CORE_SPSR_XX_NR_WORDS  2
-#define CORE_FPREGS_XX_NR_WORDS        4
-
-static const char *core_id_to_str(const char *prefix, __u64 id)
-{
-       __u64 core_off = id & ~REG_MASK, idx;
-
-       /*
-        * core_off is the offset into struct kvm_regs
-        */
-       switch (core_off) {
-       case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
-            KVM_REG_ARM_CORE_REG(regs.regs[30]):
-               idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
-               TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", prefix, idx);
-               return strdup_printf("KVM_REG_ARM_CORE_REG(regs.regs[%lld])", idx);
-       case KVM_REG_ARM_CORE_REG(regs.sp):
-               return "KVM_REG_ARM_CORE_REG(regs.sp)";
-       case KVM_REG_ARM_CORE_REG(regs.pc):
-               return "KVM_REG_ARM_CORE_REG(regs.pc)";
-       case KVM_REG_ARM_CORE_REG(regs.pstate):
-               return "KVM_REG_ARM_CORE_REG(regs.pstate)";
-       case KVM_REG_ARM_CORE_REG(sp_el1):
-               return "KVM_REG_ARM_CORE_REG(sp_el1)";
-       case KVM_REG_ARM_CORE_REG(elr_el1):
-               return "KVM_REG_ARM_CORE_REG(elr_el1)";
-       case KVM_REG_ARM_CORE_REG(spsr[0]) ...
-            KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
-               idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
-               TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", prefix, idx);
-               return strdup_printf("KVM_REG_ARM_CORE_REG(spsr[%lld])", idx);
-       case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
-            KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
-               idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
-               TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", prefix, idx);
-               return strdup_printf("KVM_REG_ARM_CORE_REG(fp_regs.vregs[%lld])", idx);
-       case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
-               return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
-       case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
-               return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
-       }
-
-       TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id);
-       return NULL;
-}
-
-static const char *sve_id_to_str(const char *prefix, __u64 id)
-{
-       __u64 sve_off, n, i;
-
-       if (id == KVM_REG_ARM64_SVE_VLS)
-               return "KVM_REG_ARM64_SVE_VLS";
-
-       sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
-       i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);
-
-       TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", prefix, id);
-
-       switch (sve_off) {
-       case KVM_REG_ARM64_SVE_ZREG_BASE ...
-            KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
-               n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
-               TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
-                           "%s: Unexpected bits set in SVE ZREG id: 0x%llx", prefix, id);
-               return strdup_printf("KVM_REG_ARM64_SVE_ZREG(%lld, 0)", n);
-       case KVM_REG_ARM64_SVE_PREG_BASE ...
-            KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
-               n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
-               TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
-                           "%s: Unexpected bits set in SVE PREG id: 0x%llx", prefix, id);
-               return strdup_printf("KVM_REG_ARM64_SVE_PREG(%lld, 0)", n);
-       case KVM_REG_ARM64_SVE_FFR_BASE:
-               TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
-                           "%s: Unexpected bits set in SVE FFR id: 0x%llx", prefix, id);
-               return "KVM_REG_ARM64_SVE_FFR(0)";
-       }
-
-       return NULL;
-}
-
-void print_reg(const char *prefix, __u64 id)
-{
-       unsigned op0, op1, crn, crm, op2;
-       const char *reg_size = NULL;
-
-       TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
-                   "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", prefix, id);
-
-       switch (id & KVM_REG_SIZE_MASK) {
-       case KVM_REG_SIZE_U8:
-               reg_size = "KVM_REG_SIZE_U8";
-               break;
-       case KVM_REG_SIZE_U16:
-               reg_size = "KVM_REG_SIZE_U16";
-               break;
-       case KVM_REG_SIZE_U32:
-               reg_size = "KVM_REG_SIZE_U32";
-               break;
-       case KVM_REG_SIZE_U64:
-               reg_size = "KVM_REG_SIZE_U64";
-               break;
-       case KVM_REG_SIZE_U128:
-               reg_size = "KVM_REG_SIZE_U128";
-               break;
-       case KVM_REG_SIZE_U256:
-               reg_size = "KVM_REG_SIZE_U256";
-               break;
-       case KVM_REG_SIZE_U512:
-               reg_size = "KVM_REG_SIZE_U512";
-               break;
-       case KVM_REG_SIZE_U1024:
-               reg_size = "KVM_REG_SIZE_U1024";
-               break;
-       case KVM_REG_SIZE_U2048:
-               reg_size = "KVM_REG_SIZE_U2048";
-               break;
-       default:
-               TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
-                         prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
-       }
-
-       switch (id & KVM_REG_ARM_COPROC_MASK) {
-       case KVM_REG_ARM_CORE:
-               printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(prefix, id));
-               break;
-       case KVM_REG_ARM_DEMUX:
-               TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
-                           "%s: Unexpected bits set in DEMUX reg id: 0x%llx", prefix, id);
-               printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
-                      reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
-               break;
-       case KVM_REG_ARM64_SYSREG:
-               op0 = (id & KVM_REG_ARM64_SYSREG_OP0_MASK) >> KVM_REG_ARM64_SYSREG_OP0_SHIFT;
-               op1 = (id & KVM_REG_ARM64_SYSREG_OP1_MASK) >> KVM_REG_ARM64_SYSREG_OP1_SHIFT;
-               crn = (id & KVM_REG_ARM64_SYSREG_CRN_MASK) >> KVM_REG_ARM64_SYSREG_CRN_SHIFT;
-               crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
-               op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
-               TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
-                           "%s: Unexpected bits set in SYSREG reg id: 0x%llx", prefix, id);
-               printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
-               break;
-       case KVM_REG_ARM_FW:
-               TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
-                           "%s: Unexpected bits set in FW reg id: 0x%llx", prefix, id);
-               printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
-               break;
-       case KVM_REG_ARM_FW_FEAT_BMAP:
-               TEST_ASSERT(id == KVM_REG_ARM_FW_FEAT_BMAP_REG(id & 0xffff),
-                           "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", prefix, id);
-               printf("\tKVM_REG_ARM_FW_FEAT_BMAP_REG(%lld),\n", id & 0xffff);
-               break;
-       case KVM_REG_ARM64_SVE:
-               printf("\t%s,\n", sve_id_to_str(prefix, id));
-               break;
-       default:
-               TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx",
-                         prefix, (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
-       }
-}
-
-/*
- * The original blessed list was primed with the output of kernel version
- * v4.15 with --core-reg-fixup and then later updated with new registers.
- * (The --core-reg-fixup option and its fixup function have been removed
- * from the test, as it's unlikely this type of test will be run on a
- * kernel older than v5.2.)
- *
- * The blessed list is up to date with kernel version v6.4 (or so we hope).
- */
-static __u64 base_regs[] = {
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[3]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[4]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[5]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[6]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[7]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[8]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[9]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[10]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[11]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[12]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[13]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[14]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[15]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[16]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[17]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[18]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[19]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[20]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[21]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[22]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[23]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[24]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[25]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[26]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[27]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[28]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[29]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[30]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.sp),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pstate),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(elr_el1),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[0]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[1]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr),
-       KVM_REG_ARM_FW_REG(0),          /* KVM_REG_ARM_PSCI_VERSION */
-       KVM_REG_ARM_FW_REG(1),          /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 */
-       KVM_REG_ARM_FW_REG(2),          /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 */
-       KVM_REG_ARM_FW_REG(3),          /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 */
-       KVM_REG_ARM_FW_FEAT_BMAP_REG(0),        /* KVM_REG_ARM_STD_BMAP */
-       KVM_REG_ARM_FW_FEAT_BMAP_REG(1),        /* KVM_REG_ARM_STD_HYP_BMAP */
-       KVM_REG_ARM_FW_FEAT_BMAP_REG(2),        /* KVM_REG_ARM_VENDOR_HYP_BMAP */
-       ARM64_SYS_REG(3, 3, 14, 3, 1),  /* CNTV_CTL_EL0 */
-       ARM64_SYS_REG(3, 3, 14, 3, 2),  /* CNTV_CVAL_EL0 */
-       ARM64_SYS_REG(3, 3, 14, 0, 2),
-       ARM64_SYS_REG(3, 0, 0, 0, 0),   /* MIDR_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 0, 6),   /* REVIDR_EL1 */
-       ARM64_SYS_REG(3, 1, 0, 0, 1),   /* CLIDR_EL1 */
-       ARM64_SYS_REG(3, 1, 0, 0, 7),   /* AIDR_EL1 */
-       ARM64_SYS_REG(3, 3, 0, 0, 1),   /* CTR_EL0 */
-       ARM64_SYS_REG(2, 0, 0, 0, 4),
-       ARM64_SYS_REG(2, 0, 0, 0, 5),
-       ARM64_SYS_REG(2, 0, 0, 0, 6),
-       ARM64_SYS_REG(2, 0, 0, 0, 7),
-       ARM64_SYS_REG(2, 0, 0, 1, 4),
-       ARM64_SYS_REG(2, 0, 0, 1, 5),
-       ARM64_SYS_REG(2, 0, 0, 1, 6),
-       ARM64_SYS_REG(2, 0, 0, 1, 7),
-       ARM64_SYS_REG(2, 0, 0, 2, 0),   /* MDCCINT_EL1 */
-       ARM64_SYS_REG(2, 0, 0, 2, 2),   /* MDSCR_EL1 */
-       ARM64_SYS_REG(2, 0, 0, 2, 4),
-       ARM64_SYS_REG(2, 0, 0, 2, 5),
-       ARM64_SYS_REG(2, 0, 0, 2, 6),
-       ARM64_SYS_REG(2, 0, 0, 2, 7),
-       ARM64_SYS_REG(2, 0, 0, 3, 4),
-       ARM64_SYS_REG(2, 0, 0, 3, 5),
-       ARM64_SYS_REG(2, 0, 0, 3, 6),
-       ARM64_SYS_REG(2, 0, 0, 3, 7),
-       ARM64_SYS_REG(2, 0, 0, 4, 4),
-       ARM64_SYS_REG(2, 0, 0, 4, 5),
-       ARM64_SYS_REG(2, 0, 0, 4, 6),
-       ARM64_SYS_REG(2, 0, 0, 4, 7),
-       ARM64_SYS_REG(2, 0, 0, 5, 4),
-       ARM64_SYS_REG(2, 0, 0, 5, 5),
-       ARM64_SYS_REG(2, 0, 0, 5, 6),
-       ARM64_SYS_REG(2, 0, 0, 5, 7),
-       ARM64_SYS_REG(2, 0, 0, 6, 4),
-       ARM64_SYS_REG(2, 0, 0, 6, 5),
-       ARM64_SYS_REG(2, 0, 0, 6, 6),
-       ARM64_SYS_REG(2, 0, 0, 6, 7),
-       ARM64_SYS_REG(2, 0, 0, 7, 4),
-       ARM64_SYS_REG(2, 0, 0, 7, 5),
-       ARM64_SYS_REG(2, 0, 0, 7, 6),
-       ARM64_SYS_REG(2, 0, 0, 7, 7),
-       ARM64_SYS_REG(2, 0, 0, 8, 4),
-       ARM64_SYS_REG(2, 0, 0, 8, 5),
-       ARM64_SYS_REG(2, 0, 0, 8, 6),
-       ARM64_SYS_REG(2, 0, 0, 8, 7),
-       ARM64_SYS_REG(2, 0, 0, 9, 4),
-       ARM64_SYS_REG(2, 0, 0, 9, 5),
-       ARM64_SYS_REG(2, 0, 0, 9, 6),
-       ARM64_SYS_REG(2, 0, 0, 9, 7),
-       ARM64_SYS_REG(2, 0, 0, 10, 4),
-       ARM64_SYS_REG(2, 0, 0, 10, 5),
-       ARM64_SYS_REG(2, 0, 0, 10, 6),
-       ARM64_SYS_REG(2, 0, 0, 10, 7),
-       ARM64_SYS_REG(2, 0, 0, 11, 4),
-       ARM64_SYS_REG(2, 0, 0, 11, 5),
-       ARM64_SYS_REG(2, 0, 0, 11, 6),
-       ARM64_SYS_REG(2, 0, 0, 11, 7),
-       ARM64_SYS_REG(2, 0, 0, 12, 4),
-       ARM64_SYS_REG(2, 0, 0, 12, 5),
-       ARM64_SYS_REG(2, 0, 0, 12, 6),
-       ARM64_SYS_REG(2, 0, 0, 12, 7),
-       ARM64_SYS_REG(2, 0, 0, 13, 4),
-       ARM64_SYS_REG(2, 0, 0, 13, 5),
-       ARM64_SYS_REG(2, 0, 0, 13, 6),
-       ARM64_SYS_REG(2, 0, 0, 13, 7),
-       ARM64_SYS_REG(2, 0, 0, 14, 4),
-       ARM64_SYS_REG(2, 0, 0, 14, 5),
-       ARM64_SYS_REG(2, 0, 0, 14, 6),
-       ARM64_SYS_REG(2, 0, 0, 14, 7),
-       ARM64_SYS_REG(2, 0, 0, 15, 4),
-       ARM64_SYS_REG(2, 0, 0, 15, 5),
-       ARM64_SYS_REG(2, 0, 0, 15, 6),
-       ARM64_SYS_REG(2, 0, 0, 15, 7),
-       ARM64_SYS_REG(2, 0, 1, 1, 4),   /* OSLSR_EL1 */
-       ARM64_SYS_REG(2, 4, 0, 7, 0),   /* DBGVCR32_EL2 */
-       ARM64_SYS_REG(3, 0, 0, 0, 5),   /* MPIDR_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 1, 0),   /* ID_PFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 1, 1),   /* ID_PFR1_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 1, 2),   /* ID_DFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 1, 3),   /* ID_AFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 1, 4),   /* ID_MMFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 1, 5),   /* ID_MMFR1_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 1, 6),   /* ID_MMFR2_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 1, 7),   /* ID_MMFR3_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 2, 0),   /* ID_ISAR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 2, 1),   /* ID_ISAR1_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 2, 2),   /* ID_ISAR2_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 2, 3),   /* ID_ISAR3_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 2, 4),   /* ID_ISAR4_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 2, 5),   /* ID_ISAR5_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 2, 6),   /* ID_MMFR4_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 2, 7),   /* ID_ISAR6_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 3, 0),   /* MVFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 3, 1),   /* MVFR1_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 3, 2),   /* MVFR2_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 3, 3),
-       ARM64_SYS_REG(3, 0, 0, 3, 4),   /* ID_PFR2_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 3, 5),   /* ID_DFR1_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 3, 6),   /* ID_MMFR5_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 3, 7),
-       ARM64_SYS_REG(3, 0, 0, 4, 0),   /* ID_AA64PFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 4, 1),   /* ID_AA64PFR1_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 4, 2),   /* ID_AA64PFR2_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 4, 3),
-       ARM64_SYS_REG(3, 0, 0, 4, 4),   /* ID_AA64ZFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 4, 5),   /* ID_AA64SMFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 4, 6),
-       ARM64_SYS_REG(3, 0, 0, 4, 7),
-       ARM64_SYS_REG(3, 0, 0, 5, 0),   /* ID_AA64DFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 5, 1),   /* ID_AA64DFR1_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 5, 2),
-       ARM64_SYS_REG(3, 0, 0, 5, 3),
-       ARM64_SYS_REG(3, 0, 0, 5, 4),   /* ID_AA64AFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 5, 5),   /* ID_AA64AFR1_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 5, 6),
-       ARM64_SYS_REG(3, 0, 0, 5, 7),
-       ARM64_SYS_REG(3, 0, 0, 6, 0),   /* ID_AA64ISAR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 6, 1),   /* ID_AA64ISAR1_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 6, 2),   /* ID_AA64ISAR2_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 6, 3),
-       ARM64_SYS_REG(3, 0, 0, 6, 4),
-       ARM64_SYS_REG(3, 0, 0, 6, 5),
-       ARM64_SYS_REG(3, 0, 0, 6, 6),
-       ARM64_SYS_REG(3, 0, 0, 6, 7),
-       ARM64_SYS_REG(3, 0, 0, 7, 0),   /* ID_AA64MMFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 7, 1),   /* ID_AA64MMFR1_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 7, 2),   /* ID_AA64MMFR2_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 7, 4),   /* ID_AA64MMFR4_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 7, 5),
-       ARM64_SYS_REG(3, 0, 0, 7, 6),
-       ARM64_SYS_REG(3, 0, 0, 7, 7),
-       ARM64_SYS_REG(3, 0, 1, 0, 0),   /* SCTLR_EL1 */
-       ARM64_SYS_REG(3, 0, 1, 0, 1),   /* ACTLR_EL1 */
-       ARM64_SYS_REG(3, 0, 1, 0, 2),   /* CPACR_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 0, 0),   /* TTBR0_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 0, 1),   /* TTBR1_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 0, 2),   /* TCR_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 0, 3),   /* TCR2_EL1 */
-       ARM64_SYS_REG(3, 0, 5, 1, 0),   /* AFSR0_EL1 */
-       ARM64_SYS_REG(3, 0, 5, 1, 1),   /* AFSR1_EL1 */
-       ARM64_SYS_REG(3, 0, 5, 2, 0),   /* ESR_EL1 */
-       ARM64_SYS_REG(3, 0, 6, 0, 0),   /* FAR_EL1 */
-       ARM64_SYS_REG(3, 0, 7, 4, 0),   /* PAR_EL1 */
-       ARM64_SYS_REG(3, 0, 10, 2, 0),  /* MAIR_EL1 */
-       ARM64_SYS_REG(3, 0, 10, 2, 2),  /* PIRE0_EL1 */
-       ARM64_SYS_REG(3, 0, 10, 2, 3),  /* PIR_EL1 */
-       ARM64_SYS_REG(3, 0, 10, 2, 4),  /* POR_EL1 */
-       ARM64_SYS_REG(3, 0, 10, 3, 0),  /* AMAIR_EL1 */
-       ARM64_SYS_REG(3, 0, 12, 0, 0),  /* VBAR_EL1 */
-       ARM64_SYS_REG(3, 0, 12, 1, 1),  /* DISR_EL1 */
-       ARM64_SYS_REG(3, 0, 13, 0, 1),  /* CONTEXTIDR_EL1 */
-       ARM64_SYS_REG(3, 0, 13, 0, 4),  /* TPIDR_EL1 */
-       ARM64_SYS_REG(3, 0, 14, 1, 0),  /* CNTKCTL_EL1 */
-       ARM64_SYS_REG(3, 2, 0, 0, 0),   /* CSSELR_EL1 */
-       ARM64_SYS_REG(3, 3, 10, 2, 4),  /* POR_EL0 */
-       ARM64_SYS_REG(3, 3, 13, 0, 2),  /* TPIDR_EL0 */
-       ARM64_SYS_REG(3, 3, 13, 0, 3),  /* TPIDRRO_EL0 */
-       ARM64_SYS_REG(3, 3, 14, 0, 1),  /* CNTPCT_EL0 */
-       ARM64_SYS_REG(3, 3, 14, 2, 1),  /* CNTP_CTL_EL0 */
-       ARM64_SYS_REG(3, 3, 14, 2, 2),  /* CNTP_CVAL_EL0 */
-       ARM64_SYS_REG(3, 4, 3, 0, 0),   /* DACR32_EL2 */
-       ARM64_SYS_REG(3, 4, 5, 0, 1),   /* IFSR32_EL2 */
-       ARM64_SYS_REG(3, 4, 5, 3, 0),   /* FPEXC32_EL2 */
-};
-
-static __u64 pmu_regs[] = {
-       ARM64_SYS_REG(3, 0, 9, 14, 1),  /* PMINTENSET_EL1 */
-       ARM64_SYS_REG(3, 0, 9, 14, 2),  /* PMINTENCLR_EL1 */
-       ARM64_SYS_REG(3, 3, 9, 12, 0),  /* PMCR_EL0 */
-       ARM64_SYS_REG(3, 3, 9, 12, 1),  /* PMCNTENSET_EL0 */
-       ARM64_SYS_REG(3, 3, 9, 12, 2),  /* PMCNTENCLR_EL0 */
-       ARM64_SYS_REG(3, 3, 9, 12, 3),  /* PMOVSCLR_EL0 */
-       ARM64_SYS_REG(3, 3, 9, 12, 4),  /* PMSWINC_EL0 */
-       ARM64_SYS_REG(3, 3, 9, 12, 5),  /* PMSELR_EL0 */
-       ARM64_SYS_REG(3, 3, 9, 13, 0),  /* PMCCNTR_EL0 */
-       ARM64_SYS_REG(3, 3, 9, 14, 0),  /* PMUSERENR_EL0 */
-       ARM64_SYS_REG(3, 3, 9, 14, 3),  /* PMOVSSET_EL0 */
-       ARM64_SYS_REG(3, 3, 14, 8, 0),
-       ARM64_SYS_REG(3, 3, 14, 8, 1),
-       ARM64_SYS_REG(3, 3, 14, 8, 2),
-       ARM64_SYS_REG(3, 3, 14, 8, 3),
-       ARM64_SYS_REG(3, 3, 14, 8, 4),
-       ARM64_SYS_REG(3, 3, 14, 8, 5),
-       ARM64_SYS_REG(3, 3, 14, 8, 6),
-       ARM64_SYS_REG(3, 3, 14, 8, 7),
-       ARM64_SYS_REG(3, 3, 14, 9, 0),
-       ARM64_SYS_REG(3, 3, 14, 9, 1),
-       ARM64_SYS_REG(3, 3, 14, 9, 2),
-       ARM64_SYS_REG(3, 3, 14, 9, 3),
-       ARM64_SYS_REG(3, 3, 14, 9, 4),
-       ARM64_SYS_REG(3, 3, 14, 9, 5),
-       ARM64_SYS_REG(3, 3, 14, 9, 6),
-       ARM64_SYS_REG(3, 3, 14, 9, 7),
-       ARM64_SYS_REG(3, 3, 14, 10, 0),
-       ARM64_SYS_REG(3, 3, 14, 10, 1),
-       ARM64_SYS_REG(3, 3, 14, 10, 2),
-       ARM64_SYS_REG(3, 3, 14, 10, 3),
-       ARM64_SYS_REG(3, 3, 14, 10, 4),
-       ARM64_SYS_REG(3, 3, 14, 10, 5),
-       ARM64_SYS_REG(3, 3, 14, 10, 6),
-       ARM64_SYS_REG(3, 3, 14, 10, 7),
-       ARM64_SYS_REG(3, 3, 14, 11, 0),
-       ARM64_SYS_REG(3, 3, 14, 11, 1),
-       ARM64_SYS_REG(3, 3, 14, 11, 2),
-       ARM64_SYS_REG(3, 3, 14, 11, 3),
-       ARM64_SYS_REG(3, 3, 14, 11, 4),
-       ARM64_SYS_REG(3, 3, 14, 11, 5),
-       ARM64_SYS_REG(3, 3, 14, 11, 6),
-       ARM64_SYS_REG(3, 3, 14, 12, 0),
-       ARM64_SYS_REG(3, 3, 14, 12, 1),
-       ARM64_SYS_REG(3, 3, 14, 12, 2),
-       ARM64_SYS_REG(3, 3, 14, 12, 3),
-       ARM64_SYS_REG(3, 3, 14, 12, 4),
-       ARM64_SYS_REG(3, 3, 14, 12, 5),
-       ARM64_SYS_REG(3, 3, 14, 12, 6),
-       ARM64_SYS_REG(3, 3, 14, 12, 7),
-       ARM64_SYS_REG(3, 3, 14, 13, 0),
-       ARM64_SYS_REG(3, 3, 14, 13, 1),
-       ARM64_SYS_REG(3, 3, 14, 13, 2),
-       ARM64_SYS_REG(3, 3, 14, 13, 3),
-       ARM64_SYS_REG(3, 3, 14, 13, 4),
-       ARM64_SYS_REG(3, 3, 14, 13, 5),
-       ARM64_SYS_REG(3, 3, 14, 13, 6),
-       ARM64_SYS_REG(3, 3, 14, 13, 7),
-       ARM64_SYS_REG(3, 3, 14, 14, 0),
-       ARM64_SYS_REG(3, 3, 14, 14, 1),
-       ARM64_SYS_REG(3, 3, 14, 14, 2),
-       ARM64_SYS_REG(3, 3, 14, 14, 3),
-       ARM64_SYS_REG(3, 3, 14, 14, 4),
-       ARM64_SYS_REG(3, 3, 14, 14, 5),
-       ARM64_SYS_REG(3, 3, 14, 14, 6),
-       ARM64_SYS_REG(3, 3, 14, 14, 7),
-       ARM64_SYS_REG(3, 3, 14, 15, 0),
-       ARM64_SYS_REG(3, 3, 14, 15, 1),
-       ARM64_SYS_REG(3, 3, 14, 15, 2),
-       ARM64_SYS_REG(3, 3, 14, 15, 3),
-       ARM64_SYS_REG(3, 3, 14, 15, 4),
-       ARM64_SYS_REG(3, 3, 14, 15, 5),
-       ARM64_SYS_REG(3, 3, 14, 15, 6),
-       ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */
-};
-
-static __u64 vregs[] = {
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]),
-};
-
-static __u64 sve_regs[] = {
-       KVM_REG_ARM64_SVE_VLS,
-       KVM_REG_ARM64_SVE_ZREG(0, 0),
-       KVM_REG_ARM64_SVE_ZREG(1, 0),
-       KVM_REG_ARM64_SVE_ZREG(2, 0),
-       KVM_REG_ARM64_SVE_ZREG(3, 0),
-       KVM_REG_ARM64_SVE_ZREG(4, 0),
-       KVM_REG_ARM64_SVE_ZREG(5, 0),
-       KVM_REG_ARM64_SVE_ZREG(6, 0),
-       KVM_REG_ARM64_SVE_ZREG(7, 0),
-       KVM_REG_ARM64_SVE_ZREG(8, 0),
-       KVM_REG_ARM64_SVE_ZREG(9, 0),
-       KVM_REG_ARM64_SVE_ZREG(10, 0),
-       KVM_REG_ARM64_SVE_ZREG(11, 0),
-       KVM_REG_ARM64_SVE_ZREG(12, 0),
-       KVM_REG_ARM64_SVE_ZREG(13, 0),
-       KVM_REG_ARM64_SVE_ZREG(14, 0),
-       KVM_REG_ARM64_SVE_ZREG(15, 0),
-       KVM_REG_ARM64_SVE_ZREG(16, 0),
-       KVM_REG_ARM64_SVE_ZREG(17, 0),
-       KVM_REG_ARM64_SVE_ZREG(18, 0),
-       KVM_REG_ARM64_SVE_ZREG(19, 0),
-       KVM_REG_ARM64_SVE_ZREG(20, 0),
-       KVM_REG_ARM64_SVE_ZREG(21, 0),
-       KVM_REG_ARM64_SVE_ZREG(22, 0),
-       KVM_REG_ARM64_SVE_ZREG(23, 0),
-       KVM_REG_ARM64_SVE_ZREG(24, 0),
-       KVM_REG_ARM64_SVE_ZREG(25, 0),
-       KVM_REG_ARM64_SVE_ZREG(26, 0),
-       KVM_REG_ARM64_SVE_ZREG(27, 0),
-       KVM_REG_ARM64_SVE_ZREG(28, 0),
-       KVM_REG_ARM64_SVE_ZREG(29, 0),
-       KVM_REG_ARM64_SVE_ZREG(30, 0),
-       KVM_REG_ARM64_SVE_ZREG(31, 0),
-       KVM_REG_ARM64_SVE_PREG(0, 0),
-       KVM_REG_ARM64_SVE_PREG(1, 0),
-       KVM_REG_ARM64_SVE_PREG(2, 0),
-       KVM_REG_ARM64_SVE_PREG(3, 0),
-       KVM_REG_ARM64_SVE_PREG(4, 0),
-       KVM_REG_ARM64_SVE_PREG(5, 0),
-       KVM_REG_ARM64_SVE_PREG(6, 0),
-       KVM_REG_ARM64_SVE_PREG(7, 0),
-       KVM_REG_ARM64_SVE_PREG(8, 0),
-       KVM_REG_ARM64_SVE_PREG(9, 0),
-       KVM_REG_ARM64_SVE_PREG(10, 0),
-       KVM_REG_ARM64_SVE_PREG(11, 0),
-       KVM_REG_ARM64_SVE_PREG(12, 0),
-       KVM_REG_ARM64_SVE_PREG(13, 0),
-       KVM_REG_ARM64_SVE_PREG(14, 0),
-       KVM_REG_ARM64_SVE_PREG(15, 0),
-       KVM_REG_ARM64_SVE_FFR(0),
-       ARM64_SYS_REG(3, 0, 1, 2, 0),   /* ZCR_EL1 */
-};
-
-static __u64 sve_rejects_set[] = {
-       KVM_REG_ARM64_SVE_VLS,
-};
-
-static __u64 pauth_addr_regs[] = {
-       ARM64_SYS_REG(3, 0, 2, 1, 0),   /* APIAKEYLO_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 1, 1),   /* APIAKEYHI_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 1, 2),   /* APIBKEYLO_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 1, 3),   /* APIBKEYHI_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 2, 0),   /* APDAKEYLO_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 2, 1),   /* APDAKEYHI_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 2, 2),   /* APDBKEYLO_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 2, 3)    /* APDBKEYHI_EL1 */
-};
-
-static __u64 pauth_generic_regs[] = {
-       ARM64_SYS_REG(3, 0, 2, 3, 0),   /* APGAKEYLO_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 3, 1),   /* APGAKEYHI_EL1 */
-};
-
-#define BASE_SUBLIST \
-       { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
-#define VREGS_SUBLIST \
-       { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), }
-#define PMU_SUBLIST \
-       { "pmu", .capability = KVM_CAP_ARM_PMU_V3, .feature = KVM_ARM_VCPU_PMU_V3, \
-         .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
-#define SVE_SUBLIST \
-       { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
-         .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \
-         .rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), }
-#define PAUTH_SUBLIST                                                  \
-       {                                                               \
-               .name           = "pauth_address",                      \
-               .capability     = KVM_CAP_ARM_PTRAUTH_ADDRESS,          \
-               .feature        = KVM_ARM_VCPU_PTRAUTH_ADDRESS,         \
-               .regs           = pauth_addr_regs,                      \
-               .regs_n         = ARRAY_SIZE(pauth_addr_regs),          \
-       },                                                              \
-       {                                                               \
-               .name           = "pauth_generic",                      \
-               .capability     = KVM_CAP_ARM_PTRAUTH_GENERIC,          \
-               .feature        = KVM_ARM_VCPU_PTRAUTH_GENERIC,         \
-               .regs           = pauth_generic_regs,                   \
-               .regs_n         = ARRAY_SIZE(pauth_generic_regs),       \
-       }
-
-static struct vcpu_reg_list vregs_config = {
-       .sublists = {
-       BASE_SUBLIST,
-       VREGS_SUBLIST,
-       {0},
-       },
-};
-static struct vcpu_reg_list vregs_pmu_config = {
-       .sublists = {
-       BASE_SUBLIST,
-       VREGS_SUBLIST,
-       PMU_SUBLIST,
-       {0},
-       },
-};
-static struct vcpu_reg_list sve_config = {
-       .sublists = {
-       BASE_SUBLIST,
-       SVE_SUBLIST,
-       {0},
-       },
-};
-static struct vcpu_reg_list sve_pmu_config = {
-       .sublists = {
-       BASE_SUBLIST,
-       SVE_SUBLIST,
-       PMU_SUBLIST,
-       {0},
-       },
-};
-static struct vcpu_reg_list pauth_config = {
-       .sublists = {
-       BASE_SUBLIST,
-       VREGS_SUBLIST,
-       PAUTH_SUBLIST,
-       {0},
-       },
-};
-static struct vcpu_reg_list pauth_pmu_config = {
-       .sublists = {
-       BASE_SUBLIST,
-       VREGS_SUBLIST,
-       PAUTH_SUBLIST,
-       PMU_SUBLIST,
-       {0},
-       },
-};
-
-struct vcpu_reg_list *vcpu_configs[] = {
-       &vregs_config,
-       &vregs_pmu_config,
-       &sve_config,
-       &sve_pmu_config,
-       &pauth_config,
-       &pauth_pmu_config,
-};
-int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
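For reference, the blessed arrays above are what the generic get-reg-list harness compares against the output of KVM_GET_REG_LIST. A minimal sketch of that enumeration using raw ioctls, assuming vcpu_fd is an already-open vCPU file descriptor (error handling omitted):

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdlib.h>

static struct kvm_reg_list *get_reg_list(int vcpu_fd)
{
        struct kvm_reg_list probe = { .n = 0 };
        struct kvm_reg_list *list;

        /* The first call fails with E2BIG but fills in the number of entries. */
        ioctl(vcpu_fd, KVM_GET_REG_LIST, &probe);

        list = calloc(1, sizeof(*list) + probe.n * sizeof(__u64));
        list->n = probe.n;
        ioctl(vcpu_fd, KVM_GET_REG_LIST, list);  /* Now fills list->reg[]. */
        return list;
}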
diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c
deleted file mode 100644 (file)
index ec54ec7..0000000
+++ /dev/null
@@ -1,308 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-
-/* hypercalls: Check ARM64's pseudo-firmware bitmap register interface.
- *
- * The test validates the basic hypercall functionality that is exposed
- * via the pseudo-firmware bitmap registers. This includes the registers'
- * read/write behavior before and after the VM has started, and whether the
- * hypercalls are properly masked or unmasked to the guest when disabled or
- * enabled from KVM userspace, respectively.
- */
-#include <errno.h>
-#include <linux/arm-smccc.h>
-#include <asm/kvm.h>
-#include <kvm_util.h>
-
-#include "processor.h"
-
-#define FW_REG_ULIMIT_VAL(max_feat_bit) (GENMASK(max_feat_bit, 0))
-
-/* Last valid bits of the bitmapped firmware registers */
-#define KVM_REG_ARM_STD_BMAP_BIT_MAX           0
-#define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX       0
-#define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX    1
-
-struct kvm_fw_reg_info {
-       uint64_t reg;           /* Register definition */
-       uint64_t max_feat_bit;  /* Bit that represents the upper limit of the feature-map */
-};
-
-#define FW_REG_INFO(r)                 \
-       {                                       \
-               .reg = r,                       \
-               .max_feat_bit = r##_BIT_MAX,    \
-       }
-
-static const struct kvm_fw_reg_info fw_reg_info[] = {
-       FW_REG_INFO(KVM_REG_ARM_STD_BMAP),
-       FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP),
-       FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP),
-};
-
-enum test_stage {
-       TEST_STAGE_REG_IFACE,
-       TEST_STAGE_HVC_IFACE_FEAT_DISABLED,
-       TEST_STAGE_HVC_IFACE_FEAT_ENABLED,
-       TEST_STAGE_HVC_IFACE_FALSE_INFO,
-       TEST_STAGE_END,
-};
-
-static int stage = TEST_STAGE_REG_IFACE;
-
-struct test_hvc_info {
-       uint32_t func_id;
-       uint64_t arg1;
-};
-
-#define TEST_HVC_INFO(f, a1)   \
-       {                       \
-               .func_id = f,   \
-               .arg1 = a1,     \
-       }
-
-static const struct test_hvc_info hvc_info[] = {
-       /* KVM_REG_ARM_STD_BMAP */
-       TEST_HVC_INFO(ARM_SMCCC_TRNG_VERSION, 0),
-       TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_TRNG_RND64),
-       TEST_HVC_INFO(ARM_SMCCC_TRNG_GET_UUID, 0),
-       TEST_HVC_INFO(ARM_SMCCC_TRNG_RND32, 0),
-       TEST_HVC_INFO(ARM_SMCCC_TRNG_RND64, 0),
-
-       /* KVM_REG_ARM_STD_HYP_BMAP */
-       TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_HV_PV_TIME_FEATURES),
-       TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_HV_PV_TIME_ST),
-       TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_ST, 0),
-
-       /* KVM_REG_ARM_VENDOR_HYP_BMAP */
-       TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID,
-                       ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID),
-       TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0),
-       TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID, KVM_PTP_VIRT_COUNTER),
-};
-
-/* Feed false hypercall info to test the KVM behavior */
-static const struct test_hvc_info false_hvc_info[] = {
-       /* Feature support check against a different family of hypercalls */
-       TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID),
-       TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_TRNG_RND64),
-       TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_TRNG_RND64),
-};
-
-static void guest_test_hvc(const struct test_hvc_info *hc_info)
-{
-       unsigned int i;
-       struct arm_smccc_res res;
-       unsigned int hvc_info_arr_sz;
-
-       hvc_info_arr_sz = hc_info == hvc_info ?
-                         ARRAY_SIZE(hvc_info) : ARRAY_SIZE(false_hvc_info);
-
-       for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) {
-               memset(&res, 0, sizeof(res));
-               smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res);
-
-               switch (stage) {
-               case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
-               case TEST_STAGE_HVC_IFACE_FALSE_INFO:
-                       __GUEST_ASSERT(res.a0 == SMCCC_RET_NOT_SUPPORTED,
-                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u",
-                                       res.a0, hc_info->func_id, hc_info->arg1, stage);
-                       break;
-               case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
-                       __GUEST_ASSERT(res.a0 != SMCCC_RET_NOT_SUPPORTED,
-                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u",
-                                       res.a0, hc_info->func_id, hc_info->arg1, stage);
-                       break;
-               default:
-                       GUEST_FAIL("Unexpected stage = %u", stage);
-               }
-       }
-}
-
-static void guest_code(void)
-{
-       while (stage != TEST_STAGE_END) {
-               switch (stage) {
-               case TEST_STAGE_REG_IFACE:
-                       break;
-               case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
-               case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
-                       guest_test_hvc(hvc_info);
-                       break;
-               case TEST_STAGE_HVC_IFACE_FALSE_INFO:
-                       guest_test_hvc(false_hvc_info);
-                       break;
-               default:
-                       GUEST_FAIL("Unexpected stage = %u", stage);
-               }
-
-               GUEST_SYNC(stage);
-       }
-
-       GUEST_DONE();
-}
-
-struct st_time {
-       uint32_t rev;
-       uint32_t attr;
-       uint64_t st_time;
-};
-
-#define STEAL_TIME_SIZE                ((sizeof(struct st_time) + 63) & ~63)
-#define ST_GPA_BASE            (1 << 30)
-
-static void steal_time_init(struct kvm_vcpu *vcpu)
-{
-       uint64_t st_ipa = (ulong)ST_GPA_BASE;
-       unsigned int gpages;
-
-       gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE);
-       vm_userspace_mem_region_add(vcpu->vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
-
-       vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PVTIME_CTRL,
-                            KVM_ARM_VCPU_PVTIME_IPA, &st_ipa);
-}
-
-static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu)
-{
-       uint64_t val;
-       unsigned int i;
-       int ret;
-
-       for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
-               const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
-
-               /* First 'read' should be an upper limit of the features supported */
-               val = vcpu_get_reg(vcpu, reg_info->reg);
-               TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit),
-                       "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx",
-                       reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val);
-
-               /* Test a 'write' by disabling all the features of the register map */
-               ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
-               TEST_ASSERT(ret == 0,
-                       "Failed to clear all the features of reg: 0x%lx; ret: %d",
-                       reg_info->reg, errno);
-
-               val = vcpu_get_reg(vcpu, reg_info->reg);
-               TEST_ASSERT(val == 0,
-                       "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg);
-
-               /*
-                * Test enabling a feature that's not supported.
-                * Avoid this check if all the bits are occupied.
-                */
-               if (reg_info->max_feat_bit < 63) {
-                       ret = __vcpu_set_reg(vcpu, reg_info->reg, BIT(reg_info->max_feat_bit + 1));
-                       TEST_ASSERT(ret != 0 && errno == EINVAL,
-                       "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx",
-                       errno, reg_info->reg);
-               }
-       }
-}
-
-static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu)
-{
-       uint64_t val;
-       unsigned int i;
-       int ret;
-
-       for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
-               const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
-
-               /*
-                * Before starting the VM, the test clears all the bits.
-                * Check if that's still the case.
-                */
-               val = vcpu_get_reg(vcpu, reg_info->reg);
-               TEST_ASSERT(val == 0,
-                       "Expected all the features to be cleared for reg: 0x%lx",
-                       reg_info->reg);
-
-               /*
-                * Since the VM has run at least once, KVM shouldn't allow modification of
-                * the registers and should return EBUSY. Set the registers and check for
-                * the expected errno.
-                */
-               ret = __vcpu_set_reg(vcpu, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit));
-               TEST_ASSERT(ret != 0 && errno == EBUSY,
-               "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx",
-               errno, reg_info->reg);
-       }
-}
-
-static struct kvm_vm *test_vm_create(struct kvm_vcpu **vcpu)
-{
-       struct kvm_vm *vm;
-
-       vm = vm_create_with_one_vcpu(vcpu, guest_code);
-
-       steal_time_init(*vcpu);
-
-       return vm;
-}
-
-static void test_guest_stage(struct kvm_vm **vm, struct kvm_vcpu **vcpu)
-{
-       int prev_stage = stage;
-
-       pr_debug("Stage: %d\n", prev_stage);
-
-       /* Sync the stage early, the VM might be freed below. */
-       stage++;
-       sync_global_to_guest(*vm, stage);
-
-       switch (prev_stage) {
-       case TEST_STAGE_REG_IFACE:
-               test_fw_regs_after_vm_start(*vcpu);
-               break;
-       case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
-               /* Start a new VM so that all the features are now enabled by default */
-               kvm_vm_free(*vm);
-               *vm = test_vm_create(vcpu);
-               break;
-       case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
-       case TEST_STAGE_HVC_IFACE_FALSE_INFO:
-               break;
-       default:
-               TEST_FAIL("Unknown test stage: %d", prev_stage);
-       }
-}
-
-static void test_run(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       bool guest_done = false;
-
-       vm = test_vm_create(&vcpu);
-
-       test_fw_regs_before_vm_start(vcpu);
-
-       while (!guest_done) {
-               vcpu_run(vcpu);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       test_guest_stage(&vm, &vcpu);
-                       break;
-               case UCALL_DONE:
-                       guest_done = true;
-                       break;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       break;
-               default:
-                       TEST_FAIL("Unexpected guest exit");
-               }
-       }
-
-       kvm_vm_free(vm);
-}
-
-int main(void)
-{
-       test_run();
-       return 0;
-}
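The vcpu_get_reg() and __vcpu_set_reg() helpers used above wrap KVM's ONE_REG interface. A minimal sketch of the equivalent raw ioctls on one of the bitmapped firmware registers, assuming vcpu_fd is an already-open vCPU file descriptor (error handling omitted):

#include <linux/kvm.h>
#include <asm/kvm.h>
#include <sys/ioctl.h>

static void std_bmap_read_then_clear(int vcpu_fd)
{
        __u64 val = 0;
        struct kvm_one_reg reg = {
                .id   = KVM_REG_ARM_STD_BMAP,
                .addr = (__u64)&val,
        };

        /* Read the feature bitmap; on return val is the advertised upper limit. */
        ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);

        /* Writing 0 disables every feature; only legal before the VM has run. */
        val = 0;
        ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}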
diff --git a/tools/testing/selftests/kvm/aarch64/mmio_abort.c b/tools/testing/selftests/kvm/aarch64/mmio_abort.c
deleted file mode 100644 (file)
index 8b7a80a..0000000
+++ /dev/null
@@ -1,159 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * mmio_abort - Tests for userspace MMIO abort injection
- *
- * Copyright (c) 2024 Google LLC
- */
-#include "processor.h"
-#include "test_util.h"
-
-#define MMIO_ADDR      0x8000000ULL
-
-static u64 expected_abort_pc;
-
-static void expect_sea_handler(struct ex_regs *regs)
-{
-       u64 esr = read_sysreg(esr_el1);
-
-       GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
-       GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
-       GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
-
-       GUEST_DONE();
-}
-
-static void unexpected_dabt_handler(struct ex_regs *regs)
-{
-       GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc);
-}
-
-static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code,
-                                                 handler_fn dabt_handler)
-{
-       struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code);
-
-       vm_init_descriptor_tables(vm);
-       vcpu_init_descriptor_tables(*vcpu);
-       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler);
-
-       virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1);
-
-       return vm;
-}
-
-static void vcpu_inject_extabt(struct kvm_vcpu *vcpu)
-{
-       struct kvm_vcpu_events events = {};
-
-       events.exception.ext_dabt_pending = true;
-       vcpu_events_set(vcpu, &events);
-}
-
-static void vcpu_run_expect_done(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       vcpu_run(vcpu);
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-       case UCALL_DONE:
-               break;
-       default:
-               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
-       }
-}
-
-extern char test_mmio_abort_insn;
-
-static void test_mmio_abort_guest(void)
-{
-       WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn);
-
-       asm volatile("test_mmio_abort_insn:\n\t"
-                    "ldr x0, [%0]\n\t"
-                    : : "r" (MMIO_ADDR) : "x0", "memory");
-
-       GUEST_FAIL("MMIO instruction should not retire");
-}
-
-/*
- * Test that KVM doesn't complete MMIO emulation when userspace has made an
- * external abort pending for the instruction.
- */
-static void test_mmio_abort(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest,
-                                                       expect_sea_handler);
-       struct kvm_run *run = vcpu->run;
-
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
-       TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
-       TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
-       TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
-
-       vcpu_inject_extabt(vcpu);
-       vcpu_run_expect_done(vcpu);
-       kvm_vm_free(vm);
-}
-
-extern char test_mmio_nisv_insn;
-
-static void test_mmio_nisv_guest(void)
-{
-       WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn);
-
-       asm volatile("test_mmio_nisv_insn:\n\t"
-                    "ldr x0, [%0], #8\n\t"
-                    : : "r" (MMIO_ADDR) : "x0", "memory");
-
-       GUEST_FAIL("MMIO instruction should not retire");
-}
-
-/*
- * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace
- * hasn't enabled KVM_CAP_ARM_NISV_TO_USER.
- */
-static void test_mmio_nisv(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
-                                                       unexpected_dabt_handler);
-
-       TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN");
-       TEST_ASSERT_EQ(errno, ENOSYS);
-
-       kvm_vm_free(vm);
-}
-
-/*
- * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA
- * reaches the guest.
- */
-static void test_mmio_nisv_abort(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
-                                                       expect_sea_handler);
-       struct kvm_run *run = vcpu->run;
-
-       vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1);
-
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV);
-       TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR);
-
-       vcpu_inject_extabt(vcpu);
-       vcpu_run_expect_done(vcpu);
-       kvm_vm_free(vm);
-}
-
-int main(void)
-{
-       test_mmio_abort();
-       test_mmio_nisv();
-       test_mmio_nisv_abort();
-}
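For contrast with the abort injection above, completing a KVM_EXIT_MMIO read the normal way means filling run->mmio.data before re-entering the guest, which is exactly what this test avoids so the load never retires. A minimal sketch, assuming vcpu_fd and the mmap'd kvm_run structure are already set up:

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static void complete_mmio_read(int vcpu_fd, struct kvm_run *run, __u64 value)
{
        /* KVM consumes run->mmio.data on the next KVM_RUN and retires the load. */
        if (run->exit_reason == KVM_EXIT_MMIO && !run->mmio.is_write)
                memcpy(run->mmio.data, &value, run->mmio.len);

        ioctl(vcpu_fd, KVM_RUN, 0);
}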
diff --git a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c
deleted file mode 100644 (file)
index ebd7043..0000000
+++ /dev/null
@@ -1,175 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-// Check that, on a GICv3 system, not configuring a GICv3 for the guest
-// results in all of the ICC_* sysregs generating an UNDEF exception.
-
-#include <test_util.h>
-#include <kvm_util.h>
-#include <processor.h>
-
-static volatile bool handled;
-
-#define __check_sr_read(r)                                     \
-       ({                                                      \
-               uint64_t val;                                   \
-                                                               \
-               handled = false;                                \
-               dsb(sy);                                        \
-               val = read_sysreg_s(SYS_ ## r);                 \
-               val;                                            \
-       })
-
-#define __check_sr_write(r)                                    \
-       do {                                                    \
-               handled = false;                                \
-               dsb(sy);                                        \
-               write_sysreg_s(0, SYS_ ## r);                   \
-               isb();                                          \
-       } while(0)
-
-/* Fatal checks */
-#define check_sr_read(r)                                       \
-       do {                                                    \
-               __check_sr_read(r);                             \
-               __GUEST_ASSERT(handled, #r " no read trap");    \
-       } while(0)
-
-#define check_sr_write(r)                                      \
-       do {                                                    \
-               __check_sr_write(r);                            \
-               __GUEST_ASSERT(handled, #r " no write trap");   \
-       } while(0)
-
-#define check_sr_rw(r)                         \
-       do {                                    \
-               check_sr_read(r);               \
-               check_sr_write(r);              \
-       } while(0)
-
-static void guest_code(void)
-{
-       uint64_t val;
-
-       /*
-        * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having
-        * hidden the feature at runtime without any other userspace action.
-        */
-       __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC),
-                                read_sysreg(id_aa64pfr0_el1)) == 0,
-                      "GICv3 wrongly advertised");
-
-       /*
-        * Access all GICv3 registers, and fail if we don't get an UNDEF.
-        * Note that we happily access all the APxRn registers without
- * checking their existence, as all we want to see is a failure.
-        */
-       check_sr_rw(ICC_PMR_EL1);
-       check_sr_read(ICC_IAR0_EL1);
-       check_sr_write(ICC_EOIR0_EL1);
-       check_sr_rw(ICC_HPPIR0_EL1);
-       check_sr_rw(ICC_BPR0_EL1);
-       check_sr_rw(ICC_AP0R0_EL1);
-       check_sr_rw(ICC_AP0R1_EL1);
-       check_sr_rw(ICC_AP0R2_EL1);
-       check_sr_rw(ICC_AP0R3_EL1);
-       check_sr_rw(ICC_AP1R0_EL1);
-       check_sr_rw(ICC_AP1R1_EL1);
-       check_sr_rw(ICC_AP1R2_EL1);
-       check_sr_rw(ICC_AP1R3_EL1);
-       check_sr_write(ICC_DIR_EL1);
-       check_sr_read(ICC_RPR_EL1);
-       check_sr_write(ICC_SGI1R_EL1);
-       check_sr_write(ICC_ASGI1R_EL1);
-       check_sr_write(ICC_SGI0R_EL1);
-       check_sr_read(ICC_IAR1_EL1);
-       check_sr_write(ICC_EOIR1_EL1);
-       check_sr_rw(ICC_HPPIR1_EL1);
-       check_sr_rw(ICC_BPR1_EL1);
-       check_sr_rw(ICC_CTLR_EL1);
-       check_sr_rw(ICC_IGRPEN0_EL1);
-       check_sr_rw(ICC_IGRPEN1_EL1);
-
-       /*
-        * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can
-        * be RAO/WI. Engage in non-fatal accesses, starting with a
-        * write of 0 to try and disable SRE, and let's see if it
-        * sticks.
-        */
-       __check_sr_write(ICC_SRE_EL1);
-       if (!handled)
-               GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n");
-
-       val = __check_sr_read(ICC_SRE_EL1);
-       if (!handled) {
-               __GUEST_ASSERT((val & BIT(0)),
-                              "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n");
-               GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n");
-       }
-
-       GUEST_DONE();
-}
-
-static void guest_undef_handler(struct ex_regs *regs)
-{
-       /* Success, we've gracefully exploded! */
-       handled = true;
-       regs->pc += 4;
-}
-
-static void test_run_vcpu(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       do {
-               vcpu_run(vcpu);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       break;
-               case UCALL_PRINTF:
-                       printf("%s", uc.buffer);
-                       break;
-               case UCALL_DONE:
-                       break;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-       } while (uc.cmd != UCALL_DONE);
-}
-
-static void test_guest_no_gicv3(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       /* Create a VM without a GICv3 */
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       vm_init_descriptor_tables(vm);
-       vcpu_init_descriptor_tables(vcpu);
-
-       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
-                               ESR_ELx_EC_UNKNOWN, guest_undef_handler);
-
-       test_run_vcpu(vcpu);
-
-       kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       uint64_t pfr0;
-
-       vm = vm_create_with_one_vcpu(&vcpu, NULL);
-       pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
-       __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0),
-                      "GICv3 not supported.");
-       kvm_vm_free(vm);
-
-       test_guest_no_gicv3();
-
-       return 0;
-}
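The setup this test deliberately omits is creating the in-kernel GICv3 through the KVM device API. A minimal sketch of that step with raw ioctls, assuming vm_fd is an already-open VM file descriptor; programming the distributor/redistributor base addresses via device attributes is left out:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int create_vgic_v3(int vm_fd)
{
        struct kvm_create_device dev = {
                .type = KVM_DEV_TYPE_ARM_VGIC_V3,
        };

        /* On success, dev.fd is the device fd used to set the GIC attributes. */
        if (ioctl(vm_fd, KVM_CREATE_DEVICE, &dev))
                return -1;

        return dev.fd;
}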
diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
deleted file mode 100644 (file)
index ec33a8f..0000000
+++ /dev/null
@@ -1,1135 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * page_fault_test.c - Test stage 2 faults.
- *
- * This test tries different combinations of guest accesses (e.g., write,
- * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on
- * hugetlbfs with a hole). It checks that the expected handling method is
- * called (e.g., uffd faults with the right address and write/read flag).
- */
-#include <linux/bitmap.h>
-#include <fcntl.h>
-#include <test_util.h>
-#include <kvm_util.h>
-#include <processor.h>
-#include <asm/sysreg.h>
-#include <linux/bitfield.h>
-#include "guest_modes.h"
-#include "userfaultfd_util.h"
-
-/* Guest virtual addresses that point to the test page and its PTE. */
-#define TEST_GVA                               0xc0000000
-#define TEST_EXEC_GVA                          (TEST_GVA + 0x8)
-#define TEST_PTE_GVA                           0xb0000000
-#define TEST_DATA                              0x0123456789ABCDEF
-
-static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA;
-
-#define CMD_NONE                               (0)
-#define CMD_SKIP_TEST                          (1ULL << 1)
-#define CMD_HOLE_PT                            (1ULL << 2)
-#define CMD_HOLE_DATA                          (1ULL << 3)
-#define CMD_CHECK_WRITE_IN_DIRTY_LOG           (1ULL << 4)
-#define CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG                (1ULL << 5)
-#define CMD_CHECK_NO_WRITE_IN_DIRTY_LOG                (1ULL << 6)
-#define CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG     (1ULL << 7)
-#define CMD_SET_PTE_AF                         (1ULL << 8)
-
-#define PREPARE_FN_NR                          10
-#define CHECK_FN_NR                            10
-
-static struct event_cnt {
-       int mmio_exits;
-       int fail_vcpu_runs;
-       int uffd_faults;
-       /* uffd_faults is incremented from multiple threads. */
-       pthread_mutex_t uffd_faults_mutex;
-} events;
-
-struct test_desc {
-       const char *name;
-       uint64_t mem_mark_cmd;
-       /* Skip the test if any prepare function returns false */
-       bool (*guest_prepare[PREPARE_FN_NR])(void);
-       void (*guest_test)(void);
-       void (*guest_test_check[CHECK_FN_NR])(void);
-       uffd_handler_t uffd_pt_handler;
-       uffd_handler_t uffd_data_handler;
-       void (*dabt_handler)(struct ex_regs *regs);
-       void (*iabt_handler)(struct ex_regs *regs);
-       void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run);
-       void (*fail_vcpu_run_handler)(int ret);
-       uint32_t pt_memslot_flags;
-       uint32_t data_memslot_flags;
-       bool skip;
-       struct event_cnt expected_events;
-};
-
-struct test_params {
-       enum vm_mem_backing_src_type src_type;
-       struct test_desc *test_desc;
-};
-
-static inline void flush_tlb_page(uint64_t vaddr)
-{
-       uint64_t page = vaddr >> 12;
-
-       dsb(ishst);
-       asm volatile("tlbi vaae1is, %0" :: "r" (page));
-       dsb(ish);
-       isb();
-}
-
-static void guest_write64(void)
-{
-       uint64_t val;
-
-       WRITE_ONCE(*guest_test_memory, TEST_DATA);
-       val = READ_ONCE(*guest_test_memory);
-       GUEST_ASSERT_EQ(val, TEST_DATA);
-}
-
-/* Check the system for atomic instructions. */
-static bool guest_check_lse(void)
-{
-       uint64_t isar0 = read_sysreg(id_aa64isar0_el1);
-       uint64_t atomic;
-
-       atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0);
-       return atomic >= 2;
-}
-
-static bool guest_check_dc_zva(void)
-{
-       uint64_t dczid = read_sysreg(dczid_el0);
-       uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid);
-
-       return dzp == 0;
-}
-
-/* Compare and swap instruction. */
-static void guest_cas(void)
-{
-       uint64_t val;
-
-       GUEST_ASSERT(guest_check_lse());
-       asm volatile(".arch_extension lse\n"
-                    "casal %0, %1, [%2]\n"
-                    :: "r" (0ul), "r" (TEST_DATA), "r" (guest_test_memory));
-       val = READ_ONCE(*guest_test_memory);
-       GUEST_ASSERT_EQ(val, TEST_DATA);
-}
-
-static void guest_read64(void)
-{
-       uint64_t val;
-
-       val = READ_ONCE(*guest_test_memory);
-       GUEST_ASSERT_EQ(val, 0);
-}
-
-/* Address translation instruction */
-static void guest_at(void)
-{
-       uint64_t par;
-
-       asm volatile("at s1e1r, %0" :: "r" (guest_test_memory));
-       isb();
-       par = read_sysreg(par_el1);
-
-       /* PAR_EL1.F (bit 0) is clear if the address translation succeeded */
-       GUEST_ASSERT_EQ(par & 1, 0);
-}
-
-/*
- * The size of the block written by "dc zva" is guaranteed to be between (2 <<
- * 0) and (2 << 9), which is safe in our case as we need the write to happen
- * for at least a word, and not more than a page.
- */
-static void guest_dc_zva(void)
-{
-       uint16_t val;
-
-       asm volatile("dc zva, %0" :: "r" (guest_test_memory));
-       dsb(ish);
-       val = READ_ONCE(*guest_test_memory);
-       GUEST_ASSERT_EQ(val, 0);
-}
-
-/*
- * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0).
- * That is special because KVM must take extra care with them: they
- * should still count as accesses for dirty logging or user-faulting, but
- * should be handled differently on mmio.
- */
-static void guest_ld_preidx(void)
-{
-       uint64_t val;
-       uint64_t addr = TEST_GVA - 8;
-
-       /*
-        * This ends up accessing "TEST_GVA - 8 + 8", where "TEST_GVA - 8" is
-        * in a gap between memslots not backed by anything.
-        */
-       asm volatile("ldr %0, [%1, #8]!"
-                    : "=r" (val), "+r" (addr));
-       GUEST_ASSERT_EQ(val, 0);
-       GUEST_ASSERT_EQ(addr, TEST_GVA);
-}
-
-static void guest_st_preidx(void)
-{
-       uint64_t val = TEST_DATA;
-       uint64_t addr = TEST_GVA - 8;
-
-       asm volatile("str %0, [%1, #8]!"
-                    : "+r" (val), "+r" (addr));
-
-       GUEST_ASSERT_EQ(addr, TEST_GVA);
-       val = READ_ONCE(*guest_test_memory);
-}
-
-static bool guest_set_ha(void)
-{
-       uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1);
-       uint64_t hadbs, tcr;
-
-       /* Skip if HA is not supported. */
-       hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1);
-       if (hadbs == 0)
-               return false;
-
-       tcr = read_sysreg(tcr_el1) | TCR_EL1_HA;
-       write_sysreg(tcr, tcr_el1);
-       isb();
-
-       return true;
-}
-
-static bool guest_clear_pte_af(void)
-{
-       *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF;
-       flush_tlb_page(TEST_GVA);
-
-       return true;
-}
-
-static void guest_check_pte_af(void)
-{
-       dsb(ish);
-       GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF);
-}
-
-static void guest_check_write_in_dirty_log(void)
-{
-       GUEST_SYNC(CMD_CHECK_WRITE_IN_DIRTY_LOG);
-}
-
-static void guest_check_no_write_in_dirty_log(void)
-{
-       GUEST_SYNC(CMD_CHECK_NO_WRITE_IN_DIRTY_LOG);
-}
-
-static void guest_check_s1ptw_wr_in_dirty_log(void)
-{
-       GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG);
-}
-
-static void guest_check_no_s1ptw_wr_in_dirty_log(void)
-{
-       GUEST_SYNC(CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG);
-}
-
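-/* Call the code staged at TEST_EXEC_GVA and check its magic return value. */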
-static void guest_exec(void)
-{
-       int (*code)(void) = (int (*)(void))TEST_EXEC_GVA;
-       int ret;
-
-       ret = code();
-       GUEST_ASSERT_EQ(ret, 0x77);
-}
-
-static bool guest_prepare(struct test_desc *test)
-{
-       bool (*prepare_fn)(void);
-       int i;
-
-       for (i = 0; i < PREPARE_FN_NR; i++) {
-               prepare_fn = test->guest_prepare[i];
-               if (prepare_fn && !prepare_fn())
-                       return false;
-       }
-
-       return true;
-}
-
-static void guest_test_check(struct test_desc *test)
-{
-       void (*check_fn)(void);
-       int i;
-
-       for (i = 0; i < CHECK_FN_NR; i++) {
-               check_fn = test->guest_test_check[i];
-               if (check_fn)
-                       check_fn();
-       }
-}
-
-static void guest_code(struct test_desc *test)
-{
-       if (!guest_prepare(test))
-               GUEST_SYNC(CMD_SKIP_TEST);
-
-       GUEST_SYNC(test->mem_mark_cmd);
-
-       if (test->guest_test)
-               test->guest_test();
-
-       guest_test_check(test);
-       GUEST_DONE();
-}
-
-static void no_dabt_handler(struct ex_regs *regs)
-{
-       GUEST_FAIL("Unexpected dabt, far_el1 = 0x%lx", read_sysreg(far_el1));
-}
-
-static void no_iabt_handler(struct ex_regs *regs)
-{
-       GUEST_FAIL("Unexpected iabt, pc = 0x%lx", regs->pc);
-}
-
-static struct uffd_args {
-       char *copy;
-       void *hva;
-       uint64_t paging_size;
-} pt_args, data_args;
-
-/* Returns 0 if the fault was handled, or a negative value if UFFDIO_COPY failed. */
-static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg,
-                               struct uffd_args *args)
-{
-       uint64_t addr = msg->arg.pagefault.address;
-       uint64_t flags = msg->arg.pagefault.flags;
-       struct uffdio_copy copy;
-       int ret;
-
-       TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING,
-                   "The only expected UFFD mode is MISSING");
-       TEST_ASSERT_EQ(addr, (uint64_t)args->hva);
-
-       pr_debug("uffd fault: addr=%p write=%d\n",
-                (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE));
-
-       copy.src = (uint64_t)args->copy;
-       copy.dst = addr;
-       copy.len = args->paging_size;
-       copy.mode = 0;
-
-       ret = ioctl(uffd, UFFDIO_COPY, &copy);
-       if (ret == -1) {
-               pr_info("Failed UFFDIO_COPY in 0x%lx with errno: %d\n",
-                       addr, errno);
-               return ret;
-       }
-
-       pthread_mutex_lock(&events.uffd_faults_mutex);
-       events.uffd_faults += 1;
-       pthread_mutex_unlock(&events.uffd_faults_mutex);
-       return 0;
-}
-
-static int uffd_pt_handler(int mode, int uffd, struct uffd_msg *msg)
-{
-       return uffd_generic_handler(mode, uffd, msg, &pt_args);
-}
-
-static int uffd_data_handler(int mode, int uffd, struct uffd_msg *msg)
-{
-       return uffd_generic_handler(mode, uffd, msg, &data_args);
-}
-
-static void setup_uffd_args(struct userspace_mem_region *region,
-                           struct uffd_args *args)
-{
-       args->hva = (void *)region->region.userspace_addr;
-       args->paging_size = region->region.memory_size;
-
-       args->copy = malloc(args->paging_size);
-       TEST_ASSERT(args->copy, "Failed to allocate data copy.");
-       memcpy(args->copy, args->hva, args->paging_size);
-}
-
-static void setup_uffd(struct kvm_vm *vm, struct test_params *p,
-                      struct uffd_desc **pt_uffd, struct uffd_desc **data_uffd)
-{
-       struct test_desc *test = p->test_desc;
-       int uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
-
-       setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_PT), &pt_args);
-       setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_TEST_DATA), &data_args);
-
-       *pt_uffd = NULL;
-       if (test->uffd_pt_handler)
-               *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0,
-                                                   pt_args.hva,
-                                                   pt_args.paging_size,
-                                                   1, test->uffd_pt_handler);
-
-       *data_uffd = NULL;
-       if (test->uffd_data_handler)
-               *data_uffd = uffd_setup_demand_paging(uffd_mode, 0,
-                                                     data_args.hva,
-                                                     data_args.paging_size,
-                                                     1, test->uffd_data_handler);
-}
-
-static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd,
-                     struct uffd_desc *data_uffd)
-{
-       if (test->uffd_pt_handler)
-               uffd_stop_demand_paging(pt_uffd);
-       if (test->uffd_data_handler)
-               uffd_stop_demand_paging(data_uffd);
-
-       free(pt_args.copy);
-       free(data_args.copy);
-}
-
-static int uffd_no_handler(int mode, int uffd, struct uffd_msg *msg)
-{
-       TEST_FAIL("No UFFD fault was expected.");
-       return -1;
-}
-
-/* Returns false if the test should be skipped. */
-static bool punch_hole_in_backing_store(struct kvm_vm *vm,
-                                       struct userspace_mem_region *region)
-{
-       void *hva = (void *)region->region.userspace_addr;
-       uint64_t paging_size = region->region.memory_size;
-       int ret, fd = region->fd;
-
-       if (fd != -1) {
-               ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-                               0, paging_size);
-               TEST_ASSERT(ret == 0, "fallocate failed");
-       } else {
-               ret = madvise(hva, paging_size, MADV_DONTNEED);
-               TEST_ASSERT(ret == 0, "madvise failed");
-       }
-
-       return true;
-}
-
-static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run)
-{
-       struct userspace_mem_region *region;
-       void *hva;
-
-       region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
-       hva = (void *)region->region.userspace_addr;
-
-       TEST_ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr);
-
-       memcpy(hva, run->mmio.data, run->mmio.len);
-       events.mmio_exits += 1;
-}
-
-static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run)
-{
-       uint64_t data;
-
-       memcpy(&data, run->mmio.data, sizeof(data));
-       pr_debug("addr=%lld len=%d w=%d data=%lx\n",
-                run->mmio.phys_addr, run->mmio.len,
-                run->mmio.is_write, data);
-       TEST_FAIL("No MMIO exit was expected.");
-}
-
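-/*
- * Return whether page @host_pg_nr (in host page size units) of @region is
- * marked dirty in KVM's dirty log for that memslot.
- */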
-static bool check_write_in_dirty_log(struct kvm_vm *vm,
-                                    struct userspace_mem_region *region,
-                                    uint64_t host_pg_nr)
-{
-       unsigned long *bmap;
-       bool first_page_dirty;
-       uint64_t size = region->region.memory_size;
-
-       /* getpagesize() is not always equal to vm->page_size */
-       bmap = bitmap_zalloc(size / getpagesize());
-       kvm_vm_get_dirty_log(vm, region->region.slot, bmap);
-       first_page_dirty = test_bit(host_pg_nr, bmap);
-       free(bmap);
-       return first_page_dirty;
-}
-
-/* Returns true to continue the test, and false if it should be skipped. */
-static bool handle_cmd(struct kvm_vm *vm, int cmd)
-{
-       struct userspace_mem_region *data_region, *pt_region;
-       bool continue_test = true;
-       uint64_t pte_gpa, pte_pg;
-
-       data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
-       pt_region = vm_get_mem_region(vm, MEM_REGION_PT);
-       pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
-       pte_pg = (pte_gpa - pt_region->region.guest_phys_addr) / getpagesize();
-
-       if (cmd == CMD_SKIP_TEST)
-               continue_test = false;
-
-       if (cmd & CMD_HOLE_PT)
-               continue_test = punch_hole_in_backing_store(vm, pt_region);
-       if (cmd & CMD_HOLE_DATA)
-               continue_test = punch_hole_in_backing_store(vm, data_region);
-       if (cmd & CMD_CHECK_WRITE_IN_DIRTY_LOG)
-               TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0),
-                           "Missing write in dirty log");
-       if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG)
-               TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, pte_pg),
-                           "Missing s1ptw write in dirty log");
-       if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG)
-               TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0),
-                           "Unexpected write in dirty log");
-       if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG)
-               TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, pte_pg),
-                           "Unexpected s1ptw write in dirty log");
-
-       return continue_test;
-}
-
-void fail_vcpu_run_no_handler(int ret)
-{
-       TEST_FAIL("Unexpected vcpu run failure");
-}
-
-void fail_vcpu_run_mmio_no_syndrome_handler(int ret)
-{
-       TEST_ASSERT(errno == ENOSYS,
-                   "The mmio handler should have returned not implemented.");
-       events.fail_vcpu_runs += 1;
-}
-
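-/*
- * Two-instruction blob ("mov x0, #0x77; ret") that load_exec_code_for_test()
- * copies into the test-data memslot for the guest_exec test.
- */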
-typedef uint32_t aarch64_insn_t;
-extern aarch64_insn_t __exec_test[2];
-
-noinline void __return_0x77(void)
-{
-       asm volatile("__exec_test: mov x0, #0x77\n"
-                    "ret\n");
-}
-
-/*
- * Note that this function runs on the host before the test VM starts: there's
- * no need to sync the D$ and I$ caches.
- */
-static void load_exec_code_for_test(struct kvm_vm *vm)
-{
-       uint64_t *code;
-       struct userspace_mem_region *region;
-       void *hva;
-
-       region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
-       hva = (void *)region->region.userspace_addr;
-
-       assert(TEST_EXEC_GVA > TEST_GVA);
-       code = hva + TEST_EXEC_GVA - TEST_GVA;
-       memcpy(code, __exec_test, sizeof(__exec_test));
-}
-
-static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
-                                struct test_desc *test)
-{
-       vm_init_descriptor_tables(vm);
-       vcpu_init_descriptor_tables(vcpu);
-
-       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
-                               ESR_ELx_EC_DABT_CUR, no_dabt_handler);
-       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
-                               ESR_ELx_EC_IABT_CUR, no_iabt_handler);
-}
-
-static void setup_gva_maps(struct kvm_vm *vm)
-{
-       struct userspace_mem_region *region;
-       uint64_t pte_gpa;
-
-       region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
-       /* Map TEST_GVA first. This will install a new PTE. */
-       virt_pg_map(vm, TEST_GVA, region->region.guest_phys_addr);
-       /* Then map TEST_PTE_GVA to the above PTE. */
-       pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
-       virt_pg_map(vm, TEST_PTE_GVA, pte_gpa);
-}
-
-enum pf_test_memslots {
-       CODE_AND_DATA_MEMSLOT,
-       PAGE_TABLE_MEMSLOT,
-       TEST_DATA_MEMSLOT,
-};
-
-/*
- * Create a memslot for code and data at gpa 0, and test-data and PT ones
- * near max_gfn.
- */
-static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
-{
-       uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type);
-       uint64_t guest_page_size = vm->page_size;
-       uint64_t max_gfn = vm_compute_max_gfn(vm);
-       /* Enough for 2M of code when using 4K guest pages. */
-       uint64_t code_npages = 512;
-       uint64_t pt_size, data_size, data_gpa;
-
-       /*
-        * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using
-        * VM_MODE_P48V48_4K. Note that the .text takes ~1.6MB.  That's 13
-        * pages. VM_MODE_P48V48_4K is the mode with most PT pages; let's use
-        * twice that just in case.
-        */
-       pt_size = 26 * guest_page_size;
-
-       /* Memslot sizes and GPAs must be aligned to the backing page size */
-       pt_size = align_up(pt_size, backing_src_pagesz);
-       data_size = align_up(guest_page_size, backing_src_pagesz);
-       data_gpa = (max_gfn * guest_page_size) - data_size;
-       data_gpa = align_down(data_gpa, backing_src_pagesz);
-
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0,
-                                   CODE_AND_DATA_MEMSLOT, code_npages, 0);
-       vm->memslots[MEM_REGION_CODE] = CODE_AND_DATA_MEMSLOT;
-       vm->memslots[MEM_REGION_DATA] = CODE_AND_DATA_MEMSLOT;
-
-       vm_userspace_mem_region_add(vm, p->src_type, data_gpa - pt_size,
-                                   PAGE_TABLE_MEMSLOT, pt_size / guest_page_size,
-                                   p->test_desc->pt_memslot_flags);
-       vm->memslots[MEM_REGION_PT] = PAGE_TABLE_MEMSLOT;
-
-       vm_userspace_mem_region_add(vm, p->src_type, data_gpa, TEST_DATA_MEMSLOT,
-                                   data_size / guest_page_size,
-                                   p->test_desc->data_memslot_flags);
-       vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
-}
-
-static void setup_ucall(struct kvm_vm *vm)
-{
-       struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
-
-       ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size);
-}
-
-static void setup_default_handlers(struct test_desc *test)
-{
-       if (!test->mmio_handler)
-               test->mmio_handler = mmio_no_handler;
-
-       if (!test->fail_vcpu_run_handler)
-               test->fail_vcpu_run_handler = fail_vcpu_run_no_handler;
-}
-
-static void check_event_counts(struct test_desc *test)
-{
-       TEST_ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);
-       TEST_ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits);
-       TEST_ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs);
-}
-
-static void print_test_banner(enum vm_guest_mode mode, struct test_params *p)
-{
-       struct test_desc *test = p->test_desc;
-
-       pr_debug("Test: %s\n", test->name);
-       pr_debug("Testing guest mode: %s\n", vm_guest_mode_string(mode));
-       pr_debug("Testing memory backing src type: %s\n",
-                vm_mem_backing_src_alias(p->src_type)->name);
-}
-
-static void reset_event_counts(void)
-{
-       memset(&events, 0, sizeof(events));
-}
-
-/*
- * This function either succeeds, skips the test (after setting test->skip), or
- * fails with a TEST_FAIL that aborts all tests.
- */
-static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
-                         struct test_desc *test)
-{
-       struct kvm_run *run;
-       struct ucall uc;
-       int ret;
-
-       run = vcpu->run;
-
-       for (;;) {
-               ret = _vcpu_run(vcpu);
-               if (ret) {
-                       test->fail_vcpu_run_handler(ret);
-                       goto done;
-               }
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       if (!handle_cmd(vm, uc.args[1])) {
-                               test->skip = true;
-                               goto done;
-                       }
-                       break;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               case UCALL_NONE:
-                       if (run->exit_reason == KVM_EXIT_MMIO)
-                               test->mmio_handler(vm, run);
-                       break;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-       }
-
-done:
-       pr_debug(test->skip ? "Skipped.\n" : "Done.\n");
-}
-
-static void run_test(enum vm_guest_mode mode, void *arg)
-{
-       struct test_params *p = (struct test_params *)arg;
-       struct test_desc *test = p->test_desc;
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu;
-       struct uffd_desc *pt_uffd, *data_uffd;
-
-       print_test_banner(mode, p);
-
-       vm = ____vm_create(VM_SHAPE(mode));
-       setup_memslots(vm, p);
-       kvm_vm_elf_load(vm, program_invocation_name);
-       setup_ucall(vm);
-       vcpu = vm_vcpu_add(vm, 0, guest_code);
-
-       setup_gva_maps(vm);
-
-       reset_event_counts();
-
-       /*
-        * Set some code in the data memslot for the guest to execute (only
-        * applicable to the EXEC tests). This has to be done before
-        * setup_uffd() as that function copies the memslot data for the uffd
-        * handler.
-        */
-       load_exec_code_for_test(vm);
-       setup_uffd(vm, p, &pt_uffd, &data_uffd);
-       setup_abort_handlers(vm, vcpu, test);
-       setup_default_handlers(test);
-       vcpu_args_set(vcpu, 1, test);
-
-       vcpu_run_loop(vm, vcpu, test);
-
-       kvm_vm_free(vm);
-       free_uffd(test, pt_uffd, data_uffd);
-
-       /*
-        * Make sure we check the events after the uffd threads have exited,
-        * which means they updated their respective event counters.
-        */
-       if (!test->skip)
-               check_event_counts(test);
-}
-
-static void help(char *name)
-{
-       puts("");
-       printf("usage: %s [-h] [-s mem-type]\n", name);
-       puts("");
-       guest_modes_help();
-       backing_src_help("-s");
-       puts("");
-}
-
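-/* Helpers to build test name strings by pasting tokens together with '_'. */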
-#define SNAME(s)                       #s
-#define SCAT2(a, b)                    SNAME(a ## _ ## b)
-#define SCAT3(a, b, c)                 SCAT2(a, SCAT2(b, c))
-#define SCAT4(a, b, c, d)              SCAT2(a, SCAT3(b, c, d))
-
-#define _CHECK(_test)                  _CHECK_##_test
-#define _PREPARE(_test)                        _PREPARE_##_test
-#define _PREPARE_guest_read64          NULL
-#define _PREPARE_guest_ld_preidx       NULL
-#define _PREPARE_guest_write64         NULL
-#define _PREPARE_guest_st_preidx       NULL
-#define _PREPARE_guest_exec            NULL
-#define _PREPARE_guest_at              NULL
-#define _PREPARE_guest_dc_zva          guest_check_dc_zva
-#define _PREPARE_guest_cas             guest_check_lse
-
-/* With or without access flag checks */
-#define _PREPARE_with_af               guest_set_ha, guest_clear_pte_af
-#define _PREPARE_no_af                 NULL
-#define _CHECK_with_af                 guest_check_pte_af
-#define _CHECK_no_af                   NULL
-
-/* Performs an access and checks that no faults were triggered. */
-#define TEST_ACCESS(_access, _with_af, _mark_cmd)                              \
-{                                                                              \
-       .name                   = SCAT3(_access, _with_af, #_mark_cmd),         \
-       .guest_prepare          = { _PREPARE(_with_af),                         \
-                                   _PREPARE(_access) },                        \
-       .mem_mark_cmd           = _mark_cmd,                                    \
-       .guest_test             = _access,                                      \
-       .guest_test_check       = { _CHECK(_with_af) },                         \
-       .expected_events        = { 0 },                                        \
-}
-
-#define TEST_UFFD(_access, _with_af, _mark_cmd,                                        \
-                 _uffd_data_handler, _uffd_pt_handler, _uffd_faults)           \
-{                                                                              \
-       .name                   = SCAT4(uffd, _access, _with_af, #_mark_cmd),   \
-       .guest_prepare          = { _PREPARE(_with_af),                         \
-                                   _PREPARE(_access) },                        \
-       .guest_test             = _access,                                      \
-       .mem_mark_cmd           = _mark_cmd,                                    \
-       .guest_test_check       = { _CHECK(_with_af) },                         \
-       .uffd_data_handler      = _uffd_data_handler,                           \
-       .uffd_pt_handler        = _uffd_pt_handler,                             \
-       .expected_events        = { .uffd_faults = _uffd_faults, },             \
-}
-
-#define TEST_DIRTY_LOG(_access, _with_af, _test_check, _pt_check)              \
-{                                                                              \
-       .name                   = SCAT3(dirty_log, _access, _with_af),          \
-       .data_memslot_flags     = KVM_MEM_LOG_DIRTY_PAGES,                      \
-       .pt_memslot_flags       = KVM_MEM_LOG_DIRTY_PAGES,                      \
-       .guest_prepare          = { _PREPARE(_with_af),                         \
-                                   _PREPARE(_access) },                        \
-       .guest_test             = _access,                                      \
-       .guest_test_check       = { _CHECK(_with_af), _test_check, _pt_check }, \
-       .expected_events        = { 0 },                                        \
-}
-
-#define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler,         \
-                               _uffd_faults, _test_check, _pt_check)           \
-{                                                                              \
-       .name                   = SCAT3(uffd_and_dirty_log, _access, _with_af), \
-       .data_memslot_flags     = KVM_MEM_LOG_DIRTY_PAGES,                      \
-       .pt_memslot_flags       = KVM_MEM_LOG_DIRTY_PAGES,                      \
-       .guest_prepare          = { _PREPARE(_with_af),                         \
-                                   _PREPARE(_access) },                        \
-       .guest_test             = _access,                                      \
-       .mem_mark_cmd           = CMD_HOLE_DATA | CMD_HOLE_PT,                  \
-       .guest_test_check       = { _CHECK(_with_af), _test_check, _pt_check }, \
-       .uffd_data_handler      = _uffd_data_handler,                           \
-       .uffd_pt_handler        = uffd_pt_handler,                              \
-       .expected_events        = { .uffd_faults = _uffd_faults, },             \
-}
-
-#define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits)                   \
-{                                                                              \
-       .name                   = SCAT2(ro_memslot, _access),                   \
-       .data_memslot_flags     = KVM_MEM_READONLY,                             \
-       .pt_memslot_flags       = KVM_MEM_READONLY,                             \
-       .guest_prepare          = { _PREPARE(_access) },                        \
-       .guest_test             = _access,                                      \
-       .mmio_handler           = _mmio_handler,                                \
-       .expected_events        = { .mmio_exits = _mmio_exits },                \
-}
-
-#define TEST_RO_MEMSLOT_NO_SYNDROME(_access)                                   \
-{                                                                              \
-       .name                   = SCAT2(ro_memslot_no_syndrome, _access),       \
-       .data_memslot_flags     = KVM_MEM_READONLY,                             \
-       .pt_memslot_flags       = KVM_MEM_READONLY,                             \
-       .guest_prepare          = { _PREPARE(_access) },                        \
-       .guest_test             = _access,                                      \
-       .fail_vcpu_run_handler  = fail_vcpu_run_mmio_no_syndrome_handler,       \
-       .expected_events        = { .fail_vcpu_runs = 1 },                      \
-}
-
-#define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits,     \
-                                     _test_check)                              \
-{                                                                              \
-       .name                   = SCAT2(ro_memslot, _access),                   \
-       .data_memslot_flags     = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,   \
-       .pt_memslot_flags       = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,   \
-       .guest_prepare          = { _PREPARE(_access) },                        \
-       .guest_test             = _access,                                      \
-       .guest_test_check       = { _test_check },                              \
-       .mmio_handler           = _mmio_handler,                                \
-       .expected_events        = { .mmio_exits = _mmio_exits},                 \
-}
-
-#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(_access, _test_check)                \
-{                                                                              \
-       .name                   = SCAT2(ro_memslot_no_syn_and_dlog, _access),   \
-       .data_memslot_flags     = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,   \
-       .pt_memslot_flags       = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,   \
-       .guest_prepare          = { _PREPARE(_access) },                        \
-       .guest_test             = _access,                                      \
-       .guest_test_check       = { _test_check },                              \
-       .fail_vcpu_run_handler  = fail_vcpu_run_mmio_no_syndrome_handler,       \
-       .expected_events        = { .fail_vcpu_runs = 1 },                      \
-}
-
-#define TEST_RO_MEMSLOT_AND_UFFD(_access, _mmio_handler, _mmio_exits,          \
-                                _uffd_data_handler, _uffd_faults)              \
-{                                                                              \
-       .name                   = SCAT2(ro_memslot_uffd, _access),              \
-       .data_memslot_flags     = KVM_MEM_READONLY,                             \
-       .pt_memslot_flags       = KVM_MEM_READONLY,                             \
-       .mem_mark_cmd           = CMD_HOLE_DATA | CMD_HOLE_PT,                  \
-       .guest_prepare          = { _PREPARE(_access) },                        \
-       .guest_test             = _access,                                      \
-       .uffd_data_handler      = _uffd_data_handler,                           \
-       .uffd_pt_handler        = uffd_pt_handler,                              \
-       .mmio_handler           = _mmio_handler,                                \
-       .expected_events        = { .mmio_exits = _mmio_exits,                  \
-                                   .uffd_faults = _uffd_faults },              \
-}
-
-#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(_access, _uffd_data_handler,      \
-                                            _uffd_faults)                      \
-{                                                                              \
-       .name                   = SCAT2(ro_memslot_no_syndrome, _access),       \
-       .data_memslot_flags     = KVM_MEM_READONLY,                             \
-       .pt_memslot_flags       = KVM_MEM_READONLY,                             \
-       .mem_mark_cmd           = CMD_HOLE_DATA | CMD_HOLE_PT,                  \
-       .guest_prepare          = { _PREPARE(_access) },                        \
-       .guest_test             = _access,                                      \
-       .uffd_data_handler      = _uffd_data_handler,                           \
-       .uffd_pt_handler        = uffd_pt_handler,                      \
-       .fail_vcpu_run_handler  = fail_vcpu_run_mmio_no_syndrome_handler,       \
-       .expected_events        = { .fail_vcpu_runs = 1,                        \
-                                   .uffd_faults = _uffd_faults },              \
-}
-
-static struct test_desc tests[] = {
-
-       /* Check that HW is setting the Access Flag (AF) (sanity checks). */
-       TEST_ACCESS(guest_read64, with_af, CMD_NONE),
-       TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE),
-       TEST_ACCESS(guest_cas, with_af, CMD_NONE),
-       TEST_ACCESS(guest_write64, with_af, CMD_NONE),
-       TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE),
-       TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE),
-       TEST_ACCESS(guest_exec, with_af, CMD_NONE),
-
-       /*
-        * Punch a hole in the data backing store, and then try multiple
-        * accesses: reads should return zeroes, and writes should
-        * re-populate the page. Moreover, the test also checks that no
-        * exception was generated in the guest.  Note that this
-        * reading/writing behavior is the same as reading/writing a
-        * punched page (with fallocate(FALLOC_FL_PUNCH_HOLE)) from
-        * userspace.
-        */
-       TEST_ACCESS(guest_read64, no_af, CMD_HOLE_DATA),
-       TEST_ACCESS(guest_cas, no_af, CMD_HOLE_DATA),
-       TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_DATA),
-       TEST_ACCESS(guest_write64, no_af, CMD_HOLE_DATA),
-       TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_DATA),
-       TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA),
-       TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA),
-
-       /*
-        * Punch holes in the data and PT backing stores and mark them for
-        * userfaultfd handling. This should result in 2 faults: the access
-        * on the data backing store, and its respective S1 page table walk
-        * (S1PTW).
-        */
-       TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
-                 uffd_data_handler, uffd_pt_handler, 2),
-       TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
-                 uffd_data_handler, uffd_pt_handler, 2),
-       TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
-                 uffd_data_handler, uffd_pt_handler, 2),
-       /*
-        * Can't test guest_at with_af as it's IMPDEF whether the AF is set.
-        * The S1PTW fault should still be marked as a write.
-        */
-       TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
-                 uffd_no_handler, uffd_pt_handler, 1),
-       TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
-                 uffd_data_handler, uffd_pt_handler, 2),
-       TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
-                 uffd_data_handler, uffd_pt_handler, 2),
-       TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
-                 uffd_data_handler, uffd_pt_handler, 2),
-       TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
-                 uffd_data_handler, uffd_pt_handler, 2),
-       TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
-                 uffd_data_handler, uffd_pt_handler, 2),
-
-       /*
-        * Try accesses when the data and PT memory regions are both
-        * tracked for dirty logging.
-        */
-       TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log,
-                      guest_check_s1ptw_wr_in_dirty_log),
-       TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log,
-                      guest_check_no_s1ptw_wr_in_dirty_log),
-       TEST_DIRTY_LOG(guest_ld_preidx, with_af,
-                      guest_check_no_write_in_dirty_log,
-                      guest_check_s1ptw_wr_in_dirty_log),
-       TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log,
-                      guest_check_no_s1ptw_wr_in_dirty_log),
-       TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log,
-                      guest_check_s1ptw_wr_in_dirty_log),
-       TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log,
-                      guest_check_s1ptw_wr_in_dirty_log),
-       TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log,
-                      guest_check_s1ptw_wr_in_dirty_log),
-       TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log,
-                      guest_check_s1ptw_wr_in_dirty_log),
-       TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log,
-                      guest_check_s1ptw_wr_in_dirty_log),
-
-       /*
-        * Access when the data and PT memory regions are both marked for
-        * dirty logging and UFFD at the same time. The expected result is
-        * that writes should mark the dirty log and trigger a userfaultfd
-        * write fault.  Reads/execs should result in a read userfaultfd
-        * fault, and nothing in the dirty log.  Any S1PTW should result in
-        * a write in the dirty log and a userfaultfd write.
-        */
-       TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af,
-                               uffd_data_handler, 2,
-                               guest_check_no_write_in_dirty_log,
-                               guest_check_s1ptw_wr_in_dirty_log),
-       TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af,
-                               uffd_data_handler, 2,
-                               guest_check_no_write_in_dirty_log,
-                               guest_check_no_s1ptw_wr_in_dirty_log),
-       TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af,
-                               uffd_data_handler,
-                               2, guest_check_no_write_in_dirty_log,
-                               guest_check_s1ptw_wr_in_dirty_log),
-       TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, uffd_no_handler, 1,
-                               guest_check_no_write_in_dirty_log,
-                               guest_check_s1ptw_wr_in_dirty_log),
-       TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af,
-                               uffd_data_handler, 2,
-                               guest_check_no_write_in_dirty_log,
-                               guest_check_s1ptw_wr_in_dirty_log),
-       TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af,
-                               uffd_data_handler,
-                               2, guest_check_write_in_dirty_log,
-                               guest_check_s1ptw_wr_in_dirty_log),
-       TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af,
-                               uffd_data_handler, 2,
-                               guest_check_write_in_dirty_log,
-                               guest_check_s1ptw_wr_in_dirty_log),
-       TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af,
-                               uffd_data_handler,
-                               2, guest_check_write_in_dirty_log,
-                               guest_check_s1ptw_wr_in_dirty_log),
-       TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af,
-                               uffd_data_handler, 2,
-                               guest_check_write_in_dirty_log,
-                               guest_check_s1ptw_wr_in_dirty_log),
-       /*
-        * Access when both the PT and data regions are marked read-only
-        * (with KVM_MEM_READONLY). Writes with a syndrome result in an
-        * MMIO exit, writes with no syndrome (e.g., CAS) result in a
-        * failed vcpu run, and reads/execs with and without syndromes do
-        * not fault.
-        */
-       TEST_RO_MEMSLOT(guest_read64, 0, 0),
-       TEST_RO_MEMSLOT(guest_ld_preidx, 0, 0),
-       TEST_RO_MEMSLOT(guest_at, 0, 0),
-       TEST_RO_MEMSLOT(guest_exec, 0, 0),
-       TEST_RO_MEMSLOT(guest_write64, mmio_on_test_gpa_handler, 1),
-       TEST_RO_MEMSLOT_NO_SYNDROME(guest_dc_zva),
-       TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas),
-       TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx),
-
-       /*
-        * The PT and data regions are both read-only and marked
-        * for dirty logging at the same time. The expected result is that
-        * for writes there should be no write in the dirty log. The
-        * readonly handling is the same as if the memslot was not marked
-        * for dirty logging: writes with a syndrome result in an MMIO
-        * exit, and writes with no syndrome result in a failed vcpu run.
-        */
-       TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_read64, 0, 0,
-                                     guest_check_no_write_in_dirty_log),
-       TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_ld_preidx, 0, 0,
-                                     guest_check_no_write_in_dirty_log),
-       TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_at, 0, 0,
-                                     guest_check_no_write_in_dirty_log),
-       TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_exec, 0, 0,
-                                     guest_check_no_write_in_dirty_log),
-       TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_write64, mmio_on_test_gpa_handler,
-                                     1, guest_check_no_write_in_dirty_log),
-       TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_dc_zva,
-                                                 guest_check_no_write_in_dirty_log),
-       TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_cas,
-                                                 guest_check_no_write_in_dirty_log),
-       TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_st_preidx,
-                                                 guest_check_no_write_in_dirty_log),
-
-       /*
-        * The PT and data regions are both read-only and punched with
-        * holes tracked with userfaultfd.  The expected result is the
-        * union of both userfaultfd and read-only behaviors. For example,
-        * write accesses result in a userfaultfd write fault and an MMIO
-        * exit.  Writes with no syndrome result in a failed vcpu run and
-        * no userfaultfd write fault. Reads result in userfaultfd getting
-        * triggered.
-        */
-       TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, uffd_data_handler, 2),
-       TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, uffd_data_handler, 2),
-       TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, uffd_no_handler, 1),
-       TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, uffd_data_handler, 2),
-       TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1,
-                                uffd_data_handler, 2),
-       TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, uffd_data_handler, 2),
-       TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, uffd_no_handler, 1),
-       TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, uffd_no_handler, 1),
-
-       { 0 }
-};
-
-static void for_each_test_and_guest_mode(enum vm_mem_backing_src_type src_type)
-{
-       struct test_desc *t;
-
-       for (t = &tests[0]; t->name; t++) {
-               if (t->skip)
-                       continue;
-
-               struct test_params p = {
-                       .src_type = src_type,
-                       .test_desc = t,
-               };
-
-               for_each_guest_mode(run_test, &p);
-       }
-}
-
-int main(int argc, char *argv[])
-{
-       enum vm_mem_backing_src_type src_type;
-       int opt;
-
-       src_type = DEFAULT_VM_MEM_SRC;
-
-       while ((opt = getopt(argc, argv, "hm:s:")) != -1) {
-               switch (opt) {
-               case 'm':
-                       guest_modes_cmdline(optarg);
-                       break;
-               case 's':
-                       src_type = parse_backing_src_type(optarg);
-                       break;
-               case 'h':
-               default:
-                       help(argv[0]);
-                       exit(0);
-               }
-       }
-
-       for_each_test_and_guest_mode(src_type);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c
deleted file mode 100644 (file)
index ab491ee..0000000
+++ /dev/null
@@ -1,290 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * psci_test - Tests relating to KVM's PSCI implementation.
- *
- * Copyright (c) 2021 Google LLC.
- *
- * This test includes:
- *  - A regression test for a race between KVM servicing the PSCI CPU_ON call
- *    and userspace reading the targeted vCPU's registers.
- *  - A test for KVM's handling of PSCI SYSTEM_SUSPEND and the associated
- *    KVM_SYSTEM_EVENT_SUSPEND UAPI.
- */
-
-#include <linux/kernel.h>
-#include <linux/psci.h>
-#include <asm/cputype.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "test_util.h"
-
-#define CPU_ON_ENTRY_ADDR 0xfeedf00dul
-#define CPU_ON_CONTEXT_ID 0xdeadc0deul
-
-static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr,
-                           uint64_t context_id)
-{
-       struct arm_smccc_res res;
-
-       smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id,
-                 0, 0, 0, 0, &res);
-
-       return res.a0;
-}
-
-static uint64_t psci_affinity_info(uint64_t target_affinity,
-                                  uint64_t lowest_affinity_level)
-{
-       struct arm_smccc_res res;
-
-       smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level,
-                 0, 0, 0, 0, 0, &res);
-
-       return res.a0;
-}
-
-static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id)
-{
-       struct arm_smccc_res res;
-
-       smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id,
-                 0, 0, 0, 0, 0, &res);
-
-       return res.a0;
-}
-
-static uint64_t psci_system_off2(uint64_t type, uint64_t cookie)
-{
-       struct arm_smccc_res res;
-
-       smccc_hvc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res);
-
-       return res.a0;
-}
-
-static uint64_t psci_features(uint32_t func_id)
-{
-       struct arm_smccc_res res;
-
-       smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res);
-
-       return res.a0;
-}
-
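-/* Power off @vcpu by setting its MP state to KVM_MP_STATE_STOPPED. */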
-static void vcpu_power_off(struct kvm_vcpu *vcpu)
-{
-       struct kvm_mp_state mp_state = {
-               .mp_state = KVM_MP_STATE_STOPPED,
-       };
-
-       vcpu_mp_state_set(vcpu, &mp_state);
-}
-
-static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source,
-                              struct kvm_vcpu **target)
-{
-       struct kvm_vcpu_init init;
-       struct kvm_vm *vm;
-
-       vm = vm_create(2);
-
-       vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
-       init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
-
-       *source = aarch64_vcpu_add(vm, 0, &init, guest_code);
-       *target = aarch64_vcpu_add(vm, 1, &init, guest_code);
-
-       return vm;
-}
-
-static void enter_guest(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       vcpu_run(vcpu);
-       if (get_ucall(vcpu, &uc) == UCALL_ABORT)
-               REPORT_GUEST_ASSERT(uc);
-}
-
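-/*
- * Check that the target vCPU came online at CPU_ON_ENTRY_ADDR with
- * CPU_ON_CONTEXT_ID in x0, as requested via PSCI CPU_ON.
- */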
-static void assert_vcpu_reset(struct kvm_vcpu *vcpu)
-{
-       uint64_t obs_pc, obs_x0;
-
-       obs_pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
-       obs_x0 = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0]));
-
-       TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR,
-                   "unexpected target cpu pc: %lx (expected: %lx)",
-                   obs_pc, CPU_ON_ENTRY_ADDR);
-       TEST_ASSERT(obs_x0 == CPU_ON_CONTEXT_ID,
-                   "unexpected target context id: %lx (expected: %lx)",
-                   obs_x0, CPU_ON_CONTEXT_ID);
-}
-
-static void guest_test_cpu_on(uint64_t target_cpu)
-{
-       uint64_t target_state;
-
-       GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID));
-
-       do {
-               target_state = psci_affinity_info(target_cpu, 0);
-
-               GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) ||
-                            (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF));
-       } while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON);
-
-       GUEST_DONE();
-}
-
-static void host_test_cpu_on(void)
-{
-       struct kvm_vcpu *source, *target;
-       uint64_t target_mpidr;
-       struct kvm_vm *vm;
-       struct ucall uc;
-
-       vm = setup_vm(guest_test_cpu_on, &source, &target);
-
-       /* Make sure the target is already off when executing the test. */
-       vcpu_power_off(target);
-
-       target_mpidr = vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1));
-       vcpu_args_set(source, 1, target_mpidr & MPIDR_HWID_BITMASK);
-       enter_guest(source);
-
-       if (get_ucall(source, &uc) != UCALL_DONE)
-               TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
-
-       assert_vcpu_reset(target);
-       kvm_vm_free(vm);
-}
-
-static void guest_test_system_suspend(void)
-{
-       uint64_t ret;
-
-       /* assert that SYSTEM_SUSPEND is discoverable */
-       GUEST_ASSERT(!psci_features(PSCI_1_0_FN_SYSTEM_SUSPEND));
-       GUEST_ASSERT(!psci_features(PSCI_1_0_FN64_SYSTEM_SUSPEND));
-
-       ret = psci_system_suspend(CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID);
-       GUEST_SYNC(ret);
-}
-
-static void host_test_system_suspend(void)
-{
-       struct kvm_vcpu *source, *target;
-       struct kvm_run *run;
-       struct kvm_vm *vm;
-
-       vm = setup_vm(guest_test_system_suspend, &source, &target);
-       vm_enable_cap(vm, KVM_CAP_ARM_SYSTEM_SUSPEND, 0);
-
-       vcpu_power_off(target);
-       run = source->run;
-
-       enter_guest(source);
-
-       TEST_ASSERT_KVM_EXIT_REASON(source, KVM_EXIT_SYSTEM_EVENT);
-       TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SUSPEND,
-                   "Unhandled system event: %u (expected: %u)",
-                   run->system_event.type, KVM_SYSTEM_EVENT_SUSPEND);
-
-       kvm_vm_free(vm);
-}
-
-static void guest_test_system_off2(void)
-{
-       uint64_t ret;
-
-       /* assert that SYSTEM_OFF2 is discoverable */
-       GUEST_ASSERT(psci_features(PSCI_1_3_FN_SYSTEM_OFF2) &
-                    PSCI_1_3_OFF_TYPE_HIBERNATE_OFF);
-       GUEST_ASSERT(psci_features(PSCI_1_3_FN64_SYSTEM_OFF2) &
-                    PSCI_1_3_OFF_TYPE_HIBERNATE_OFF);
-
-       /* With non-zero 'cookie' field, it should fail */
-       ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 1);
-       GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS);
-
-       /*
-        * This would normally never return, so KVM sets the return value
-        * to PSCI_RET_INTERNAL_FAILURE. The test case *does* return, so
-        * that it can test both values for HIBERNATE_OFF.
-        */
-       ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 0);
-       GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE);
-
-       /*
-        * Revision F.b of the PSCI v1.3 specification documents zero as an
-        * alias for HIBERNATE_OFF, since that's the value used in earlier
-        * revisions of the spec and some implementations in the field.
-        */
-       ret = psci_system_off2(0, 1);
-       GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS);
-
-       ret = psci_system_off2(0, 0);
-       GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE);
-
-       GUEST_DONE();
-}
-
-static void host_test_system_off2(void)
-{
-       struct kvm_vcpu *source, *target;
-       struct kvm_mp_state mps;
-       uint64_t psci_version = 0;
-       int nr_shutdowns = 0;
-       struct kvm_run *run;
-       struct ucall uc;
-
-       setup_vm(guest_test_system_off2, &source, &target);
-
-       psci_version = vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION);
-
-       TEST_ASSERT(psci_version >= PSCI_VERSION(1, 3),
-                   "Unexpected PSCI version %lu.%lu",
-                   PSCI_VERSION_MAJOR(psci_version),
-                   PSCI_VERSION_MINOR(psci_version));
-
-       vcpu_power_off(target);
-       run = source->run;
-
-       enter_guest(source);
-       while (run->exit_reason == KVM_EXIT_SYSTEM_EVENT) {
-               TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SHUTDOWN,
-                           "Unhandled system event: %u (expected: %u)",
-                           run->system_event.type, KVM_SYSTEM_EVENT_SHUTDOWN);
-               TEST_ASSERT(run->system_event.ndata >= 1,
-                           "Unexpected amount of system event data: %u (expected >= 1)",
-                           run->system_event.ndata);
-               TEST_ASSERT(run->system_event.data[0] & KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2,
-                           "PSCI_OFF2 flag not set. Flags %llu (expected %llu)",
-                           run->system_event.data[0], KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2);
-
-               nr_shutdowns++;
-
-               /* Restart the vCPU */
-               mps.mp_state = KVM_MP_STATE_RUNNABLE;
-               vcpu_mp_state_set(source, &mps);
-
-               enter_guest(source);
-       }
-
-       TEST_ASSERT(get_ucall(source, &uc) == UCALL_DONE, "Guest did not exit cleanly");
-       TEST_ASSERT(nr_shutdowns == 2, "Two shutdown events were expected, but saw %d", nr_shutdowns);
-}
-
-int main(void)
-{
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SYSTEM_SUSPEND));
-
-       host_test_cpu_on();
-       host_test_system_suspend();
-       host_test_system_off2();
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
deleted file mode 100644 (file)
index bc6cf50..0000000
+++ /dev/null
@@ -1,695 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * set_id_regs - Test for setting ID registers from userspace.
- *
- * Copyright (c) 2023 Google LLC.
- *
- * Test that KVM supports setting ID registers from userspace and handles the
- * feature set correctly.
- */
-
-#include <stdint.h>
-#include "kvm_util.h"
-#include "processor.h"
-#include "test_util.h"
-#include <linux/bitfield.h>
-
-enum ftr_type {
-       FTR_EXACT,                      /* Use a predefined safe value */
-       FTR_LOWER_SAFE,                 /* Smaller value is safe */
-       FTR_HIGHER_SAFE,                /* Bigger value is safe */
-       FTR_HIGHER_OR_ZERO_SAFE,        /* Bigger value is safe, but 0 is biggest */
-       FTR_END,                        /* Mark the last ftr bits */
-};
-
-#define FTR_SIGNED     true    /* Value should be treated as signed */
-#define FTR_UNSIGNED   false   /* Value should be treated as unsigned */
-
-struct reg_ftr_bits {
-       char *name;
-       bool sign;
-       enum ftr_type type;
-       uint8_t shift;
-       uint64_t mask;
-       /*
-        * For FTR_EXACT, safe_val is used as the exact safe value.
-        * For FTR_LOWER_SAFE, safe_val is used as the minimal safe value.
-        */
-       int64_t safe_val;
-};
-
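-/* Pairs an ID register with the description of its feature fields under test. */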
-struct test_feature_reg {
-       uint32_t reg;
-       const struct reg_ftr_bits *ftr_bits;
-};
-
-#define __REG_FTR_BITS(NAME, SIGNED, TYPE, SHIFT, MASK, SAFE_VAL)      \
-       {                                                               \
-               .name = #NAME,                                          \
-               .sign = SIGNED,                                         \
-               .type = TYPE,                                           \
-               .shift = SHIFT,                                         \
-               .mask = MASK,                                           \
-               .safe_val = SAFE_VAL,                                   \
-       }
-
-#define REG_FTR_BITS(type, reg, field, safe_val) \
-       __REG_FTR_BITS(reg##_##field, FTR_UNSIGNED, type, reg##_##field##_SHIFT, \
-                      reg##_##field##_MASK, safe_val)
-
-#define S_REG_FTR_BITS(type, reg, field, safe_val) \
-       __REG_FTR_BITS(reg##_##field, FTR_SIGNED, type, reg##_##field##_SHIFT, \
-                      reg##_##field##_MASK, safe_val)
-
-#define REG_FTR_END                                    \
-       {                                               \
-               .type = FTR_END,                        \
-       }
-
-static const struct reg_ftr_bits ftr_id_aa64dfr0_el1[] = {
-       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DoubleLock, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, WRPs, 0),
-       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, PMUVer, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, ID_AA64DFR0_EL1_DebugVer_IMP),
-       REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_dfr0_el1[] = {
-       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, PerfMon, ID_DFR0_EL1_PerfMon_PMUv3),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, CopDbg, ID_DFR0_EL1_CopDbg_Armv8),
-       REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64isar0_el1[] = {
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RNDR, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TLB, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TS, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, FHM, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, DP, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM4, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM3, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA3, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RDM, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TME, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, ATOMIC, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, CRC32, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA2, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA1, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, AES, 0),
-       REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64isar1_el1[] = {
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LS64, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, XS, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, I8MM, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DGH, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, BF16, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SPECRES, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SB, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FRINTTS, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LRCPC, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FCMA, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, JSCVT, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DPB, 0),
-       REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = {
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, BC, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, RPRES, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, WFxT, 0),
-       REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0),
-       REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0),
-       REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = {
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0),
-       REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0),
-       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0),
-       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGENDEL0, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, SNSMEM, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGEND, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ASIDBITS, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, PARANGE, 0),
-       REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = {
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0),
-       REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HPDS, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HAFDBS, 0),
-       REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = {
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, E0PD, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, BBM, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, TTL, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, AT, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, ST, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, VARange, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, IESB, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, LSM, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, UAO, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, CnP, 0),
-       REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = {
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, I8MM, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SM4, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SHA3, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BF16, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BitPerm, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, AES, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SVEver, 0),
-       REG_FTR_END,
-};
-
-#define TEST_REG(id, table)                    \
-       {                                       \
-               .reg = id,                      \
-               .ftr_bits = &((table)[0]),      \
-       }
-
-static struct test_feature_reg test_regs[] = {
-       TEST_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0_el1),
-       TEST_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0_el1),
-       TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1),
-       TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1),
-       TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1),
-       TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1),
-       TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1),
-       TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1),
-       TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1),
-       TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1),
-       TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1),
-};
-
-#define GUEST_REG_SYNC(id) GUEST_SYNC_ARGS(0, id, read_sysreg_s(id), 0, 0);
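Each GUEST_REG_SYNC() in guest_code() below reports a register's encoding and
its guest-visible value back to the host as a ucall, so test_guest_reg_read()
can compare it against what userspace wrote. Roughly, a single invocation is
just the macro substitution (GUEST_SYNC_ARGS and read_sysreg_s come from the
selftests headers):

        /* e.g. GUEST_REG_SYNC(SYS_ID_AA64DFR0_EL1) expands to: */
        GUEST_SYNC_ARGS(0, SYS_ID_AA64DFR0_EL1, read_sysreg_s(SYS_ID_AA64DFR0_EL1), 0, 0);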
-
-static void guest_code(void)
-{
-       GUEST_REG_SYNC(SYS_ID_AA64DFR0_EL1);
-       GUEST_REG_SYNC(SYS_ID_DFR0_EL1);
-       GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1);
-       GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1);
-       GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1);
-       GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1);
-       GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1);
-       GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1);
-       GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
-       GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
-       GUEST_REG_SYNC(SYS_CTR_EL0);
-
-       GUEST_DONE();
-}
-
-/* Return a safe value for a given ftr_bits and ftr value */
-uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
-{
-       uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
-
-       if (ftr_bits->sign == FTR_UNSIGNED) {
-               switch (ftr_bits->type) {
-               case FTR_EXACT:
-                       ftr = ftr_bits->safe_val;
-                       break;
-               case FTR_LOWER_SAFE:
-                       if (ftr > ftr_bits->safe_val)
-                               ftr--;
-                       break;
-               case FTR_HIGHER_SAFE:
-                       if (ftr < ftr_max)
-                               ftr++;
-                       break;
-               case FTR_HIGHER_OR_ZERO_SAFE:
-                       if (ftr == ftr_max)
-                               ftr = 0;
-                       else if (ftr != 0)
-                               ftr++;
-                       break;
-               default:
-                       break;
-               }
-       } else if (ftr != ftr_max) {
-               switch (ftr_bits->type) {
-               case FTR_EXACT:
-                       ftr = ftr_bits->safe_val;
-                       break;
-               case FTR_LOWER_SAFE:
-                       if (ftr > ftr_bits->safe_val)
-                               ftr--;
-                       break;
-               case FTR_HIGHER_SAFE:
-                       if (ftr < ftr_max - 1)
-                               ftr++;
-                       break;
-               case FTR_HIGHER_OR_ZERO_SAFE:
-                       if (ftr != 0 && ftr != ftr_max - 1)
-                               ftr++;
-                       break;
-               default:
-                       break;
-               }
-       }
-
-       return ftr;
-}
-
-/* Return an invalid value for a given ftr_bits and ftr value */
-uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
-{
-       uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
-
-       if (ftr_bits->sign == FTR_UNSIGNED) {
-               switch (ftr_bits->type) {
-               case FTR_EXACT:
-                       ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
-                       break;
-               case FTR_LOWER_SAFE:
-                       ftr++;
-                       break;
-               case FTR_HIGHER_SAFE:
-                       ftr--;
-                       break;
-               case FTR_HIGHER_OR_ZERO_SAFE:
-                       if (ftr == 0)
-                               ftr = ftr_max;
-                       else
-                               ftr--;
-                       break;
-               default:
-                       break;
-               }
-       } else if (ftr != ftr_max) {
-               switch (ftr_bits->type) {
-               case FTR_EXACT:
-                       ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
-                       break;
-               case FTR_LOWER_SAFE:
-                       ftr++;
-                       break;
-               case FTR_HIGHER_SAFE:
-                       ftr--;
-                       break;
-               case FTR_HIGHER_OR_ZERO_SAFE:
-                       if (ftr == 0)
-                               ftr = ftr_max - 1;
-                       else
-                               ftr--;
-                       break;
-               default:
-                       break;
-               }
-       } else {
-               ftr = 0;
-       }
-
-       return ftr;
-}
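As a quick illustration of how the two helpers above behave for the common
FTR_LOWER_SAFE case (a sketch, not part of the test): for an unsigned field
whose current value is 2 and whose minimal safe value is 0, lowering the
field is considered safe, while raising it past the host value must be
rejected.

        const struct reg_ftr_bits example = {
                .sign = FTR_UNSIGNED,
                .type = FTR_LOWER_SAFE,
                .safe_val = 0,
        };

        uint64_t safe = get_safe_value(&example, 2);    /* 1: one below the host value */
        uint64_t bad  = get_invalid_value(&example, 2); /* 3: one above the host value */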
-
-static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
-                                    const struct reg_ftr_bits *ftr_bits)
-{
-       uint8_t shift = ftr_bits->shift;
-       uint64_t mask = ftr_bits->mask;
-       uint64_t val, new_val, ftr;
-
-       val = vcpu_get_reg(vcpu, reg);
-       ftr = (val & mask) >> shift;
-
-       ftr = get_safe_value(ftr_bits, ftr);
-
-       ftr <<= shift;
-       val &= ~mask;
-       val |= ftr;
-
-       vcpu_set_reg(vcpu, reg, val);
-       new_val = vcpu_get_reg(vcpu, reg);
-       TEST_ASSERT_EQ(new_val, val);
-
-       return new_val;
-}
-
-static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg,
-                             const struct reg_ftr_bits *ftr_bits)
-{
-       uint8_t shift = ftr_bits->shift;
-       uint64_t mask = ftr_bits->mask;
-       uint64_t val, old_val, ftr;
-       int r;
-
-       val = vcpu_get_reg(vcpu, reg);
-       ftr = (val & mask) >> shift;
-
-       ftr = get_invalid_value(ftr_bits, ftr);
-
-       old_val = val;
-       ftr <<= shift;
-       val &= ~mask;
-       val |= ftr;
-
-       r = __vcpu_set_reg(vcpu, reg, val);
-       TEST_ASSERT(r < 0 && errno == EINVAL,
-                   "Unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
-
-       val = vcpu_get_reg(vcpu, reg);
-       TEST_ASSERT_EQ(val, old_val);
-}
-
-static uint64_t test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE];
-
-#define encoding_to_range_idx(encoding)                                                        \
-       KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(encoding), sys_reg_Op1(encoding),      \
-                                    sys_reg_CRn(encoding), sys_reg_CRm(encoding),      \
-                                    sys_reg_Op2(encoding))
-
-
-static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only)
-{
-       uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
-       struct reg_mask_range range = {
-               .addr = (__u64)masks,
-       };
-       int ret;
-
-       /* KVM should return error when reserved field is not zero */
-       range.reserved[0] = 1;
-       ret = __vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
-       TEST_ASSERT(ret, "KVM doesn't check invalid parameters.");
-
-       /* Get writable masks for feature ID registers */
-       memset(range.reserved, 0, sizeof(range.reserved));
-       vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
-
-       for (int i = 0; i < ARRAY_SIZE(test_regs); i++) {
-               const struct reg_ftr_bits *ftr_bits = test_regs[i].ftr_bits;
-               uint32_t reg_id = test_regs[i].reg;
-               uint64_t reg = KVM_ARM64_SYS_REG(reg_id);
-               int idx;
-
-               /* Get the index to masks array for the idreg */
-               idx = encoding_to_range_idx(reg_id);
-
-               for (int j = 0;  ftr_bits[j].type != FTR_END; j++) {
-                       /* Skip aarch32 regs on an aarch64-only system, since they are RAZ/WI. */
-                       if (aarch64_only && sys_reg_CRm(reg_id) < 4) {
-                               ksft_test_result_skip("%s on AARCH64 only system\n",
-                                                     ftr_bits[j].name);
-                               continue;
-                       }
-
-                       /* Make sure the feature field is writable */
-                       TEST_ASSERT_EQ(masks[idx] & ftr_bits[j].mask, ftr_bits[j].mask);
-
-                       test_reg_set_fail(vcpu, reg, &ftr_bits[j]);
-
-                       test_reg_vals[idx] = test_reg_set_success(vcpu, reg,
-                                                                 &ftr_bits[j]);
-
-                       ksft_test_result_pass("%s\n", ftr_bits[j].name);
-               }
-       }
-}
-
-#define MPAM_IDREG_TEST        6
-static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu)
-{
-       uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
-       struct reg_mask_range range = {
-               .addr = (__u64)masks,
-       };
-       uint64_t val;
-       int idx, err;
-
-       /*
-        * If ID_AA64PFR0.MPAM is _not_ officially modifiable and is zero,
-        * check that, if it can be set to 1 (i.e. it is supported by the
-        * hardware), it can't be set to other values.
-        */
-
-       /* Get writable masks for feature ID registers */
-       memset(range.reserved, 0, sizeof(range.reserved));
-       vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
-
-       /* Writeable? Nothing to test! */
-       idx = encoding_to_range_idx(SYS_ID_AA64PFR0_EL1);
-       if ((masks[idx] & ID_AA64PFR0_EL1_MPAM_MASK) == ID_AA64PFR0_EL1_MPAM_MASK) {
-               ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is officially writable, nothing to test\n");
-               return;
-       }
-
-       /* Get the id register value */
-       val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
-
-       /* Try to set MPAM=0. This should always be possible. */
-       val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
-       val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 0);
-       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
-       if (err)
-               ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM=0 was not accepted\n");
-       else
-               ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=0 worked\n");
-
-       /* Try to set MPAM=1 */
-       val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
-       val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 1);
-       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
-       if (err)
-               ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is not writable, nothing to test\n");
-       else
-               ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=1 was writable\n");
-
-       /* Try to set MPAM=2 */
-       val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
-       val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 2);
-       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
-       if (err)
-               ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM not arbitrarily modifiable\n");
-       else
-               ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM value should not be ignored\n");
-
-       /* And again for ID_AA64PFR1_EL1.MPAM_frac */
-       idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1);
-       if ((masks[idx] & ID_AA64PFR1_EL1_MPAM_frac_MASK) == ID_AA64PFR1_EL1_MPAM_frac_MASK) {
-               ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is officially writable, nothing to test\n");
-               return;
-       }
-
-       /* Get the id register value */
-       val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
-
-       /* Try to set MPAM_frac=0. This should always be possible. */
-       val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
-       val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 0);
-       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
-       if (err)
-               ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac=0 was not accepted\n");
-       else
-               ksft_test_result_pass("ID_AA64PFR1_EL1.MPAM_frac=0 worked\n");
-
-       /* Try to set MPAM_frac=1 */
-       val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
-       val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 1);
-       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
-       if (err)
-               ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is not writable, nothing to test\n");
-       else
-               ksft_test_result_pass("ID_AA64PFR1_EL1.MPAM_frac=1 was writable\n");
-
-       /* Try to set MPAM_frac=2 */
-       val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
-       val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 2);
-       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
-       if (err)
-               ksft_test_result_pass("ID_AA64PFR1_EL1.MPAM_frac not arbitrarily modifiable\n");
-       else
-               ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n");
-}
-
-static void test_guest_reg_read(struct kvm_vcpu *vcpu)
-{
-       bool done = false;
-       struct ucall uc;
-
-       while (!done) {
-               vcpu_run(vcpu);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       break;
-               case UCALL_SYNC:
-                       /* Make sure the written values are seen by guest */
-                       TEST_ASSERT_EQ(test_reg_vals[encoding_to_range_idx(uc.args[2])],
-                                      uc.args[3]);
-                       break;
-               case UCALL_DONE:
-                       done = true;
-                       break;
-               default:
-                       TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
-               }
-       }
-}
-
-/* Politely lifted from arch/arm64/include/asm/cache.h */
-/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
-#define CLIDR_CTYPE_SHIFT(level)       (3 * (level - 1))
-#define CLIDR_CTYPE_MASK(level)                (7 << CLIDR_CTYPE_SHIFT(level))
-#define CLIDR_CTYPE(clidr, level)      \
-       (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
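A quick worked example of the decode (illustrative value, not taken from real
hardware): with CLIDR_EL1 == 0x23, the Ctype fields read as follows, and
test_clidr() below would insert its unified cache at level 3, the first empty
level.

        uint64_t clidr = 0x23;

        /* CLIDR_CTYPE(clidr, 1) == 0x3: separate I and D caches at level 1 */
        /* CLIDR_CTYPE(clidr, 2) == 0x4: unified cache at level 2 */
        /* CLIDR_CTYPE(clidr, 3) == 0x0: no cache at level 3 */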
-
-static void test_clidr(struct kvm_vcpu *vcpu)
-{
-       uint64_t clidr;
-       int level;
-
-       clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1));
-
-       /* find the first empty level in the cache hierarchy */
-       for (level = 1; level < 7; level++) {
-               if (!CLIDR_CTYPE(clidr, level))
-                       break;
-       }
-
-       /*
-        * If you have a mind-boggling 7 levels of cache, congratulations, you
-        * get to fix this.
-        */
-       TEST_ASSERT(level <= 7, "can't find an empty level in cache hierarchy");
-
-       /* stick in a unified cache level */
-       clidr |= BIT(2) << CLIDR_CTYPE_SHIFT(level);
-
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), clidr);
-       test_reg_vals[encoding_to_range_idx(SYS_CLIDR_EL1)] = clidr;
-}
-
-static void test_ctr(struct kvm_vcpu *vcpu)
-{
-       u64 ctr;
-
-       ctr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0));
-       ctr &= ~CTR_EL0_DIC_MASK;
-       if (ctr & CTR_EL0_IminLine_MASK)
-               ctr--;
-
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), ctr);
-       test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr;
-}
-
-static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu)
-{
-       u64 val;
-
-       test_clidr(vcpu);
-       test_ctr(vcpu);
-
-       val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1));
-       val++;
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val);
-
-       test_reg_vals[encoding_to_range_idx(SYS_MPIDR_EL1)] = val;
-       ksft_test_result_pass("%s\n", __func__);
-}
-
-static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encoding)
-{
-       size_t idx = encoding_to_range_idx(encoding);
-       uint64_t observed;
-
-       observed = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding));
-       TEST_ASSERT_EQ(test_reg_vals[idx], observed);
-}
-
-static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu)
-{
-       /*
-        * Calls KVM_ARM_VCPU_INIT behind the scenes, which will do an
-        * architectural reset of the vCPU.
-        */
-       aarch64_vcpu_setup(vcpu, NULL);
-
-       for (int i = 0; i < ARRAY_SIZE(test_regs); i++)
-               test_assert_id_reg_unchanged(vcpu, test_regs[i].reg);
-
-       test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1);
-       test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1);
-       test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0);
-
-       ksft_test_result_pass("%s\n", __func__);
-}
-
-int main(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       bool aarch64_only;
-       uint64_t val, el0;
-       int test_cnt;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       /* Check for AARCH64 only system */
-       val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
-       el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
-       aarch64_only = (el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
-
-       ksft_print_header();
-
-       test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) +
-                  ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) +
-                  ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
-                  ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) +
-                  ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) +
-                  ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 2 +
-                  MPAM_IDREG_TEST;
-
-       ksft_set_plan(test_cnt);
-
-       test_vm_ftr_id_regs(vcpu, aarch64_only);
-       test_vcpu_ftr_id_regs(vcpu);
-       test_user_set_mpam_reg(vcpu);
-
-       test_guest_reg_read(vcpu);
-
-       test_reset_preserves_id_regs(vcpu);
-
-       kvm_vm_free(vm);
-
-       ksft_finished();
-}
diff --git a/tools/testing/selftests/kvm/aarch64/smccc_filter.c b/tools/testing/selftests/kvm/aarch64/smccc_filter.c
deleted file mode 100644 (file)
index 2d189f3..0000000
+++ /dev/null
@@ -1,268 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * smccc_filter - Tests for the SMCCC filter UAPI.
- *
- * Copyright (c) 2023 Google LLC
- *
- * This test includes:
- *  - Tests that the UAPI constraints are upheld by KVM. For example, userspace
- *    is prevented from filtering the architecture range of SMCCC calls.
- *  - Tests that the filter actions (DENIED, FWD_TO_USER) work as intended.
- */
-
-#include <linux/arm-smccc.h>
-#include <linux/psci.h>
-#include <stdint.h>
-
-#include "processor.h"
-#include "test_util.h"
-
-enum smccc_conduit {
-       HVC_INSN,
-       SMC_INSN,
-};
-
-#define for_each_conduit(conduit)                                      \
-       for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++)
-
-static void guest_main(uint32_t func_id, enum smccc_conduit conduit)
-{
-       struct arm_smccc_res res;
-
-       if (conduit == SMC_INSN)
-               smccc_smc(func_id, 0, 0, 0, 0, 0, 0, 0, &res);
-       else
-               smccc_hvc(func_id, 0, 0, 0, 0, 0, 0, 0, &res);
-
-       GUEST_SYNC(res.a0);
-}
-
-static int __set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
-                             enum kvm_smccc_filter_action action)
-{
-       struct kvm_smccc_filter filter = {
-               .base           = start,
-               .nr_functions   = nr_functions,
-               .action         = action,
-       };
-
-       return __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL,
-                                    KVM_ARM_VM_SMCCC_FILTER, &filter);
-}
-
-static void set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
-                            enum kvm_smccc_filter_action action)
-{
-       int ret = __set_smccc_filter(vm, start, nr_functions, action);
-
-       TEST_ASSERT(!ret, "failed to configure SMCCC filter: %d", ret);
-}
-
-static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu)
-{
-       struct kvm_vcpu_init init;
-       struct kvm_vm *vm;
-
-       vm = vm_create(1);
-       vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
-
-       /*
-        * Enable in-kernel emulation of PSCI to ensure that calls are denied
-        * due to the SMCCC filter, not because of KVM.
-        */
-       init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
-
-       *vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main);
-       return vm;
-}
-
-static void test_pad_must_be_zero(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = setup_vm(&vcpu);
-       struct kvm_smccc_filter filter = {
-               .base           = PSCI_0_2_FN_PSCI_VERSION,
-               .nr_functions   = 1,
-               .action         = KVM_SMCCC_FILTER_DENY,
-               .pad            = { -1 },
-       };
-       int r;
-
-       r = __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL,
-                                 KVM_ARM_VM_SMCCC_FILTER, &filter);
-       TEST_ASSERT(r < 0 && errno == EINVAL,
-                   "Setting filter with nonzero padding should return EINVAL");
-}
-
-/* Ensure that userspace cannot filter the Arm Architecture SMCCC range */
-static void test_filter_reserved_range(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = setup_vm(&vcpu);
-       uint32_t smc64_fn;
-       int r;
-
-       r = __set_smccc_filter(vm, ARM_SMCCC_ARCH_WORKAROUND_1,
-                              1, KVM_SMCCC_FILTER_DENY);
-       TEST_ASSERT(r < 0 && errno == EEXIST,
-                   "Attempt to filter reserved range should return EEXIST");
-
-       smc64_fn = ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_64,
-                                     0, 0);
-
-       r = __set_smccc_filter(vm, smc64_fn, 1, KVM_SMCCC_FILTER_DENY);
-       TEST_ASSERT(r < 0 && errno == EEXIST,
-                   "Attempt to filter reserved range should return EEXIST");
-
-       kvm_vm_free(vm);
-}
-
-static void test_invalid_nr_functions(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = setup_vm(&vcpu);
-       int r;
-
-       r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 0, KVM_SMCCC_FILTER_DENY);
-       TEST_ASSERT(r < 0 && errno == EINVAL,
-                   "Attempt to filter 0 functions should return EINVAL");
-
-       kvm_vm_free(vm);
-}
-
-static void test_overflow_nr_functions(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = setup_vm(&vcpu);
-       int r;
-
-       r = __set_smccc_filter(vm, ~0, ~0, KVM_SMCCC_FILTER_DENY);
-       TEST_ASSERT(r < 0 && errno == EINVAL,
-                   "Attempt to overflow filter range should return EINVAL");
-
-       kvm_vm_free(vm);
-}
-
-static void test_reserved_action(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = setup_vm(&vcpu);
-       int r;
-
-       r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, -1);
-       TEST_ASSERT(r < 0 && errno == EINVAL,
-                   "Attempt to use reserved filter action should return EINVAL");
-
-       kvm_vm_free(vm);
-}
-
-
-/* Test that overlapping configurations of the SMCCC filter are rejected */
-static void test_filter_overlap(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = setup_vm(&vcpu);
-       int r;
-
-       set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY);
-
-       r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY);
-       TEST_ASSERT(r < 0 && errno == EEXIST,
-                   "Attempt to filter already configured range should return EEXIST");
-
-       kvm_vm_free(vm);
-}
-
-static void expect_call_denied(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       if (get_ucall(vcpu, &uc) != UCALL_SYNC)
-               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
-
-       TEST_ASSERT(uc.args[1] == SMCCC_RET_NOT_SUPPORTED,
-                   "Unexpected SMCCC return code: %lu", uc.args[1]);
-}
-
-/* Denied SMCCC calls have a return code of SMCCC_RET_NOT_SUPPORTED */
-static void test_filter_denied(void)
-{
-       enum smccc_conduit conduit;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       for_each_conduit(conduit) {
-               vm = setup_vm(&vcpu);
-
-               set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_DENY);
-               vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit);
-
-               vcpu_run(vcpu);
-               expect_call_denied(vcpu);
-
-               kvm_vm_free(vm);
-       }
-}
-
-static void expect_call_fwd_to_user(struct kvm_vcpu *vcpu, uint32_t func_id,
-                                   enum smccc_conduit conduit)
-{
-       struct kvm_run *run = vcpu->run;
-
-       TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERCALL,
-                   "Unexpected exit reason: %u", run->exit_reason);
-       TEST_ASSERT(run->hypercall.nr == func_id,
-                   "Unexpected SMCCC function: %llu", run->hypercall.nr);
-
-       if (conduit == SMC_INSN)
-               TEST_ASSERT(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC,
-                           "KVM_HYPERCALL_EXIT_SMC is not set");
-       else
-               TEST_ASSERT(!(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC),
-                           "KVM_HYPERCALL_EXIT_SMC is set");
-}
-
-/* SMCCC calls forwarded to userspace cause KVM_EXIT_HYPERCALL exits */
-static void test_filter_fwd_to_user(void)
-{
-       enum smccc_conduit conduit;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       for_each_conduit(conduit) {
-               vm = setup_vm(&vcpu);
-
-               set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_FWD_TO_USER);
-               vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit);
-
-               vcpu_run(vcpu);
-               expect_call_fwd_to_user(vcpu, PSCI_0_2_FN_PSCI_VERSION, conduit);
-
-               kvm_vm_free(vm);
-       }
-}
-
-static bool kvm_supports_smccc_filter(void)
-{
-       struct kvm_vm *vm = vm_create_barebones();
-       int r;
-
-       r = __kvm_has_device_attr(vm->fd, KVM_ARM_VM_SMCCC_CTRL, KVM_ARM_VM_SMCCC_FILTER);
-
-       kvm_vm_free(vm);
-       return !r;
-}
-
-int main(void)
-{
-       TEST_REQUIRE(kvm_supports_smccc_filter());
-
-       test_pad_must_be_zero();
-       test_invalid_nr_functions();
-       test_overflow_nr_functions();
-       test_reserved_action();
-       test_filter_reserved_range();
-       test_filter_overlap();
-       test_filter_denied();
-       test_filter_fwd_to_user();
-}
diff --git a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c b/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c
deleted file mode 100644 (file)
index 80b74c6..0000000
+++ /dev/null
@@ -1,121 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vcpu_width_config - Test KVM_ARM_VCPU_INIT() with KVM_ARM_VCPU_EL1_32BIT.
- *
- * Copyright (c) 2022 Google LLC.
- *
- * This is a test that ensures that non-mixed-width vCPUs (all 64bit vCPUs
- * or all 32bit vCPUs) can be configured, and that mixed-width vCPUs cannot
- * be configured.
- */
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "test_util.h"
-
-
-/*
- * Add a vCPU, run KVM_ARM_VCPU_INIT with @init0, then add another vCPU
- * and run KVM_ARM_VCPU_INIT with @init1.
- */
-static int add_init_2vcpus(struct kvm_vcpu_init *init0,
-                          struct kvm_vcpu_init *init1)
-{
-       struct kvm_vcpu *vcpu0, *vcpu1;
-       struct kvm_vm *vm;
-       int ret;
-
-       vm = vm_create_barebones();
-
-       vcpu0 = __vm_vcpu_add(vm, 0);
-       ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0);
-       if (ret)
-               goto free_exit;
-
-       vcpu1 = __vm_vcpu_add(vm, 1);
-       ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1);
-
-free_exit:
-       kvm_vm_free(vm);
-       return ret;
-}
-
-/*
- * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init0,
- * and run KVM_ARM_VCPU_INIT for another vCPU with @init1.
- */
-static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init0,
-                                 struct kvm_vcpu_init *init1)
-{
-       struct kvm_vcpu *vcpu0, *vcpu1;
-       struct kvm_vm *vm;
-       int ret;
-
-       vm = vm_create_barebones();
-
-       vcpu0 = __vm_vcpu_add(vm, 0);
-       vcpu1 = __vm_vcpu_add(vm, 1);
-
-       ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0);
-       if (ret)
-               goto free_exit;
-
-       ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1);
-
-free_exit:
-       kvm_vm_free(vm);
-       return ret;
-}
-
-/*
- * Tests that two 64bit vCPUs can be configured, two 32bit vCPUs can be
- * configured, and two mixed-width vCPUs cannot be configured.
- * For each of those three cases, vCPUs are configured in two different
- * orders.  One order runs KVM_CREATE_VCPU for 2 vCPUs and then runs
- * KVM_ARM_VCPU_INIT for both of them.  The other runs KVM_CREATE_VCPU and
- * KVM_ARM_VCPU_INIT for one vCPU, and then runs those commands for the
- * other vCPU.
- */
-int main(void)
-{
-       struct kvm_vcpu_init init0, init1;
-       struct kvm_vm *vm;
-       int ret;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_EL1_32BIT));
-
-       /* Get the preferred target type and copy that to init1 for later use */
-       vm = vm_create_barebones();
-       vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init0);
-       kvm_vm_free(vm);
-       init1 = init0;
-
-       /* Test with 64bit vCPUs */
-       ret = add_init_2vcpus(&init0, &init0);
-       TEST_ASSERT(ret == 0,
-                   "Configuring 64bit EL1 vCPUs failed unexpectedly");
-       ret = add_2vcpus_init_2vcpus(&init0, &init0);
-       TEST_ASSERT(ret == 0,
-                   "Configuring 64bit EL1 vCPUs failed unexpectedly");
-
-       /* Test with 32bit vCPUs */
-       init0.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
-       ret = add_init_2vcpus(&init0, &init0);
-       TEST_ASSERT(ret == 0,
-                   "Configuring 32bit EL1 vCPUs failed unexpectedly");
-       ret = add_2vcpus_init_2vcpus(&init0, &init0);
-       TEST_ASSERT(ret == 0,
-                   "Configuring 32bit EL1 vCPUs failed unexpectedly");
-
-       /* Test with mixed-width vCPUs  */
-       init0.features[0] = 0;
-       init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
-       ret = add_init_2vcpus(&init0, &init1);
-       TEST_ASSERT(ret != 0,
-                   "Configuring mixed-width vCPUs worked unexpectedly");
-       ret = add_2vcpus_init_2vcpus(&init0, &init1);
-       TEST_ASSERT(ret != 0,
-                   "Configuring mixed-width vCPUs worked unexpectedly");
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/aarch64/vgic_init.c
deleted file mode 100644 (file)
index b3b5fb0..0000000
+++ /dev/null
@@ -1,764 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * vgic init sequence tests
- *
- * Copyright (C) 2020, Red Hat, Inc.
- */
-#include <linux/kernel.h>
-#include <sys/syscall.h>
-#include <asm/kvm.h>
-#include <asm/kvm_para.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vgic.h"
-
-#define NR_VCPUS               4
-
-#define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset)
-
-#define GICR_TYPER 0x8
-
-#define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2)
-#define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3)
-
-struct vm_gic {
-       struct kvm_vm *vm;
-       int gic_fd;
-       uint32_t gic_dev_type;
-};
-
-static uint64_t max_phys_size;
-
-/*
- * Helpers to access a redistributor register and verify the ioctl() failed or
- * succeeded as expected, and provided the correct value on success.
- */
-static void v3_redist_reg_get_errno(int gicv3_fd, int vcpu, int offset,
-                                   int want, const char *msg)
-{
-       uint32_t ignored_val;
-       int ret = __kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
-                                       REG_OFFSET(vcpu, offset), &ignored_val);
-
-       TEST_ASSERT(ret && errno == want, "%s; want errno = %d", msg, want);
-}
-
-static void v3_redist_reg_get(int gicv3_fd, int vcpu, int offset, uint32_t want,
-                             const char *msg)
-{
-       uint32_t val;
-
-       kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
-                           REG_OFFSET(vcpu, offset), &val);
-       TEST_ASSERT(val == want, "%s; want '0x%x', got '0x%x'", msg, want, val);
-}
-
-/* dummy guest code */
-static void guest_code(void)
-{
-       GUEST_SYNC(0);
-       GUEST_SYNC(1);
-       GUEST_SYNC(2);
-       GUEST_DONE();
-}
-
-/* We don't want to assert on run execution, hence this helper. */
-static int run_vcpu(struct kvm_vcpu *vcpu)
-{
-       return __vcpu_run(vcpu) ? -errno : 0;
-}
-
-static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type,
-                                             uint32_t nr_vcpus,
-                                             struct kvm_vcpu *vcpus[])
-{
-       struct vm_gic v;
-
-       v.gic_dev_type = gic_dev_type;
-       v.vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
-       v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
-
-       return v;
-}
-
-static struct vm_gic vm_gic_create_barebones(uint32_t gic_dev_type)
-{
-       struct vm_gic v;
-
-       v.gic_dev_type = gic_dev_type;
-       v.vm = vm_create_barebones();
-       v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
-
-       return v;
-}
-
-
-static void vm_gic_destroy(struct vm_gic *v)
-{
-       close(v->gic_fd);
-       kvm_vm_free(v->vm);
-}
-
-struct vgic_region_attr {
-       uint64_t attr;
-       uint64_t size;
-       uint64_t alignment;
-};
-
-struct vgic_region_attr gic_v3_dist_region = {
-       .attr = KVM_VGIC_V3_ADDR_TYPE_DIST,
-       .size = 0x10000,
-       .alignment = 0x10000,
-};
-
-struct vgic_region_attr gic_v3_redist_region = {
-       .attr = KVM_VGIC_V3_ADDR_TYPE_REDIST,
-       .size = NR_VCPUS * 0x20000,
-       .alignment = 0x10000,
-};
-
-struct vgic_region_attr gic_v2_dist_region = {
-       .attr = KVM_VGIC_V2_ADDR_TYPE_DIST,
-       .size = 0x1000,
-       .alignment = 0x1000,
-};
-
-struct vgic_region_attr gic_v2_cpu_region = {
-       .attr = KVM_VGIC_V2_ADDR_TYPE_CPU,
-       .size = 0x2000,
-       .alignment = 0x1000,
-};
-
-/**
- * Helper routine that performs general KVM device tests. Eventually the
- * ARM_VGIC (GICv2 or GICv3) device gets created with an overlapping
- * DIST/REDIST (or DIST/CPUIF for GICv2). The assumption is that 4 vCPUs are
- * going to be used, hence the overlap. In the GICv3 case, an RDIST region is
- * set at 0x0 and a DIST region at 0x70000. The GICv2 case sets a CPUIF at 0x0
- * and a DIST region at 0x1000.
- */
-static void subtest_dist_rdist(struct vm_gic *v)
-{
-       int ret;
-       uint64_t addr;
-       struct vgic_region_attr rdist; /* CPU interface in GICv2 */
-       struct vgic_region_attr dist;
-
-       rdist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_redist_region
-                                               : gic_v2_cpu_region;
-       dist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_dist_region
-                                               : gic_v2_dist_region;
-
-       /* Check existing group/attributes */
-       kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, dist.attr);
-
-       kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, rdist.attr);
-
-       /* check non existing attribute */
-       ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, -1);
-       TEST_ASSERT(ret && errno == ENXIO, "attribute not supported");
-
-       /* misaligned DIST and REDIST address settings */
-       addr = dist.alignment / 0x10;
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   dist.attr, &addr);
-       TEST_ASSERT(ret && errno == EINVAL, "GIC dist base not aligned");
-
-       addr = rdist.alignment / 0x10;
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   rdist.attr, &addr);
-       TEST_ASSERT(ret && errno == EINVAL, "GIC redist/cpu base not aligned");
-
-       /* out of range address */
-       addr = max_phys_size;
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   dist.attr, &addr);
-       TEST_ASSERT(ret && errno == E2BIG, "dist address beyond IPA limit");
-
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   rdist.attr, &addr);
-       TEST_ASSERT(ret && errno == E2BIG, "redist address beyond IPA limit");
-
-       /* Space for half a rdist (a rdist is: 2 * rdist.alignment). */
-       addr = max_phys_size - dist.alignment;
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   rdist.attr, &addr);
-       TEST_ASSERT(ret && errno == E2BIG,
-                       "half of the redist is beyond IPA limit");
-
-       /* set REDIST base address @0x0 */
-       addr = 0x00000;
-       kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           rdist.attr, &addr);
-
-       /* Attempt to create a second legacy redistributor region */
-       addr = 0xE0000;
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   rdist.attr, &addr);
-       TEST_ASSERT(ret && errno == EEXIST, "GIC redist base set again");
-
-       ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                    KVM_VGIC_V3_ADDR_TYPE_REDIST);
-       if (!ret) {
-               /* Attempt to mix legacy and new redistributor regions */
-               addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 0, 0);
-               ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-               TEST_ASSERT(ret && errno == EINVAL,
-                           "attempt to mix GICv3 REDIST and REDIST_REGION");
-       }
-
-       /*
-        * Set overlapping DIST / REDIST; this cannot be detected here and will
-        * instead be detected on the first vcpu run.
-        */
-       addr = rdist.size - rdist.alignment;
-       kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           dist.attr, &addr);
-}
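The REDIST_REGION_ATTR_ADDR() helper used above and throughout the following
subtests packs the KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attribute value. The
real helper lives in the selftests vgic header; roughly, following the layout
described in the KVM vGIC-v3 device documentation:

        /*
         * bits [63:52]: redistributor count, bits [51:16]: base address >> 16,
         * bits [15:12]: flags, bits [11:0]: region index.
         */
        #define REDIST_REGION_ATTR_ADDR(count, base, flags, index)     \
                (((uint64_t)(count) << 52) |                            \
                 ((uint64_t)((base) >> 16) << 16) |                     \
                 ((uint64_t)(flags) << 12) |                            \
                 (index))

So REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0), for example, describes a region
at 0x200000 with room for two redistributors, registered as region index 0.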
-
-/* Test the new REDIST region API */
-static void subtest_v3_redist_regions(struct vm_gic *v)
-{
-       uint64_t addr, expected_addr;
-       int ret;
-
-       ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST);
-       TEST_ASSERT(!ret, "Multiple redist regions advertised");
-
-       addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 2, 0);
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with flags != 0");
-
-       addr = REDIST_REGION_ATTR_ADDR(0, 0x100000, 0, 0);
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with count == 0");
-
-       addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1);
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == EINVAL,
-                   "attempt to register the first rdist region with index != 0");
-
-       addr = REDIST_REGION_ATTR_ADDR(2, 0x201000, 0, 1);
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == EINVAL, "rdist region with misaligned address");
-
-       addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
-       kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
-       addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1);
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == EINVAL, "register an rdist region with already used index");
-
-       addr = REDIST_REGION_ATTR_ADDR(1, 0x210000, 0, 2);
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == EINVAL,
-                   "register an rdist region overlapping with another one");
-
-       addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 2);
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == EINVAL, "register redist region with index not +1");
-
-       addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1);
-       kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
-       addr = REDIST_REGION_ATTR_ADDR(1, max_phys_size, 0, 2);
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == E2BIG,
-                   "register redist region with base address beyond IPA range");
-
-       /* The last redist is above the pa range. */
-       addr = REDIST_REGION_ATTR_ADDR(2, max_phys_size - 0x30000, 0, 2);
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == E2BIG,
-                   "register redist region with top address beyond IPA range");
-
-       addr = 0x260000;
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
-       TEST_ASSERT(ret && errno == EINVAL,
-                   "Mix KVM_VGIC_V3_ADDR_TYPE_REDIST and REDIST_REGION");
-
-       /*
-        * Now there are 2 redist regions:
-        * region 0 @ 0x200000 2 redists
-        * region 1 @ 0x240000 1 redist
-        * Attempt to read their characteristics
-        */
-
-       addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 0);
-       expected_addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
-       ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #0");
-
-       addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 1);
-       expected_addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1);
-       ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #1");
-
-       addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 2);
-       ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == ENOENT, "read characteristics of non-existent region");
-
-       addr = 0x260000;
-       kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_DIST, &addr);
-
-       addr = REDIST_REGION_ATTR_ADDR(1, 0x260000, 0, 2);
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == EINVAL, "register redist region colliding with dist");
-}
-
-/*
- * VGIC KVM device is created and initialized before the secondary CPUs
- * get created
- */
-static void test_vgic_then_vcpus(uint32_t gic_dev_type)
-{
-       struct kvm_vcpu *vcpus[NR_VCPUS];
-       struct vm_gic v;
-       int ret, i;
-
-       v = vm_gic_create_with_vcpus(gic_dev_type, 1, vcpus);
-
-       subtest_dist_rdist(&v);
-
-       /* Add the rest of the VCPUs */
-       for (i = 1; i < NR_VCPUS; ++i)
-               vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
-
-       ret = run_vcpu(vcpus[3]);
-       TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run");
-
-       vm_gic_destroy(&v);
-}
-
-/* All the VCPUs are created before the VGIC KVM device gets initialized */
-static void test_vcpus_then_vgic(uint32_t gic_dev_type)
-{
-       struct kvm_vcpu *vcpus[NR_VCPUS];
-       struct vm_gic v;
-       int ret;
-
-       v = vm_gic_create_with_vcpus(gic_dev_type, NR_VCPUS, vcpus);
-
-       subtest_dist_rdist(&v);
-
-       ret = run_vcpu(vcpus[3]);
-       TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run");
-
-       vm_gic_destroy(&v);
-}
-
-#define KVM_VGIC_V2_ATTR(offset, cpu) \
-       (FIELD_PREP(KVM_DEV_ARM_VGIC_OFFSET_MASK, offset) | \
-        FIELD_PREP(KVM_DEV_ARM_VGIC_CPUID_MASK, cpu))
-
-#define GIC_CPU_CTRL   0x00
-
-static void test_v2_uaccess_cpuif_no_vcpus(void)
-{
-       struct vm_gic v;
-       u64 val = 0;
-       int ret;
-
-       v = vm_gic_create_barebones(KVM_DEV_TYPE_ARM_VGIC_V2);
-       subtest_dist_rdist(&v);
-
-       ret = __kvm_has_device_attr(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
-                                   KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0));
-       TEST_ASSERT(ret && errno == EINVAL,
-                   "accessed non-existent CPU interface, want errno: %i",
-                   EINVAL);
-       ret = __kvm_device_attr_get(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
-                                   KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val);
-       TEST_ASSERT(ret && errno == EINVAL,
-                   "accessed non-existent CPU interface, want errno: %i",
-                   EINVAL);
-       ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
-                                   KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val);
-       TEST_ASSERT(ret && errno == EINVAL,
-                   "accessed non-existent CPU interface, want errno: %i",
-                   EINVAL);
-
-       vm_gic_destroy(&v);
-}
-
-static void test_v3_new_redist_regions(void)
-{
-       struct kvm_vcpu *vcpus[NR_VCPUS];
-       void *dummy = NULL;
-       struct vm_gic v;
-       uint64_t addr;
-       int ret;
-
-       v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
-       subtest_v3_redist_regions(&v);
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
-                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
-       ret = run_vcpu(vcpus[3]);
-       TEST_ASSERT(ret == -ENXIO, "running without sufficient number of rdists");
-       vm_gic_destroy(&v);
-
-       /* step2 */
-
-       v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
-       subtest_v3_redist_regions(&v);
-
-       addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2);
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
-       ret = run_vcpu(vcpus[3]);
-       TEST_ASSERT(ret == -EBUSY, "running without vgic explicit init");
-
-       vm_gic_destroy(&v);
-
-       /* step 3 */
-
-       v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
-       subtest_v3_redist_regions(&v);
-
-       ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, dummy);
-       TEST_ASSERT(ret && errno == EFAULT,
-                   "register a third region allowing to cover the 4 vcpus");
-
-       addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2);
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
-                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
-       ret = run_vcpu(vcpus[3]);
-       TEST_ASSERT(!ret, "vcpu run");
-
-       vm_gic_destroy(&v);
-}
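The GICR_TYPER values expected in the next subtests pack two pieces of the
32-bit register: bits [23:8] hold the redistributor's processor number and
bit 4 is the Last flag, set on the last redistributor of a region (a sketch
assuming the architectural GICR_TYPER layout):

        /* vcpu 3, last redistributor in its region */
        uint32_t typer = (3 << 8) | (1 << 4);   /* == 0x310 */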
-
-static void test_v3_typer_accesses(void)
-{
-       struct vm_gic v;
-       uint64_t addr;
-       int ret, i;
-
-       v.vm = vm_create(NR_VCPUS);
-       (void)vm_vcpu_add(v.vm, 0, guest_code);
-
-       v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3);
-
-       (void)vm_vcpu_add(v.vm, 3, guest_code);
-
-       v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EINVAL,
-                               "attempting to read GICR_TYPER of a not-yet-created vcpu");
-
-       (void)vm_vcpu_add(v.vm, 1, guest_code);
-
-       v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EBUSY,
-                               "read GICR_TYPER before GIC initialized");
-
-       (void)vm_vcpu_add(v.vm, 2, guest_code);
-
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
-                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
-       for (i = 0; i < NR_VCPUS ; i++) {
-               v3_redist_reg_get(v.gic_fd, i, GICR_TYPER, i * 0x100,
-                                 "read GICR_TYPER before rdist region setting");
-       }
-
-       addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
-       /* The first 2 rdists should be put there (vcpus 0 and 3) */
-       v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x0, "read typer of rdist #0");
-       v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #1");
-
-       addr = REDIST_REGION_ATTR_ADDR(10, 0x100000, 0, 1);
-       ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == EINVAL, "collision with previous rdist region");
-
-       v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100,
-                         "no redist region attached to vcpu #1 yet, last cannot be returned");
-       v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200,
-                         "no redist region attached to vcpu #2, last cannot be returned");
-
-       addr = REDIST_REGION_ATTR_ADDR(10, 0x20000, 0, 1);
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
-       v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1");
-       v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210,
-                         "read typer of rdist #1, last properly returned");
-
-       vm_gic_destroy(&v);
-}
-
-static struct vm_gic vm_gic_v3_create_with_vcpuids(int nr_vcpus,
-                                                  uint32_t vcpuids[])
-{
-       struct vm_gic v;
-       int i;
-
-       v.vm = vm_create(nr_vcpus);
-       for (i = 0; i < nr_vcpus; i++)
-               vm_vcpu_add(v.vm, vcpuids[i], guest_code);
-
-       v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3);
-
-       return v;
-}
-
-/**
- * Test GICR_TYPER last bit with new redist regions
- * rdist regions #1 and #2 are contiguous
- * rdist region #0 @0x100000 2 rdist capacity
- *     rdists: 0, 3 (Last)
- * rdist region #1 @0x240000 2 rdist capacity
- *     rdists:  5, 4 (Last)
- * rdist region #2 @0x200000 2 rdist capacity
- *     rdists: 1, 2
- */
-static void test_v3_last_bit_redist_regions(void)
-{
-       uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
-       struct vm_gic v;
-       uint64_t addr;
-
-       v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids);
-
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
-                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
-       addr = REDIST_REGION_ATTR_ADDR(2, 0x100000, 0, 0);
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
-       addr = REDIST_REGION_ATTR_ADDR(2, 0x240000, 0, 1);
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
-       addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 2);
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
-       v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0");
-       v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1");
-       v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200, "read typer of rdist #2");
-       v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #3");
-       v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #5");
-       v3_redist_reg_get(v.gic_fd, 4, GICR_TYPER, 0x410, "read typer of rdist #4");
-
-       vm_gic_destroy(&v);
-}
-
-/* Test last bit with legacy region */
-static void test_v3_last_bit_single_rdist(void)
-{
-       uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
-       struct vm_gic v;
-       uint64_t addr;
-
-       v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids);
-
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
-                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
-       addr = 0x10000;
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
-
-       v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0");
-       v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x300, "read typer of rdist #1");
-       v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #2");
-       v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #3");
-       v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210, "read typer of rdist #4");
-
-       vm_gic_destroy(&v);
-}
-
-/* Uses the legacy REDIST region API. */
-static void test_v3_redist_ipa_range_check_at_vcpu_run(void)
-{
-       struct kvm_vcpu *vcpus[NR_VCPUS];
-       struct vm_gic v;
-       int ret, i;
-       uint64_t addr;
-
-       v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, 1, vcpus);
-
-       /* Set space for 3 redists; we have 1 vcpu, so this succeeds. */
-       addr = max_phys_size - (3 * 2 * 0x10000);
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
-
-       addr = 0x00000;
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_DIST, &addr);
-
-       /* Add the rest of the VCPUs */
-       for (i = 1; i < NR_VCPUS; ++i)
-               vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
-
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
-                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
-       /* Attempt to run a vcpu without enough redist space. */
-       ret = run_vcpu(vcpus[2]);
-       TEST_ASSERT(ret && errno == EINVAL,
-               "redist base+size above PA range detected on 1st vcpu run");
-
-       vm_gic_destroy(&v);
-}
-
-static void test_v3_its_region(void)
-{
-       struct kvm_vcpu *vcpus[NR_VCPUS];
-       struct vm_gic v;
-       uint64_t addr;
-       int its_fd, ret;
-
-       v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
-       its_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_ITS);
-
-       addr = 0x401000;
-       ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_ITS_ADDR_TYPE, &addr);
-       TEST_ASSERT(ret && errno == EINVAL,
-               "ITS region with misaligned address");
-
-       addr = max_phys_size;
-       ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_ITS_ADDR_TYPE, &addr);
-       TEST_ASSERT(ret && errno == E2BIG,
-               "register ITS region with base address beyond IPA range");
-
-       addr = max_phys_size - 0x10000;
-       ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_ITS_ADDR_TYPE, &addr);
-       TEST_ASSERT(ret && errno == E2BIG,
-               "Half of ITS region is beyond IPA range");
-
-       /* This one succeeds setting the ITS base */
-       addr = 0x400000;
-       kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_ITS_ADDR_TYPE, &addr);
-
-       addr = 0x300000;
-       ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_ITS_ADDR_TYPE, &addr);
-       TEST_ASSERT(ret && errno == EEXIST, "ITS base set again");
-
-       close(its_fd);
-       vm_gic_destroy(&v);
-}
-
-/*
- * Returns 0 if it's possible to create a GIC device of the given type (V2 or V3).
- */
-int test_kvm_device(uint32_t gic_dev_type)
-{
-       struct kvm_vcpu *vcpus[NR_VCPUS];
-       struct vm_gic v;
-       uint32_t other;
-       int ret;
-
-       v.vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus);
-
-       /* try to create a non-existing KVM device */
-       ret = __kvm_test_create_device(v.vm, 0);
-       TEST_ASSERT(ret && errno == ENODEV, "unsupported device");
-
-       /* trial mode */
-       ret = __kvm_test_create_device(v.vm, gic_dev_type);
-       if (ret)
-               return ret;
-       v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
-
-       ret = __kvm_create_device(v.vm, gic_dev_type);
-       TEST_ASSERT(ret < 0 && errno == EEXIST, "create GIC device twice");
-
-       /* try to create the other gic_dev_type */
-       other = VGIC_DEV_IS_V2(gic_dev_type) ? KVM_DEV_TYPE_ARM_VGIC_V3
-                                            : KVM_DEV_TYPE_ARM_VGIC_V2;
-
-       if (!__kvm_test_create_device(v.vm, other)) {
-               ret = __kvm_create_device(v.vm, other);
-               TEST_ASSERT(ret < 0 && (errno == EINVAL || errno == EEXIST),
-                               "create GIC device while other version exists");
-       }
-
-       vm_gic_destroy(&v);
-
-       return 0;
-}
-
-void run_tests(uint32_t gic_dev_type)
-{
-       test_vcpus_then_vgic(gic_dev_type);
-       test_vgic_then_vcpus(gic_dev_type);
-
-       if (VGIC_DEV_IS_V2(gic_dev_type))
-               test_v2_uaccess_cpuif_no_vcpus();
-
-       if (VGIC_DEV_IS_V3(gic_dev_type)) {
-               test_v3_new_redist_regions();
-               test_v3_typer_accesses();
-               test_v3_last_bit_redist_regions();
-               test_v3_last_bit_single_rdist();
-               test_v3_redist_ipa_range_check_at_vcpu_run();
-               test_v3_its_region();
-       }
-}
-
-int main(int ac, char **av)
-{
-       int ret;
-       int pa_bits;
-       int cnt_impl = 0;
-
-       pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
-       max_phys_size = 1ULL << pa_bits;
-
-       ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V3);
-       if (!ret) {
-               pr_info("Running GIC_v3 tests.\n");
-               run_tests(KVM_DEV_TYPE_ARM_VGIC_V3);
-               cnt_impl++;
-       }
-
-       ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V2);
-       if (!ret) {
-               pr_info("Running GIC_v2 tests.\n");
-               run_tests(KVM_DEV_TYPE_ARM_VGIC_V2);
-               cnt_impl++;
-       }
-
-       if (!cnt_impl) {
-               print_skip("No GICv2 nor GICv3 support");
-               exit(KSFT_SKIP);
-       }
-       return 0;
-}
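
A note on the magic GICR_TYPER values this test compares against: in the GICv3 architecture, GICR_TYPER bits [23:8] hold the Processor_Number and bit 4 is the Last flag that marks the final redistributor of a region, so 0x310 reads as "vCPU 3, last in its region" and 0x210 as "vCPU 2, last in its region". Below is a minimal decoding sketch assuming only that field layout; the helper is illustrative and is not part of the selftests.

#include <stdint.h>
#include <stdio.h>

/* GICR_TYPER fields used by the expected values above (GICv3 spec). */
#define GICR_TYPER_LAST		(1ULL << 4)
#define GICR_TYPER_PROC_SHIFT	8
#define GICR_TYPER_PROC_MASK	(0xffffULL << GICR_TYPER_PROC_SHIFT)

static void decode_gicr_typer(uint64_t typer)
{
	unsigned long cpu = (typer & GICR_TYPER_PROC_MASK) >> GICR_TYPER_PROC_SHIFT;

	printf("GICR_TYPER 0x%llx: Processor_Number=%lu Last=%d\n",
	       (unsigned long long)typer, cpu, !!(typer & GICR_TYPER_LAST));
}

int main(void)
{
	decode_gicr_typer(0x310);	/* vCPU 3, last rdist in its region */
	decode_gicr_typer(0x210);	/* vCPU 2, last rdist in its region */
	decode_gicr_typer(0x100);	/* vCPU 1, Last bit clear */
	return 0;
}
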
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
deleted file mode 100644 (file)
index f4ac28d..0000000
+++ /dev/null
@@ -1,847 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * vgic_irq.c - Test userspace injection of IRQs
- *
- * This test validates the injection of IRQs from userspace using various
- * methods (e.g., KVM_IRQ_LINE) and modes (e.g., EOI). The guest "asks" the
- * host to inject a specific intid via a GUEST_SYNC call, and then checks that
- * it received it.
- */
-#include <asm/kvm.h>
-#include <asm/kvm_para.h>
-#include <sys/eventfd.h>
-#include <linux/sizes.h>
-
-#include "processor.h"
-#include "test_util.h"
-#include "kvm_util.h"
-#include "gic.h"
-#include "gic_v3.h"
-#include "vgic.h"
-
-/*
- * Stores the user specified args; it's passed to the guest and to every test
- * function.
- */
-struct test_args {
-       uint32_t nr_irqs; /* number of KVM supported IRQs. */
-       bool eoi_split; /* 1 is eoir+dir, 0 is eoir only */
-       bool level_sensitive; /* 1 is level, 0 is edge */
-       int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */
-       bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */
-};
-
-/*
- * KVM implements 32 priority levels:
- * 0x00 (highest priority) - 0xF8 (lowest priority), in steps of 8
- *
- * Note that these macros will still be correct in the case that KVM implements
- * more priority levels. Also note that 32 is the minimum for GICv3 and GICv2.
- */
-#define KVM_NUM_PRIOS          32
-#define KVM_PRIO_SHIFT         3 /* steps of 8 = 1 << 3 */
-#define KVM_PRIO_STEPS         (1 << KVM_PRIO_SHIFT) /* 8 */
-#define LOWEST_PRIO            (KVM_NUM_PRIOS - 1)
-#define CPU_PRIO_MASK          (LOWEST_PRIO << KVM_PRIO_SHIFT) /* 0xf8 */
-#define IRQ_DEFAULT_PRIO       (LOWEST_PRIO - 1)
-#define IRQ_DEFAULT_PRIO_REG   (IRQ_DEFAULT_PRIO << KVM_PRIO_SHIFT) /* 0xf0 */
-
-/*
- * The kvm_inject_* utilities are used by the guest to ask the host to inject
- * interrupts (e.g., using the KVM_IRQ_LINE ioctl).
- */
-
-typedef enum {
-       KVM_INJECT_EDGE_IRQ_LINE = 1,
-       KVM_SET_IRQ_LINE,
-       KVM_SET_IRQ_LINE_HIGH,
-       KVM_SET_LEVEL_INFO_HIGH,
-       KVM_INJECT_IRQFD,
-       KVM_WRITE_ISPENDR,
-       KVM_WRITE_ISACTIVER,
-} kvm_inject_cmd;
-
-struct kvm_inject_args {
-       kvm_inject_cmd cmd;
-       uint32_t first_intid;
-       uint32_t num;
-       int level;
-       bool expect_failure;
-};
-
-/* Used on the guest side to perform the hypercall. */
-static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
-               uint32_t num, int level, bool expect_failure);
-
-/* Used on the host side to get the hypercall info. */
-static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
-               struct kvm_inject_args *args);
-
-#define _KVM_INJECT_MULTI(cmd, intid, num, expect_failure)                     \
-       kvm_inject_call(cmd, intid, num, -1 /* not used */, expect_failure)
-
-#define KVM_INJECT_MULTI(cmd, intid, num)                                      \
-       _KVM_INJECT_MULTI(cmd, intid, num, false)
-
-#define _KVM_INJECT(cmd, intid, expect_failure)                                        \
-       _KVM_INJECT_MULTI(cmd, intid, 1, expect_failure)
-
-#define KVM_INJECT(cmd, intid)                                                 \
-       _KVM_INJECT_MULTI(cmd, intid, 1, false)
-
-#define KVM_ACTIVATE(cmd, intid)                                               \
-       kvm_inject_call(cmd, intid, 1, 1, false);
-
-struct kvm_inject_desc {
-       kvm_inject_cmd cmd;
-       /* can inject SGIs, PPIs, and/or SPIs. */
-       bool sgi, ppi, spi;
-};
-
-static struct kvm_inject_desc inject_edge_fns[] = {
-       /*                                      sgi    ppi    spi */
-       { KVM_INJECT_EDGE_IRQ_LINE,             false, false, true },
-       { KVM_INJECT_IRQFD,                     false, false, true },
-       { KVM_WRITE_ISPENDR,                    true,  false, true },
-       { 0, },
-};
-
-static struct kvm_inject_desc inject_level_fns[] = {
-       /*                                      sgi    ppi    spi */
-       { KVM_SET_IRQ_LINE_HIGH,                false, true,  true },
-       { KVM_SET_LEVEL_INFO_HIGH,              false, true,  true },
-       { KVM_INJECT_IRQFD,                     false, false, true },
-       { KVM_WRITE_ISPENDR,                    false, true,  true },
-       { 0, },
-};
-
-static struct kvm_inject_desc set_active_fns[] = {
-       /*                                      sgi    ppi    spi */
-       { KVM_WRITE_ISACTIVER,                  true,  true,  true },
-       { 0, },
-};
-
-#define for_each_inject_fn(t, f)                                               \
-       for ((f) = (t); (f)->cmd; (f)++)
-
-#define for_each_supported_inject_fn(args, t, f)                               \
-       for_each_inject_fn(t, f)                                                \
-               if ((args)->kvm_supports_irqfd || (f)->cmd != KVM_INJECT_IRQFD)
-
-#define for_each_supported_activate_fn(args, t, f)                             \
-       for_each_supported_inject_fn((args), (t), (f))
-
-/* Shared between the guest main thread and the IRQ handlers. */
-volatile uint64_t irq_handled;
-volatile uint32_t irqnr_received[MAX_SPI + 1];
-
-static void reset_stats(void)
-{
-       int i;
-
-       irq_handled = 0;
-       for (i = 0; i <= MAX_SPI; i++)
-               irqnr_received[i] = 0;
-}
-
-static uint64_t gic_read_ap1r0(void)
-{
-       uint64_t reg = read_sysreg_s(SYS_ICC_AP1R0_EL1);
-
-       dsb(sy);
-       return reg;
-}
-
-static void gic_write_ap1r0(uint64_t val)
-{
-       write_sysreg_s(val, SYS_ICC_AP1R0_EL1);
-       isb();
-}
-
-static void guest_set_irq_line(uint32_t intid, uint32_t level);
-
-static void guest_irq_generic_handler(bool eoi_split, bool level_sensitive)
-{
-       uint32_t intid = gic_get_and_ack_irq();
-
-       if (intid == IAR_SPURIOUS)
-               return;
-
-       GUEST_ASSERT(gic_irq_get_active(intid));
-
-       if (!level_sensitive)
-               GUEST_ASSERT(!gic_irq_get_pending(intid));
-
-       if (level_sensitive)
-               guest_set_irq_line(intid, 0);
-
-       GUEST_ASSERT(intid < MAX_SPI);
-       irqnr_received[intid] += 1;
-       irq_handled += 1;
-
-       gic_set_eoi(intid);
-       GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
-       if (eoi_split)
-               gic_set_dir(intid);
-
-       GUEST_ASSERT(!gic_irq_get_active(intid));
-       GUEST_ASSERT(!gic_irq_get_pending(intid));
-}
-
-static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
-               uint32_t num, int level, bool expect_failure)
-{
-       struct kvm_inject_args args = {
-               .cmd = cmd,
-               .first_intid = first_intid,
-               .num = num,
-               .level = level,
-               .expect_failure = expect_failure,
-       };
-       GUEST_SYNC(&args);
-}
-
-#define GUEST_ASSERT_IAR_EMPTY()                                               \
-do {                                                                           \
-       uint32_t _intid;                                                        \
-       _intid = gic_get_and_ack_irq();                                         \
-       GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS);                    \
-} while (0)
-
-#define CAT_HELPER(a, b) a ## b
-#define CAT(a, b) CAT_HELPER(a, b)
-#define PREFIX guest_irq_handler_
-#define GUEST_IRQ_HANDLER_NAME(split, lev) CAT(PREFIX, CAT(split, lev))
-#define GENERATE_GUEST_IRQ_HANDLER(split, lev)                                 \
-static void CAT(PREFIX, CAT(split, lev))(struct ex_regs *regs)                 \
-{                                                                              \
-       guest_irq_generic_handler(split, lev);                                  \
-}
-
-GENERATE_GUEST_IRQ_HANDLER(0, 0);
-GENERATE_GUEST_IRQ_HANDLER(0, 1);
-GENERATE_GUEST_IRQ_HANDLER(1, 0);
-GENERATE_GUEST_IRQ_HANDLER(1, 1);
-
-static void (*guest_irq_handlers[2][2])(struct ex_regs *) = {
-       {GUEST_IRQ_HANDLER_NAME(0, 0), GUEST_IRQ_HANDLER_NAME(0, 1),},
-       {GUEST_IRQ_HANDLER_NAME(1, 0), GUEST_IRQ_HANDLER_NAME(1, 1),},
-};
-
-static void reset_priorities(struct test_args *args)
-{
-       int i;
-
-       for (i = 0; i < args->nr_irqs; i++)
-               gic_set_priority(i, IRQ_DEFAULT_PRIO_REG);
-}
-
-static void guest_set_irq_line(uint32_t intid, uint32_t level)
-{
-       kvm_inject_call(KVM_SET_IRQ_LINE, intid, 1, level, false);
-}
-
-static void test_inject_fail(struct test_args *args,
-               uint32_t intid, kvm_inject_cmd cmd)
-{
-       reset_stats();
-
-       _KVM_INJECT(cmd, intid, true);
-       /* no IRQ to handle on entry */
-
-       GUEST_ASSERT_EQ(irq_handled, 0);
-       GUEST_ASSERT_IAR_EMPTY();
-}
-
-static void guest_inject(struct test_args *args,
-               uint32_t first_intid, uint32_t num,
-               kvm_inject_cmd cmd)
-{
-       uint32_t i;
-
-       reset_stats();
-
-       /* Cycle over all priorities to make things more interesting. */
-       for (i = first_intid; i < num + first_intid; i++)
-               gic_set_priority(i, (i % (KVM_NUM_PRIOS - 1)) << 3);
-
-       asm volatile("msr daifset, #2" : : : "memory");
-       KVM_INJECT_MULTI(cmd, first_intid, num);
-
-       while (irq_handled < num) {
-               wfi();
-               local_irq_enable();
-               isb(); /* handle IRQ */
-               local_irq_disable();
-       }
-       local_irq_enable();
-
-       GUEST_ASSERT_EQ(irq_handled, num);
-       for (i = first_intid; i < num + first_intid; i++)
-               GUEST_ASSERT_EQ(irqnr_received[i], 1);
-       GUEST_ASSERT_IAR_EMPTY();
-
-       reset_priorities(args);
-}
-
-/*
- * Restore the active state of multiple concurrent IRQs (given by
- * concurrent_irqs).  This does what a live-migration would do on the
- * destination side assuming there are some active IRQs that were not
- * deactivated yet.
- */
-static void guest_restore_active(struct test_args *args,
-               uint32_t first_intid, uint32_t num,
-               kvm_inject_cmd cmd)
-{
-       uint32_t prio, intid, ap1r;
-       int i;
-
-       /*
-        * Set the priorities of the IRQs in descending order, so intid+1
-        * can preempt intid.
-        */
-       for (i = 0, prio = (num - 1) * 8; i < num; i++, prio -= 8) {
-               GUEST_ASSERT(prio >= 0);
-               intid = i + first_intid;
-               gic_set_priority(intid, prio);
-       }
-
-       /*
-        * In a real migration, KVM would restore all GIC state before running
-        * guest code.
-        */
-       for (i = 0; i < num; i++) {
-               intid = i + first_intid;
-               KVM_ACTIVATE(cmd, intid);
-               ap1r = gic_read_ap1r0();
-               ap1r |= 1U << i;
-               gic_write_ap1r0(ap1r);
-       }
-
-       /* This is where the "migration" would occur. */
-
-       /* finish handling the IRQs starting with the highest priority one. */
-       for (i = 0; i < num; i++) {
-               intid = num - i - 1 + first_intid;
-               gic_set_eoi(intid);
-               if (args->eoi_split)
-                       gic_set_dir(intid);
-       }
-
-       for (i = 0; i < num; i++)
-               GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
-       GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
-       GUEST_ASSERT_IAR_EMPTY();
-}
-
-/*
- * Polls the IAR until it's not a spurious interrupt.
- *
- * This function should only be used in test_inject_preemption (with IRQs
- * masked).
- */
-static uint32_t wait_for_and_activate_irq(void)
-{
-       uint32_t intid;
-
-       do {
-               asm volatile("wfi" : : : "memory");
-               intid = gic_get_and_ack_irq();
-       } while (intid == IAR_SPURIOUS);
-
-       return intid;
-}
-
-/*
- * Inject multiple concurrent IRQs (num IRQs starting at first_intid) and
- * handle them without handling the actual exceptions.  This is done by masking
- * interrupts for the whole test.
- */
-static void test_inject_preemption(struct test_args *args,
-               uint32_t first_intid, int num,
-               kvm_inject_cmd cmd)
-{
-       uint32_t intid, prio, step = KVM_PRIO_STEPS;
-       int i;
-
-       /*
-        * Set the priorities of the IRQs in descending order, so intid+1
-        * can preempt intid.
-        */
-       for (i = 0, prio = (num - 1) * step; i < num; i++, prio -= step) {
-               GUEST_ASSERT(prio >= 0);
-               intid = i + first_intid;
-               gic_set_priority(intid, prio);
-       }
-
-       local_irq_disable();
-
-       for (i = 0; i < num; i++) {
-               uint32_t tmp;
-               intid = i + first_intid;
-               KVM_INJECT(cmd, intid);
-               /* Each successive IRQ will preempt the previous one. */
-               tmp = wait_for_and_activate_irq();
-               GUEST_ASSERT_EQ(tmp, intid);
-               if (args->level_sensitive)
-                       guest_set_irq_line(intid, 0);
-       }
-
-       /* finish handling the IRQs starting with the highest priority one. */
-       for (i = 0; i < num; i++) {
-               intid = num - i - 1 + first_intid;
-               gic_set_eoi(intid);
-               if (args->eoi_split)
-                       gic_set_dir(intid);
-       }
-
-       local_irq_enable();
-
-       for (i = 0; i < num; i++)
-               GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
-       GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
-       GUEST_ASSERT_IAR_EMPTY();
-
-       reset_priorities(args);
-}
-
-static void test_injection(struct test_args *args, struct kvm_inject_desc *f)
-{
-       uint32_t nr_irqs = args->nr_irqs;
-
-       if (f->sgi) {
-               guest_inject(args, MIN_SGI, 1, f->cmd);
-               guest_inject(args, 0, 16, f->cmd);
-       }
-
-       if (f->ppi)
-               guest_inject(args, MIN_PPI, 1, f->cmd);
-
-       if (f->spi) {
-               guest_inject(args, MIN_SPI, 1, f->cmd);
-               guest_inject(args, nr_irqs - 1, 1, f->cmd);
-               guest_inject(args, MIN_SPI, nr_irqs - MIN_SPI, f->cmd);
-       }
-}
-
-static void test_injection_failure(struct test_args *args,
-               struct kvm_inject_desc *f)
-{
-       uint32_t bad_intid[] = { args->nr_irqs, 1020, 1024, 1120, 5120, ~0U, };
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(bad_intid); i++)
-               test_inject_fail(args, bad_intid[i], f->cmd);
-}
-
-static void test_preemption(struct test_args *args, struct kvm_inject_desc *f)
-{
-       /*
-        * Test up to 4 levels of preemption. The reason is that KVM doesn't
-        * currently implement the ability to have more than the number-of-LRs
-        * number of concurrently active IRQs. The number of LRs implemented is
-        * IMPLEMENTATION DEFINED, however, it seems that most implement 4.
-        */
-       if (f->sgi)
-               test_inject_preemption(args, MIN_SGI, 4, f->cmd);
-
-       if (f->ppi)
-               test_inject_preemption(args, MIN_PPI, 4, f->cmd);
-
-       if (f->spi)
-               test_inject_preemption(args, MIN_SPI, 4, f->cmd);
-}
-
-static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f)
-{
-       /* Test up to 4 active IRQs. Same reason as in test_preemption. */
-       if (f->sgi)
-               guest_restore_active(args, MIN_SGI, 4, f->cmd);
-
-       if (f->ppi)
-               guest_restore_active(args, MIN_PPI, 4, f->cmd);
-
-       if (f->spi)
-               guest_restore_active(args, MIN_SPI, 4, f->cmd);
-}
-
-static void guest_code(struct test_args *args)
-{
-       uint32_t i, nr_irqs = args->nr_irqs;
-       bool level_sensitive = args->level_sensitive;
-       struct kvm_inject_desc *f, *inject_fns;
-
-       gic_init(GIC_V3, 1);
-
-       for (i = 0; i < nr_irqs; i++)
-               gic_irq_enable(i);
-
-       for (i = MIN_SPI; i < nr_irqs; i++)
-               gic_irq_set_config(i, !level_sensitive);
-
-       gic_set_eoi_split(args->eoi_split);
-
-       reset_priorities(args);
-       gic_set_priority_mask(CPU_PRIO_MASK);
-
-       inject_fns  = level_sensitive ? inject_level_fns
-                                     : inject_edge_fns;
-
-       local_irq_enable();
-
-       /* Start the tests. */
-       for_each_supported_inject_fn(args, inject_fns, f) {
-               test_injection(args, f);
-               test_preemption(args, f);
-               test_injection_failure(args, f);
-       }
-
-       /*
-        * Restore the active state of IRQs. This would happen when live
-        * migrating IRQs in the middle of being handled.
-        */
-       for_each_supported_activate_fn(args, set_active_fns, f)
-               test_restore_active(args, f);
-
-       GUEST_DONE();
-}
-
-static void kvm_irq_line_check(struct kvm_vm *vm, uint32_t intid, int level,
-                       struct test_args *test_args, bool expect_failure)
-{
-       int ret;
-
-       if (!expect_failure) {
-               kvm_arm_irq_line(vm, intid, level);
-       } else {
-               /* The interface doesn't allow larger intids. */
-               if (intid > KVM_ARM_IRQ_NUM_MASK)
-                       return;
-
-               ret = _kvm_arm_irq_line(vm, intid, level);
-               TEST_ASSERT(ret != 0 && errno == EINVAL,
-                               "Bad intid %i did not cause KVM_IRQ_LINE "
-                               "error: rc: %i errno: %i", intid, ret, errno);
-       }
-}
-
-void kvm_irq_set_level_info_check(int gic_fd, uint32_t intid, int level,
-                       bool expect_failure)
-{
-       if (!expect_failure) {
-               kvm_irq_set_level_info(gic_fd, intid, level);
-       } else {
-               int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
-               /*
-                * The kernel silently fails for invalid SPIs and SGIs (which
-                * are not level-sensitive). It only checks that the intid doesn't
-                * spill over 1U << 10 (the max reserved SPI). Also, callers
-                * are supposed to mask the intid with 0x3ff (1023).
-                */
-               if (intid > VGIC_MAX_RESERVED)
-                       TEST_ASSERT(ret != 0 && errno == EINVAL,
-                               "Bad intid %i did not cause VGIC_GRP_LEVEL_INFO "
-                               "error: rc: %i errno: %i", intid, ret, errno);
-               else
-                       TEST_ASSERT(!ret, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO "
-                               "for intid %i failed, rc: %i errno: %i",
-                               intid, ret, errno);
-       }
-}
-
-static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm,
-               uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
-               bool expect_failure)
-{
-       struct kvm_irq_routing *routing;
-       int ret;
-       uint64_t i;
-
-       assert(num <= kvm_max_routes && kvm_max_routes <= KVM_MAX_IRQ_ROUTES);
-
-       routing = kvm_gsi_routing_create();
-       for (i = intid; i < (uint64_t)intid + num; i++)
-               kvm_gsi_routing_irqchip_add(routing, i - MIN_SPI, i - MIN_SPI);
-
-       if (!expect_failure) {
-               kvm_gsi_routing_write(vm, routing);
-       } else {
-               ret = _kvm_gsi_routing_write(vm, routing);
-               /* The kernel only checks e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS */
-               if (((uint64_t)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS)
-                       TEST_ASSERT(ret != 0 && errno == EINVAL,
-                               "Bad intid %u did not cause KVM_SET_GSI_ROUTING "
-                               "error: rc: %i errno: %i", intid, ret, errno);
-               else
-                       TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING "
-                               "for intid %i failed, rc: %i errno: %i",
-                               intid, ret, errno);
-       }
-}
-
-static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid,
-                                       struct kvm_vcpu *vcpu,
-                                       bool expect_failure)
-{
-       /*
-        * Ignore this when expecting failure, as invalid intids would lead to
-        * either trying to inject SGIs when we configured the test to be
-        * level_sensitive (or the reverse), or injecting large intids, which
-        * would lead to writing above the ISPENDR register space (and we
-        * don't want to do that either).
-        */
-       if (!expect_failure)
-               kvm_irq_write_ispendr(gic_fd, intid, vcpu);
-}
-
-static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
-               uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
-               bool expect_failure)
-{
-       int fd[MAX_SPI];
-       uint64_t val;
-       int ret, f;
-       uint64_t i;
-
-       /*
-        * There is no way to try injecting an SGI or PPI as the interface
-        * starts counting from the first SPI (above the private ones), so just
-        * exit.
-        */
-       if (INTID_IS_SGI(intid) || INTID_IS_PPI(intid))
-               return;
-
-       kvm_set_gsi_routing_irqchip_check(vm, intid, num,
-                       kvm_max_routes, expect_failure);
-
-       /*
-        * If expect_failure, just inject anyway. These injections will
-        * silently fail. And in any case, the guest will check that no
-        * actual interrupt was injected for those cases.
-        */
-
-       for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
-               fd[f] = eventfd(0, 0);
-               TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f]));
-       }
-
-       for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
-               struct kvm_irqfd irqfd = {
-                       .fd  = fd[f],
-                       .gsi = i - MIN_SPI,
-               };
-               assert(i <= (uint64_t)UINT_MAX);
-               vm_ioctl(vm, KVM_IRQFD, &irqfd);
-       }
-
-       for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
-               val = 1;
-               ret = write(fd[f], &val, sizeof(uint64_t));
-               TEST_ASSERT(ret == sizeof(uint64_t),
-                           __KVM_SYSCALL_ERROR("write()", ret));
-       }
-
-       for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
-               close(fd[f]);
-}
-
-/* handles the valid case: intid=0xffffffff num=1 */
-#define for_each_intid(first, num, tmp, i)                                     \
-       for ((tmp) = (i) = (first);                                             \
-               (tmp) < (uint64_t)(first) + (uint64_t)(num);                    \
-               (tmp)++, (i)++)
-
-static void run_guest_cmd(struct kvm_vcpu *vcpu, int gic_fd,
-                         struct kvm_inject_args *inject_args,
-                         struct test_args *test_args)
-{
-       kvm_inject_cmd cmd = inject_args->cmd;
-       uint32_t intid = inject_args->first_intid;
-       uint32_t num = inject_args->num;
-       int level = inject_args->level;
-       bool expect_failure = inject_args->expect_failure;
-       struct kvm_vm *vm = vcpu->vm;
-       uint64_t tmp;
-       uint32_t i;
-
-       /* handles the valid case: intid=0xffffffff num=1 */
-       assert(intid < UINT_MAX - num || num == 1);
-
-       switch (cmd) {
-       case KVM_INJECT_EDGE_IRQ_LINE:
-               for_each_intid(intid, num, tmp, i)
-                       kvm_irq_line_check(vm, i, 1, test_args,
-                                       expect_failure);
-               for_each_intid(intid, num, tmp, i)
-                       kvm_irq_line_check(vm, i, 0, test_args,
-                                       expect_failure);
-               break;
-       case KVM_SET_IRQ_LINE:
-               for_each_intid(intid, num, tmp, i)
-                       kvm_irq_line_check(vm, i, level, test_args,
-                                       expect_failure);
-               break;
-       case KVM_SET_IRQ_LINE_HIGH:
-               for_each_intid(intid, num, tmp, i)
-                       kvm_irq_line_check(vm, i, 1, test_args,
-                                       expect_failure);
-               break;
-       case KVM_SET_LEVEL_INFO_HIGH:
-               for_each_intid(intid, num, tmp, i)
-                       kvm_irq_set_level_info_check(gic_fd, i, 1,
-                                       expect_failure);
-               break;
-       case KVM_INJECT_IRQFD:
-               kvm_routing_and_irqfd_check(vm, intid, num,
-                                       test_args->kvm_max_routes,
-                                       expect_failure);
-               break;
-       case KVM_WRITE_ISPENDR:
-               for (i = intid; i < intid + num; i++)
-                       kvm_irq_write_ispendr_check(gic_fd, i, vcpu,
-                                                   expect_failure);
-               break;
-       case KVM_WRITE_ISACTIVER:
-               for (i = intid; i < intid + num; i++)
-                       kvm_irq_write_isactiver(gic_fd, i, vcpu);
-               break;
-       default:
-               break;
-       }
-}
-
-static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
-               struct kvm_inject_args *args)
-{
-       struct kvm_inject_args *kvm_args_hva;
-       vm_vaddr_t kvm_args_gva;
-
-       kvm_args_gva = uc->args[1];
-       kvm_args_hva = (struct kvm_inject_args *)addr_gva2hva(vm, kvm_args_gva);
-       memcpy(args, kvm_args_hva, sizeof(struct kvm_inject_args));
-}
-
-static void print_args(struct test_args *args)
-{
-       printf("nr-irqs=%d level-sensitive=%d eoi-split=%d\n",
-                       args->nr_irqs, args->level_sensitive,
-                       args->eoi_split);
-}
-
-static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
-{
-       struct ucall uc;
-       int gic_fd;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct kvm_inject_args inject_args;
-       vm_vaddr_t args_gva;
-
-       struct test_args args = {
-               .nr_irqs = nr_irqs,
-               .level_sensitive = level_sensitive,
-               .eoi_split = eoi_split,
-               .kvm_max_routes = kvm_check_cap(KVM_CAP_IRQ_ROUTING),
-               .kvm_supports_irqfd = kvm_check_cap(KVM_CAP_IRQFD),
-       };
-
-       print_args(&args);
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       vm_init_descriptor_tables(vm);
-       vcpu_init_descriptor_tables(vcpu);
-
-       /* Set up the guest args page (so it gets the args). */
-       args_gva = vm_vaddr_alloc_page(vm);
-       memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
-       vcpu_args_set(vcpu, 1, args_gva);
-
-       gic_fd = vgic_v3_setup(vm, 1, nr_irqs);
-       __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping");
-
-       vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT,
-               guest_irq_handlers[args.eoi_split][args.level_sensitive]);
-
-       while (1) {
-               vcpu_run(vcpu);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       kvm_inject_get_call(vm, &uc, &inject_args);
-                       run_guest_cmd(vcpu, gic_fd, &inject_args, &args);
-                       break;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-       }
-
-done:
-       close(gic_fd);
-       kvm_vm_free(vm);
-}
-
-static void help(const char *name)
-{
-       printf(
-       "\n"
-       "usage: %s [-n num_irqs] [-e eoi_split] [-l level_sensitive]\n", name);
-       printf(" -n: specify number of IRQs to set up the vgic with. "
-               "It has to be a multiple of 32 and between 64 and 1024.\n");
-       printf(" -e: if 1 then EOI is split into a write to DIR on top "
-               "of writing EOI.\n");
-       printf(" -l: specify whether the IRQs are level-sensitive (1) or not (0).");
-       puts("");
-       exit(1);
-}
-
-int main(int argc, char **argv)
-{
-       uint32_t nr_irqs = 64;
-       bool default_args = true;
-       bool level_sensitive = false;
-       int opt;
-       bool eoi_split = false;
-
-       while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) {
-               switch (opt) {
-               case 'n':
-                       nr_irqs = atoi_non_negative("Number of IRQs", optarg);
-                       if (nr_irqs > 1024 || nr_irqs % 32)
-                               help(argv[0]);
-                       break;
-               case 'e':
-                       eoi_split = (bool)atoi_paranoid(optarg);
-                       default_args = false;
-                       break;
-               case 'l':
-                       level_sensitive = (bool)atoi_paranoid(optarg);
-                       default_args = false;
-                       break;
-               case 'h':
-               default:
-                       help(argv[0]);
-                       break;
-               }
-       }
-
-       /*
-        * If the user only specified nr_irqs (or nothing at all), run all
-        * combinations of eoi_split and level_sensitive.
-        */
-       if (default_args) {
-               test_vgic(nr_irqs, false /* level */, false /* eoi_split */);
-               test_vgic(nr_irqs, false /* level */, true /* eoi_split */);
-               test_vgic(nr_irqs, true /* level */, false /* eoi_split */);
-               test_vgic(nr_irqs, true /* level */, true /* eoi_split */);
-       } else {
-               test_vgic(nr_irqs, level_sensitive, eoi_split);
-       }
-
-       return 0;
-}
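
For the irqfd cases exercised by kvm_routing_and_irqfd_check() above: userspace first installs a GSI routing table that maps a GSI to an SPI, then binds an eventfd to the GSI with KVM_IRQFD, and every write of a non-zero value to the eventfd requests one injection. A minimal user-space sketch of the bind-and-trigger step follows, assuming vm_fd is a KVM VM fd that already has an in-kernel GICv3 and a routing entry for the GSI; the helper name is made up and error handling is reduced to return codes.

#include <stdint.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kvm.h>

/* Bind an eventfd to @gsi and pulse it once; returns 0 on success. */
static int pulse_gsi_via_irqfd(int vm_fd, uint32_t gsi)
{
	struct kvm_irqfd irqfd = { .gsi = gsi };
	uint64_t val = 1;
	int efd = eventfd(0, 0);

	if (efd < 0)
		return -1;

	irqfd.fd = efd;
	if (ioctl(vm_fd, KVM_IRQFD, &irqfd) < 0) {
		close(efd);
		return -1;
	}

	/* Each write of a non-zero value requests one injection. */
	if (write(efd, &val, sizeof(val)) != sizeof(val)) {
		close(efd);
		return -1;
	}

	close(efd);
	return 0;
}

The selftest's version differs mainly in that it creates one eventfd per SPI in the burst and asserts on every syscall.
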
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c
deleted file mode 100644 (file)
index fc4fe52..0000000
+++ /dev/null
@@ -1,410 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * vgic_lpi_stress - Stress test for KVM's ITS emulation
- *
- * Copyright (c) 2024 Google LLC
- */
-
-#include <linux/sizes.h>
-#include <pthread.h>
-#include <stdatomic.h>
-#include <sys/sysinfo.h>
-
-#include "kvm_util.h"
-#include "gic.h"
-#include "gic_v3.h"
-#include "gic_v3_its.h"
-#include "processor.h"
-#include "ucall.h"
-#include "vgic.h"
-
-#define TEST_MEMSLOT_INDEX     1
-
-#define GIC_LPI_OFFSET 8192
-
-static size_t nr_iterations = 1000;
-static vm_paddr_t gpa_base;
-
-static struct kvm_vm *vm;
-static struct kvm_vcpu **vcpus;
-static int gic_fd, its_fd;
-
-static struct test_data {
-       bool            request_vcpus_stop;
-       u32             nr_cpus;
-       u32             nr_devices;
-       u32             nr_event_ids;
-
-       vm_paddr_t      device_table;
-       vm_paddr_t      collection_table;
-       vm_paddr_t      cmdq_base;
-       void            *cmdq_base_va;
-       vm_paddr_t      itt_tables;
-
-       vm_paddr_t      lpi_prop_table;
-       vm_paddr_t      lpi_pend_tables;
-} test_data =  {
-       .nr_cpus        = 1,
-       .nr_devices     = 1,
-       .nr_event_ids   = 16,
-};
-
-static void guest_irq_handler(struct ex_regs *regs)
-{
-       u32 intid = gic_get_and_ack_irq();
-
-       if (intid == IAR_SPURIOUS)
-               return;
-
-       GUEST_ASSERT(intid >= GIC_LPI_OFFSET);
-       gic_set_eoi(intid);
-}
-
-static void guest_setup_its_mappings(void)
-{
-       u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET;
-       u32 nr_events = test_data.nr_event_ids;
-       u32 nr_devices = test_data.nr_devices;
-       u32 nr_cpus = test_data.nr_cpus;
-
-       for (coll_id = 0; coll_id < nr_cpus; coll_id++)
-               its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true);
-
-       /* Round-robin the LPIs to all of the vCPUs in the VM */
-       coll_id = 0;
-       for (device_id = 0; device_id < nr_devices; device_id++) {
-               vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K);
-
-               its_send_mapd_cmd(test_data.cmdq_base_va, device_id,
-                                 itt_base, SZ_64K, true);
-
-               for (event_id = 0; event_id < nr_events; event_id++) {
-                       its_send_mapti_cmd(test_data.cmdq_base_va, device_id,
-                                          event_id, coll_id, intid++);
-
-                       coll_id = (coll_id + 1) % test_data.nr_cpus;
-               }
-       }
-}
-
-static void guest_invalidate_all_rdists(void)
-{
-       int i;
-
-       for (i = 0; i < test_data.nr_cpus; i++)
-               its_send_invall_cmd(test_data.cmdq_base_va, i);
-}
-
-static void guest_setup_gic(void)
-{
-       static atomic_int nr_cpus_ready = 0;
-       u32 cpuid = guest_get_vcpuid();
-
-       gic_init(GIC_V3, test_data.nr_cpus);
-       gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K,
-                             test_data.lpi_pend_tables + (cpuid * SZ_64K));
-
-       atomic_fetch_add(&nr_cpus_ready, 1);
-
-       if (cpuid > 0)
-               return;
-
-       while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus)
-               cpu_relax();
-
-       its_init(test_data.collection_table, SZ_64K,
-                test_data.device_table, SZ_64K,
-                test_data.cmdq_base, SZ_64K);
-
-       guest_setup_its_mappings();
-       guest_invalidate_all_rdists();
-}
-
-static void guest_code(size_t nr_lpis)
-{
-       guest_setup_gic();
-
-       GUEST_SYNC(0);
-
-       /*
-        * Don't use WFI here to avoid blocking the vCPU thread indefinitely and
-        * never getting the stop signal.
-        */
-       while (!READ_ONCE(test_data.request_vcpus_stop))
-               cpu_relax();
-
-       GUEST_DONE();
-}
-
-static void setup_memslot(void)
-{
-       size_t pages;
-       size_t sz;
-
-       /*
-        * For the ITS:
-        *  - A single level device table
-        *  - A single level collection table
-        *  - The command queue
-        *  - An ITT for each device
-        */
-       sz = (3 + test_data.nr_devices) * SZ_64K;
-
-       /*
-        * For the redistributors:
-        *  - A shared LPI configuration table
-        *  - An LPI pending table for each vCPU
-        */
-       sz += (1 + test_data.nr_cpus) * SZ_64K;
-
-       pages = sz / vm->page_size;
-       gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz;
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base,
-                                   TEST_MEMSLOT_INDEX, pages, 0);
-}
-
-#define LPI_PROP_DEFAULT_PRIO  0xa0
-
-static void configure_lpis(void)
-{
-       size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids;
-       u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table);
-       size_t i;
-
-       for (i = 0; i < nr_lpis; i++) {
-               tbl[i] = LPI_PROP_DEFAULT_PRIO |
-                        LPI_PROP_GROUP1 |
-                        LPI_PROP_ENABLED;
-       }
-}
-
-static void setup_test_data(void)
-{
-       size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K);
-       u32 nr_devices = test_data.nr_devices;
-       u32 nr_cpus = test_data.nr_cpus;
-       vm_paddr_t cmdq_base;
-
-       test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k,
-                                                   gpa_base,
-                                                   TEST_MEMSLOT_INDEX);
-
-       test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k,
-                                                       gpa_base,
-                                                       TEST_MEMSLOT_INDEX);
-
-       cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base,
-                                      TEST_MEMSLOT_INDEX);
-       virt_map(vm, cmdq_base, cmdq_base, pages_per_64k);
-       test_data.cmdq_base = cmdq_base;
-       test_data.cmdq_base_va = (void *)cmdq_base;
-
-       test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices,
-                                                 gpa_base, TEST_MEMSLOT_INDEX);
-
-       test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k,
-                                                     gpa_base, TEST_MEMSLOT_INDEX);
-       configure_lpis();
-
-       test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus,
-                                                      gpa_base, TEST_MEMSLOT_INDEX);
-
-       sync_global_to_guest(vm, test_data);
-}
-
-static void setup_gic(void)
-{
-       gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64);
-       __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3");
-
-       its_fd = vgic_its_setup(vm);
-}
-
-static void signal_lpi(u32 device_id, u32 event_id)
-{
-       vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER;
-
-       struct kvm_msi msi = {
-               .address_lo     = db_addr,
-               .address_hi     = db_addr >> 32,
-               .data           = event_id,
-               .devid          = device_id,
-               .flags          = KVM_MSI_VALID_DEVID,
-       };
-
-       /*
-        * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM,
-        * which for arm64 implies having a valid translation in the ITS.
-        */
-       TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1,
-                   "KVM_SIGNAL_MSI ioctl failed");
-}
-
-static pthread_barrier_t test_setup_barrier;
-
-static void *lpi_worker_thread(void *data)
-{
-       u32 device_id = (size_t)data;
-       u32 event_id;
-       size_t i;
-
-       pthread_barrier_wait(&test_setup_barrier);
-
-       for (i = 0; i < nr_iterations; i++)
-               for (event_id = 0; event_id < test_data.nr_event_ids; event_id++)
-                       signal_lpi(device_id, event_id);
-
-       return NULL;
-}
-
-static void *vcpu_worker_thread(void *data)
-{
-       struct kvm_vcpu *vcpu = data;
-       struct ucall uc;
-
-       while (true) {
-               vcpu_run(vcpu);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       pthread_barrier_wait(&test_setup_barrier);
-                       continue;
-               case UCALL_DONE:
-                       return NULL;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       break;
-               default:
-                       TEST_FAIL("Unknown ucall: %lu", uc.cmd);
-               }
-       }
-
-       return NULL;
-}
-
-static void report_stats(struct timespec delta)
-{
-       double nr_lpis;
-       double time;
-
-       nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations;
-
-       time = delta.tv_sec;
-       time += ((double)delta.tv_nsec) / NSEC_PER_SEC;
-
-       pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time);
-}
-
-static void run_test(void)
-{
-       u32 nr_devices = test_data.nr_devices;
-       u32 nr_vcpus = test_data.nr_cpus;
-       pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t));
-       pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t));
-       struct timespec start, delta;
-       size_t i;
-
-       TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays");
-
-       pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1);
-
-       for (i = 0; i < nr_vcpus; i++)
-               pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]);
-
-       for (i = 0; i < nr_devices; i++)
-               pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i);
-
-       pthread_barrier_wait(&test_setup_barrier);
-
-       clock_gettime(CLOCK_MONOTONIC, &start);
-
-       for (i = 0; i < nr_devices; i++)
-               pthread_join(lpi_threads[i], NULL);
-
-       delta = timespec_elapsed(start);
-       write_guest_global(vm, test_data.request_vcpus_stop, true);
-
-       for (i = 0; i < nr_vcpus; i++)
-               pthread_join(vcpu_threads[i], NULL);
-
-       report_stats(delta);
-}
-
-static void setup_vm(void)
-{
-       int i;
-
-       vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu));
-       TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
-
-       vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);
-
-       vm_init_descriptor_tables(vm);
-       for (i = 0; i < test_data.nr_cpus; i++)
-               vcpu_init_descriptor_tables(vcpus[i]);
-
-       vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
-
-       setup_memslot();
-
-       setup_gic();
-
-       setup_test_data();
-}
-
-static void destroy_vm(void)
-{
-       close(its_fd);
-       close(gic_fd);
-       kvm_vm_free(vm);
-       free(vcpus);
-}
-
-static void pr_usage(const char *name)
-{
-       pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] [-h]\n", name);
-       pr_info("  -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus);
-       pr_info("  -d:\tnumber of devices (default: %u)\n", test_data.nr_devices);
-       pr_info("  -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids);
-       pr_info("  -i:\tnumber of iterations (default: %lu)\n", nr_iterations);
-}
-
-int main(int argc, char **argv)
-{
-       u32 nr_threads;
-       int c;
-
-       while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
-               switch (c) {
-               case 'v':
-                       test_data.nr_cpus = atoi(optarg);
-                       break;
-               case 'd':
-                       test_data.nr_devices = atoi(optarg);
-                       break;
-               case 'e':
-                       test_data.nr_event_ids = atoi(optarg);
-                       break;
-               case 'i':
-                       nr_iterations = strtoul(optarg, NULL, 0);
-                       break;
-               case 'h':
-               default:
-                       pr_usage(argv[0]);
-                       return 1;
-               }
-       }
-
-       nr_threads = test_data.nr_cpus + test_data.nr_devices;
-       if (nr_threads > get_nprocs())
-               pr_info("WARNING: running %u threads on %d CPUs; performance is degraded.\n",
-                        nr_threads, get_nprocs());
-
-       setup_vm();
-
-       run_test();
-
-       destroy_vm();
-
-       return 0;
-}
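
The injection path in this stress test is KVM_SIGNAL_MSI with KVM_MSI_VALID_DEVID, which stands in for a device writing its event ID to the ITS doorbell (GITS_TRANSLATER); the ITS then resolves the (DeviceID, EventID) pair to the LPI and collection that guest_setup_its_mappings() programmed. The rate printed by report_stats() is simply total MSIs divided by elapsed time; a back-of-the-envelope sketch with the default parameters and a made-up two-second run:

#include <stdio.h>

int main(void)
{
	/* Defaults: 1 device, 16 event IDs per device, 1000 iterations. */
	double nr_lpis = 1.0 * 16 * 1000;
	double secs = 2.0;		/* hypothetical elapsed time */

	printf("Rate: %.2f LPIs/sec\n", nr_lpis / secs);	/* prints 8000.00 */
	return 0;
}
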
diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
deleted file mode 100644 (file)
index f16b3b2..0000000
+++ /dev/null
@@ -1,648 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vpmu_counter_access - Test vPMU event counter access
- *
- * Copyright (c) 2023 Google LLC.
- *
- * This test checks if the guest can see the same number of PMU event
- * counters (PMCR_EL0.N) that userspace sets, if the guest can access
- * those counters, and if the guest is prevented from accessing any
- * other counters.
- * It also checks if userspace accesses to the PMU registers honor the
- * PMCR.N value that's set for the guest.
- * This test runs only when KVM_CAP_ARM_PMU_V3 is supported on the host.
- */
-#include <kvm_util.h>
-#include <processor.h>
-#include <test_util.h>
-#include <vgic.h>
-#include <perf/arm_pmuv3.h>
-#include <linux/bitfield.h>
-
-/* The max number of the PMU event counters (excluding the cycle counter) */
-#define ARMV8_PMU_MAX_GENERAL_COUNTERS (ARMV8_PMU_MAX_COUNTERS - 1)
-
-/* The cycle counter bit position that's common among the PMU registers */
-#define ARMV8_PMU_CYCLE_IDX            31
-
-struct vpmu_vm {
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu;
-       int gic_fd;
-};
-
-static struct vpmu_vm vpmu_vm;
-
-struct pmreg_sets {
-       uint64_t set_reg_id;
-       uint64_t clr_reg_id;
-};
-
-#define PMREG_SET(set, clr) {.set_reg_id = set, .clr_reg_id = clr}
-
-static uint64_t get_pmcr_n(uint64_t pmcr)
-{
-       return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr);
-}
-
-static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n)
-{
-       u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N);
-}
-
-static uint64_t get_counters_mask(uint64_t n)
-{
-       uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX);
-
-       if (n)
-               mask |= GENMASK(n - 1, 0);
-       return mask;
-}
-
-/* Read PMEVCNTR<n>_EL0 through PMXEVCNTR_EL0 */
-static inline unsigned long read_sel_evcntr(int sel)
-{
-       write_sysreg(sel, pmselr_el0);
-       isb();
-       return read_sysreg(pmxevcntr_el0);
-}
-
-/* Write PMEVCNTR<n>_EL0 through PMXEVCNTR_EL0 */
-static inline void write_sel_evcntr(int sel, unsigned long val)
-{
-       write_sysreg(sel, pmselr_el0);
-       isb();
-       write_sysreg(val, pmxevcntr_el0);
-       isb();
-}
-
-/* Read PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */
-static inline unsigned long read_sel_evtyper(int sel)
-{
-       write_sysreg(sel, pmselr_el0);
-       isb();
-       return read_sysreg(pmxevtyper_el0);
-}
-
-/* Write PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */
-static inline void write_sel_evtyper(int sel, unsigned long val)
-{
-       write_sysreg(sel, pmselr_el0);
-       isb();
-       write_sysreg(val, pmxevtyper_el0);
-       isb();
-}
-
-static void pmu_disable_reset(void)
-{
-       uint64_t pmcr = read_sysreg(pmcr_el0);
-
-       /* Reset all counters, disabling them */
-       pmcr &= ~ARMV8_PMU_PMCR_E;
-       write_sysreg(pmcr | ARMV8_PMU_PMCR_P, pmcr_el0);
-       isb();
-}
-
-#define RETURN_READ_PMEVCNTRN(n) \
-       return read_sysreg(pmevcntr##n##_el0)
-static unsigned long read_pmevcntrn(int n)
-{
-       PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN);
-       return 0;
-}
-
-#define WRITE_PMEVCNTRN(n) \
-       write_sysreg(val, pmevcntr##n##_el0)
-static void write_pmevcntrn(int n, unsigned long val)
-{
-       PMEVN_SWITCH(n, WRITE_PMEVCNTRN);
-       isb();
-}
-
-#define READ_PMEVTYPERN(n) \
-       return read_sysreg(pmevtyper##n##_el0)
-static unsigned long read_pmevtypern(int n)
-{
-       PMEVN_SWITCH(n, READ_PMEVTYPERN);
-       return 0;
-}
-
-#define WRITE_PMEVTYPERN(n) \
-       write_sysreg(val, pmevtyper##n##_el0)
-static void write_pmevtypern(int n, unsigned long val)
-{
-       PMEVN_SWITCH(n, WRITE_PMEVTYPERN);
-       isb();
-}
-
-/*
- * The pmc_accessor structure has pointers to PMEV{CNTR,TYPER}<n>_EL0
- * accessors that test cases will use. Each of the accessors either
- * directly reads/writes PMEV{CNTR,TYPER}<n>_EL0
- * (i.e. {read,write}_pmev{cnt,type}rn()), or reads/writes them through
- * PMXEV{CNTR,TYPER}_EL0 (i.e. {read,write}_sel_ev{cnt,type}r()).
- *
- * This is used to test that combinations of those accessors provide
- * consistent behavior.
- */
-struct pmc_accessor {
-       /* A function to be used to read PMEVCNTR<n>_EL0 */
-       unsigned long   (*read_cntr)(int idx);
-       /* A function to be used to write PMEVCNTR<n>_EL0 */
-       void            (*write_cntr)(int idx, unsigned long val);
-       /* A function to be used to read PMEVTYPER<n>_EL0 */
-       unsigned long   (*read_typer)(int idx);
-       /* A function to be used to write PMEVTYPER<n>_EL0 */
-       void            (*write_typer)(int idx, unsigned long val);
-};
-
-struct pmc_accessor pmc_accessors[] = {
-       /* test with all direct accesses */
-       { read_pmevcntrn, write_pmevcntrn, read_pmevtypern, write_pmevtypern },
-       /* test with all indirect accesses */
-       { read_sel_evcntr, write_sel_evcntr, read_sel_evtyper, write_sel_evtyper },
-       /* read with direct accesses, and write with indirect accesses */
-       { read_pmevcntrn, write_sel_evcntr, read_pmevtypern, write_sel_evtyper },
-       /* read with indirect accesses, and write with direct accesses */
-       { read_sel_evcntr, write_pmevcntrn, read_sel_evtyper, write_pmevtypern },
-};
-
-/*
- * Convert a pointer of pmc_accessor to an index in pmc_accessors[],
- * assuming that the pointer is one of the entries in pmc_accessors[].
- */
-#define PMC_ACC_TO_IDX(acc)    (acc - &pmc_accessors[0])
-
-#define GUEST_ASSERT_BITMAP_REG(regname, mask, set_expected)                    \
-{                                                                               \
-       uint64_t _tval = read_sysreg(regname);                                   \
-                                                                                \
-       if (set_expected)                                                        \
-               __GUEST_ASSERT((_tval & mask),                                   \
-                               "tval: 0x%lx; mask: 0x%lx; set_expected: %u",    \
-                               _tval, mask, set_expected);                      \
-       else                                                                     \
-               __GUEST_ASSERT(!(_tval & mask),                                  \
-                               "tval: 0x%lx; mask: 0x%lx; set_expected: %u",    \
-                               _tval, mask, set_expected);                      \
-}
-
-/*
- * Check if @mask bits in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers
- * are set or cleared as specified in @set_expected.
- */
-static void check_bitmap_pmu_regs(uint64_t mask, bool set_expected)
-{
-       GUEST_ASSERT_BITMAP_REG(pmcntenset_el0, mask, set_expected);
-       GUEST_ASSERT_BITMAP_REG(pmcntenclr_el0, mask, set_expected);
-       GUEST_ASSERT_BITMAP_REG(pmintenset_el1, mask, set_expected);
-       GUEST_ASSERT_BITMAP_REG(pmintenclr_el1, mask, set_expected);
-       GUEST_ASSERT_BITMAP_REG(pmovsset_el0, mask, set_expected);
-       GUEST_ASSERT_BITMAP_REG(pmovsclr_el0, mask, set_expected);
-}
-
-/*
- * Check if the bit in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers corresponding
- * to the specified counter (@pmc_idx) can be read/written as expected.
- * When @set_op is true, it tries to set the bit for the counter in
- * those registers by writing the SET registers (the bit won't be set
- * if the counter is not implemented though).
- * Otherwise, it tries to clear the bits in the registers by writing
- * the CLR registers.
- * Then, it checks if the values indicated in the registers are as expected.
- */
-static void test_bitmap_pmu_regs(int pmc_idx, bool set_op)
-{
-       uint64_t pmcr_n, test_bit = BIT(pmc_idx);
-       bool set_expected = false;
-
-       if (set_op) {
-               write_sysreg(test_bit, pmcntenset_el0);
-               write_sysreg(test_bit, pmintenset_el1);
-               write_sysreg(test_bit, pmovsset_el0);
-
-               /* The bit will be set only if the counter is implemented */
-               pmcr_n = get_pmcr_n(read_sysreg(pmcr_el0));
-               set_expected = (pmc_idx < pmcr_n) ? true : false;
-       } else {
-               write_sysreg(test_bit, pmcntenclr_el0);
-               write_sysreg(test_bit, pmintenclr_el1);
-               write_sysreg(test_bit, pmovsclr_el0);
-       }
-       check_bitmap_pmu_regs(test_bit, set_expected);
-}
-
-/*
- * Tests for reading/writing registers for the (implemented) event counter
- * specified by @pmc_idx.
- */
-static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
-{
-       uint64_t write_data, read_data;
-
-       /* Disable all PMCs and reset all PMCs to zero. */
-       pmu_disable_reset();
-
-       /*
-        * Tests for reading/writing the {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers.
-        */
-
-       /* Make sure that the bits in those registers are set to 0 */
-       test_bitmap_pmu_regs(pmc_idx, false);
-       /* Test if setting the bit in those registers works */
-       test_bitmap_pmu_regs(pmc_idx, true);
-       /* Test if clearing the bit in those registers works */
-       test_bitmap_pmu_regs(pmc_idx, false);
-
-       /*
-        * Tests for reading/writing the event type register.
-        */
-
-       /*
-        * Set the event type register to an arbitrary value just to test
-        * reading/writing the register.
-        * The Arm ARM says that for events 0x0000 to 0x003F, the value
-        * indicated in the PMEVTYPER<n>_EL0.evtCount field is the value
-        * that was written to the field, even when the specified event
-        * is not supported.
-        */
-       write_data = (ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMUV3_PERFCTR_INST_RETIRED);
-       acc->write_typer(pmc_idx, write_data);
-       read_data = acc->read_typer(pmc_idx);
-       __GUEST_ASSERT(read_data == write_data,
-                      "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
-                      pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
-
-       /*
-        * Tests for reading/writing the event count register.
-        */
-
-       read_data = acc->read_cntr(pmc_idx);
-
-       /* The count value must be 0, as it is disabled and reset */
-       __GUEST_ASSERT(read_data == 0,
-                      "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx",
-                      pmc_idx, PMC_ACC_TO_IDX(acc), read_data);
-
-       write_data = read_data + pmc_idx + 0x12345;
-       acc->write_cntr(pmc_idx, write_data);
-       read_data = acc->read_cntr(pmc_idx);
-       __GUEST_ASSERT(read_data == write_data,
-                      "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
-                      pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
-}
-
-#define INVALID_EC     (-1ul)
-uint64_t expected_ec = INVALID_EC;
-
-static void guest_sync_handler(struct ex_regs *regs)
-{
-       uint64_t esr, ec;
-
-       esr = read_sysreg(esr_el1);
-       ec = ESR_ELx_EC(esr);
-
-       __GUEST_ASSERT(expected_ec == ec,
-                       "PC: 0x%lx; ESR: 0x%lx; EC: 0x%lx; EC expected: 0x%lx",
-                       regs->pc, esr, ec, expected_ec);
-
-       /* skip the trapping instruction */
-       regs->pc += 4;
-
-       /* Use INVALID_EC to indicate an exception occurred */
-       expected_ec = INVALID_EC;
-}
-
-/*
- * Run the given operation that should trigger an exception with the
- * given exception class. The exception handler (guest_sync_handler)
- * resets expected_ec to INVALID_EC and skips the instruction that
- * trapped.
- */
-#define TEST_EXCEPTION(ec, ops)                                \
-({                                                     \
-       GUEST_ASSERT(ec != INVALID_EC);                 \
-       WRITE_ONCE(expected_ec, ec);                    \
-       dsb(ish);                                       \
-       ops;                                            \
-       GUEST_ASSERT(expected_ec == INVALID_EC);        \
-})
-
-/*
- * Tests for reading/writing registers for the unimplemented event counter
- * specified by @pmc_idx (>= PMCR_EL0.N).
- */
-static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
-{
-       /*
-        * Reading/writing the event count/type registers should cause
-        * an UNDEFINED exception.
-        */
-       TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_cntr(pmc_idx));
-       TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0));
-       TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_typer(pmc_idx));
-       TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_typer(pmc_idx, 0));
-       /*
-        * The bit corresponding to the (unimplemented) counter in
-        * {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers should be RAZ.
-        */
-       test_bitmap_pmu_regs(pmc_idx, 1);
-       test_bitmap_pmu_regs(pmc_idx, 0);
-}
-
-/*
- * The guest is configured with PMUv3 with @expected_pmcr_n number of
- * event counters.
- * Check if @expected_pmcr_n is consistent with PMCR_EL0.N, and
- * if reading/writing PMU registers for implemented or unimplemented
- * counters works as expected.
- */
-static void guest_code(uint64_t expected_pmcr_n)
-{
-       uint64_t pmcr, pmcr_n, unimp_mask;
-       int i, pmc;
-
-       __GUEST_ASSERT(expected_pmcr_n <= ARMV8_PMU_MAX_GENERAL_COUNTERS,
-                       "Expected PMCR.N: 0x%lx; ARMv8 general counters: 0x%x",
-                       expected_pmcr_n, ARMV8_PMU_MAX_GENERAL_COUNTERS);
-
-       pmcr = read_sysreg(pmcr_el0);
-       pmcr_n = get_pmcr_n(pmcr);
-
-       /* Make sure that PMCR_EL0.N indicates the value userspace set */
-       __GUEST_ASSERT(pmcr_n == expected_pmcr_n,
-                       "Expected PMCR.N: 0x%lx, PMCR.N: 0x%lx",
-                       expected_pmcr_n, pmcr_n);
-
-       /*
-        * Make sure that (RAZ) bits corresponding to unimplemented event
-        * counters in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers are reset
-        * to zero.
-        * (NOTE: bits for implemented event counters are reset to UNKNOWN)
-        */
-       unimp_mask = GENMASK_ULL(ARMV8_PMU_MAX_GENERAL_COUNTERS - 1, pmcr_n);
-       check_bitmap_pmu_regs(unimp_mask, false);
-
-       /*
-        * Tests for reading/writing PMU registers for implemented counters.
-        * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions.
-        */
-       for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) {
-               for (pmc = 0; pmc < pmcr_n; pmc++)
-                       test_access_pmc_regs(&pmc_accessors[i], pmc);
-       }
-
-       /*
-        * Tests for reading/writing PMU registers for unimplemented counters.
-        * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions.
-        */
-       for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) {
-               for (pmc = pmcr_n; pmc < ARMV8_PMU_MAX_GENERAL_COUNTERS; pmc++)
-                       test_access_invalid_pmc_regs(&pmc_accessors[i], pmc);
-       }
-
-       GUEST_DONE();
-}
-
-/* Create a VM that has one vCPU with PMUv3 configured. */
-static void create_vpmu_vm(void *guest_code)
-{
-       struct kvm_vcpu_init init;
-       uint8_t pmuver, ec;
-       uint64_t dfr0, irq = 23;
-       struct kvm_device_attr irq_attr = {
-               .group = KVM_ARM_VCPU_PMU_V3_CTRL,
-               .attr = KVM_ARM_VCPU_PMU_V3_IRQ,
-               .addr = (uint64_t)&irq,
-       };
-       struct kvm_device_attr init_attr = {
-               .group = KVM_ARM_VCPU_PMU_V3_CTRL,
-               .attr = KVM_ARM_VCPU_PMU_V3_INIT,
-       };
-
-       /* The test creates the vpmu_vm multiple times. Ensure a clean state */
-       memset(&vpmu_vm, 0, sizeof(vpmu_vm));
-
-       vpmu_vm.vm = vm_create(1);
-       vm_init_descriptor_tables(vpmu_vm.vm);
-       for (ec = 0; ec < ESR_ELx_EC_MAX + 1; ec++) {
-               vm_install_sync_handler(vpmu_vm.vm, VECTOR_SYNC_CURRENT, ec,
-                                       guest_sync_handler);
-       }
-
-       /* Create vCPU with PMUv3 */
-       vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
-       init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
-       vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code);
-       vcpu_init_descriptor_tables(vpmu_vm.vcpu);
-       vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64);
-       __TEST_REQUIRE(vpmu_vm.gic_fd >= 0,
-                      "Failed to create vgic-v3, skipping");
-
-       /* Make sure that PMUv3 support is indicated in the ID register */
-       dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
-       pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0);
-       TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF &&
-                   pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP,
-                   "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver);
-
-       /* Initialize vPMU */
-       vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr);
-       vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr);
-}
-
-static void destroy_vpmu_vm(void)
-{
-       close(vpmu_vm.gic_fd);
-       kvm_vm_free(vpmu_vm.vm);
-}
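The vpmu_vm global manipulated by create_vpmu_vm() and destroy_vpmu_vm() is declared near the top of the file, outside the hunk shown; it is roughly:

/* Global test state assumed by the helpers above (sketch). */
static struct vpmu_vm {
	struct kvm_vm *vm;
	struct kvm_vcpu *vcpu;
	int gic_fd;
} vpmu_vm;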
-
-static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n)
-{
-       struct ucall uc;
-
-       vcpu_args_set(vcpu, 1, pmcr_n);
-       vcpu_run(vcpu);
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-       case UCALL_DONE:
-               break;
-       default:
-               TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               break;
-       }
-}
-
-static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail)
-{
-       struct kvm_vcpu *vcpu;
-       uint64_t pmcr, pmcr_orig;
-
-       create_vpmu_vm(guest_code);
-       vcpu = vpmu_vm.vcpu;
-
-       pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
-       pmcr = pmcr_orig;
-
-       /*
-        * Attempt to set PMCR.N. If the requested value is larger than the
-        * host's, KVM should leave the field unmodified, but the ioctl
-        * should still succeed.
-        */
-       set_pmcr_n(&pmcr, pmcr_n);
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr);
-       pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
-
-       if (expect_fail)
-               TEST_ASSERT(pmcr_orig == pmcr,
-                           "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx",
-                           pmcr, pmcr_n);
-       else
-               TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr),
-                           "Failed to update PMCR.N to %lu (received: %lu)",
-                           pmcr_n, get_pmcr_n(pmcr));
-}
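get_pmcr_n() and set_pmcr_n(), used above, are small helpers defined earlier in the file that extract and update the PMCR_EL0.N field. A sketch, assuming ARMV8_PMU_PMCR_N is the field mask and that <linux/bitfield.h> is available (FIELD_GET() is already used elsewhere in this file):

static uint64_t get_pmcr_n(uint64_t pmcr)
{
	return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr);
}

static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n)
{
	*pmcr &= ~(uint64_t)ARMV8_PMU_PMCR_N;
	*pmcr |= FIELD_PREP(ARMV8_PMU_PMCR_N, pmcr_n);
}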
-
-/*
- * Create a guest with one vCPU, set the PMCR_EL0.N for the vCPU to @pmcr_n,
- * and run the test.
- */
-static void run_access_test(uint64_t pmcr_n)
-{
-       uint64_t sp;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vcpu_init init;
-
-       pr_debug("Test with pmcr_n %lu\n", pmcr_n);
-
-       test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
-       vcpu = vpmu_vm.vcpu;
-
-       /* Save the initial sp to restore it later when re-running the guest */
-       sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1));
-
-       run_vcpu(vcpu, pmcr_n);
-
-       /*
-        * Reset and re-initialize the vCPU, and run the guest code again to
-        * check if PMCR_EL0.N is preserved.
-        */
-       vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
-       init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
-       aarch64_vcpu_setup(vcpu, &init);
-       vcpu_init_descriptor_tables(vcpu);
-       vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp);
-       vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
-
-       run_vcpu(vcpu, pmcr_n);
-
-       destroy_vpmu_vm();
-}
-
-static struct pmreg_sets validity_check_reg_sets[] = {
-       PMREG_SET(SYS_PMCNTENSET_EL0, SYS_PMCNTENCLR_EL0),
-       PMREG_SET(SYS_PMINTENSET_EL1, SYS_PMINTENCLR_EL1),
-       PMREG_SET(SYS_PMOVSSET_EL0, SYS_PMOVSCLR_EL0),
-};
-
-/*
- * Create a VM, and check if KVM handles the userspace accesses of
- * the PMU register sets in @validity_check_reg_sets[] correctly.
- */
-static void run_pmregs_validity_test(uint64_t pmcr_n)
-{
-       int i;
-       struct kvm_vcpu *vcpu;
-       uint64_t set_reg_id, clr_reg_id, reg_val;
-       uint64_t valid_counters_mask, max_counters_mask;
-
-       test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
-       vcpu = vpmu_vm.vcpu;
-
-       valid_counters_mask = get_counters_mask(pmcr_n);
-       max_counters_mask = get_counters_mask(ARMV8_PMU_MAX_COUNTERS);
-
-       for (i = 0; i < ARRAY_SIZE(validity_check_reg_sets); i++) {
-               set_reg_id = validity_check_reg_sets[i].set_reg_id;
-               clr_reg_id = validity_check_reg_sets[i].clr_reg_id;
-
-               /*
-                * Test if the 'set' and 'clr' variants of the registers
-                * are initialized based on the number of valid counters.
-                */
-               reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id));
-               TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
-                           "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
-                           KVM_ARM64_SYS_REG(set_reg_id), reg_val);
-
-               reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id));
-               TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
-                           "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
-                           KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
-
-               /*
-                * Using the 'set' variant, force-set the register to the
-                * max number of possible counters and test if KVM discards
-                * the bits for unimplemented counters as it should.
-                */
-               vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), max_counters_mask);
-
-               reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id));
-               TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
-                           "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
-                           KVM_ARM64_SYS_REG(set_reg_id), reg_val);
-
-               reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id));
-               TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
-                           "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
-                           KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
-       }
-
-       destroy_vpmu_vm();
-}
-
-/*
- * Create a guest with one vCPU, and attempt to set the PMCR_EL0.N for
- * the vCPU to @pmcr_n, which is larger than the host value.
- * The write should have no effect, as @pmcr_n is too big for the vCPU.
- */
-static void run_error_test(uint64_t pmcr_n)
-{
-       pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n);
-
-       test_create_vpmu_vm_with_pmcr_n(pmcr_n, true);
-       destroy_vpmu_vm();
-}
-
-/*
- * Return the default number of implemented PMU event counters excluding
- * the cycle counter (i.e. PMCR_EL0.N value) for the guest.
- */
-static uint64_t get_pmcr_n_limit(void)
-{
-       uint64_t pmcr;
-
-       create_vpmu_vm(guest_code);
-       pmcr = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
-       destroy_vpmu_vm();
-       return get_pmcr_n(pmcr);
-}
-
-int main(void)
-{
-       uint64_t i, pmcr_n;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3));
-
-       pmcr_n = get_pmcr_n_limit();
-       for (i = 0; i <= pmcr_n; i++) {
-               run_access_test(i);
-               run_pmregs_validity_test(i);
-       }
-
-       for (i = pmcr_n + 1; i < ARMV8_PMU_MAX_COUNTERS; i++)
-               run_error_test(i);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
new file mode 100644 (file)
index 0000000..447d61c
--- /dev/null
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * aarch32_id_regs - Test for ID register behavior on AArch64-only systems
+ *
+ * Copyright (c) 2022 Google LLC.
+ *
+ * Test that KVM handles the AArch64 views of the AArch32 ID registers as RAZ
+ * and WI from userspace.
+ */
+
+#include <stdint.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include <linux/bitfield.h>
+
+#define BAD_ID_REG_VAL 0x1badc0deul
+
+#define GUEST_ASSERT_REG_RAZ(reg)      GUEST_ASSERT_EQ(read_sysreg_s(reg), 0)
+
+static void guest_main(void)
+{
+       GUEST_ASSERT_REG_RAZ(SYS_ID_PFR0_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_PFR1_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_DFR0_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_AFR0_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR0_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR1_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR2_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR3_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR0_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR1_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR2_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR3_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR4_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR5_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR4_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR6_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_MVFR0_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_MVFR1_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_MVFR2_EL1);
+       GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 3));
+       GUEST_ASSERT_REG_RAZ(SYS_ID_PFR2_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_DFR1_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR5_EL1);
+       GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 7));
+
+       GUEST_DONE();
+}
+
+static void test_guest_raz(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       vcpu_run(vcpu);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       case UCALL_DONE:
+               break;
+       default:
+               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+       }
+}
+
+static uint64_t raz_wi_reg_ids[] = {
+       KVM_ARM64_SYS_REG(SYS_ID_PFR0_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_PFR1_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_DFR0_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_MMFR0_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_MMFR1_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_MMFR2_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_MMFR3_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_ISAR0_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_ISAR1_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_ISAR2_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_ISAR3_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_ISAR4_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_ISAR5_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_MMFR4_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_ISAR6_EL1),
+       KVM_ARM64_SYS_REG(SYS_MVFR0_EL1),
+       KVM_ARM64_SYS_REG(SYS_MVFR1_EL1),
+       KVM_ARM64_SYS_REG(SYS_MVFR2_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_PFR2_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_MMFR5_EL1),
+};
+
+static void test_user_raz_wi(struct kvm_vcpu *vcpu)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(raz_wi_reg_ids); i++) {
+               uint64_t reg_id = raz_wi_reg_ids[i];
+               uint64_t val;
+
+               val = vcpu_get_reg(vcpu, reg_id);
+               TEST_ASSERT_EQ(val, 0);
+
+               /*
+                * Expect the ioctl to succeed with no effect on the register
+                * value.
+                */
+               vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
+
+               val = vcpu_get_reg(vcpu, reg_id);
+               TEST_ASSERT_EQ(val, 0);
+       }
+}
+
+static uint64_t raz_invariant_reg_ids[] = {
+       KVM_ARM64_SYS_REG(SYS_ID_AFR0_EL1),
+       KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 3)),
+       KVM_ARM64_SYS_REG(SYS_ID_DFR1_EL1),
+       KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 7)),
+};
+
+static void test_user_raz_invariant(struct kvm_vcpu *vcpu)
+{
+       int i, r;
+
+       for (i = 0; i < ARRAY_SIZE(raz_invariant_reg_ids); i++) {
+               uint64_t reg_id = raz_invariant_reg_ids[i];
+               uint64_t val;
+
+               val = vcpu_get_reg(vcpu, reg_id);
+               TEST_ASSERT_EQ(val, 0);
+
+               r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
+               TEST_ASSERT(r < 0 && errno == EINVAL,
+                           "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
+
+               val = vcpu_get_reg(vcpu, reg_id);
+               TEST_ASSERT_EQ(val, 0);
+       }
+}
+
+
+
+static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu)
+{
+       uint64_t val, el0;
+
+       val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+
+       el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
+       return el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY;
+}
+
+int main(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+       TEST_REQUIRE(vcpu_aarch64_only(vcpu));
+
+       test_user_raz_wi(vcpu);
+       test_user_raz_invariant(vcpu);
+       test_guest_raz(vcpu);
+
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/arm64/arch_timer.c b/tools/testing/selftests/kvm/arm64/arch_timer.c
new file mode 100644 (file)
index 0000000..eeba1cc
--- /dev/null
@@ -0,0 +1,220 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * The test validates both the virtual and physical timer IRQs using
+ * CVAL and TVAL registers.
+ *
+ * Copyright (c) 2021, Google LLC.
+ */
+#include "arch_timer.h"
+#include "delay.h"
+#include "gic.h"
+#include "processor.h"
+#include "timer_test.h"
+#include "ucall_common.h"
+#include "vgic.h"
+
+enum guest_stage {
+       GUEST_STAGE_VTIMER_CVAL = 1,
+       GUEST_STAGE_VTIMER_TVAL,
+       GUEST_STAGE_PTIMER_CVAL,
+       GUEST_STAGE_PTIMER_TVAL,
+       GUEST_STAGE_MAX,
+};
+
+static int vtimer_irq, ptimer_irq;
+
+static void
+guest_configure_timer_action(struct test_vcpu_shared_data *shared_data)
+{
+       switch (shared_data->guest_stage) {
+       case GUEST_STAGE_VTIMER_CVAL:
+               timer_set_next_cval_ms(VIRTUAL, test_args.timer_period_ms);
+               shared_data->xcnt = timer_get_cntct(VIRTUAL);
+               timer_set_ctl(VIRTUAL, CTL_ENABLE);
+               break;
+       case GUEST_STAGE_VTIMER_TVAL:
+               timer_set_next_tval_ms(VIRTUAL, test_args.timer_period_ms);
+               shared_data->xcnt = timer_get_cntct(VIRTUAL);
+               timer_set_ctl(VIRTUAL, CTL_ENABLE);
+               break;
+       case GUEST_STAGE_PTIMER_CVAL:
+               timer_set_next_cval_ms(PHYSICAL, test_args.timer_period_ms);
+               shared_data->xcnt = timer_get_cntct(PHYSICAL);
+               timer_set_ctl(PHYSICAL, CTL_ENABLE);
+               break;
+       case GUEST_STAGE_PTIMER_TVAL:
+               timer_set_next_tval_ms(PHYSICAL, test_args.timer_period_ms);
+               shared_data->xcnt = timer_get_cntct(PHYSICAL);
+               timer_set_ctl(PHYSICAL, CTL_ENABLE);
+               break;
+       default:
+               GUEST_ASSERT(0);
+       }
+}
+
+static void guest_validate_irq(unsigned int intid,
+                               struct test_vcpu_shared_data *shared_data)
+{
+       enum guest_stage stage = shared_data->guest_stage;
+       uint64_t xcnt = 0, xcnt_diff_us, cval = 0;
+       unsigned long xctl = 0;
+       unsigned int timer_irq = 0;
+       unsigned int accessor;
+
+       if (intid == IAR_SPURIOUS)
+               return;
+
+       switch (stage) {
+       case GUEST_STAGE_VTIMER_CVAL:
+       case GUEST_STAGE_VTIMER_TVAL:
+               accessor = VIRTUAL;
+               timer_irq = vtimer_irq;
+               break;
+       case GUEST_STAGE_PTIMER_CVAL:
+       case GUEST_STAGE_PTIMER_TVAL:
+               accessor = PHYSICAL;
+               timer_irq = ptimer_irq;
+               break;
+       default:
+               GUEST_ASSERT(0);
+               return;
+       }
+
+       xctl = timer_get_ctl(accessor);
+       if ((xctl & CTL_IMASK) || !(xctl & CTL_ENABLE))
+               return;
+
+       timer_set_ctl(accessor, CTL_IMASK);
+       xcnt = timer_get_cntct(accessor);
+       cval = timer_get_cval(accessor);
+
+       xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
+
+       /* Make sure we are dealing with the correct timer IRQ */
+       GUEST_ASSERT_EQ(intid, timer_irq);
+
+       /* Basic 'timer condition met' check */
+       __GUEST_ASSERT(xcnt >= cval,
+                      "xcnt = 0x%lx, cval = 0x%lx, xcnt_diff_us = 0x%lx",
+                      xcnt, cval, xcnt_diff_us);
+       __GUEST_ASSERT(xctl & CTL_ISTATUS, "xctl = 0x%lx", xctl);
+
+       WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
+}
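The cycle/time conversions used above (cycles_to_usec(), msecs_to_usecs(), etc.) come from the selftest headers (arch_timer.h and friends) and are based on the system counter frequency reported by CNTFRQ_EL0. An illustrative sketch (the helper name here is hypothetical; USEC_PER_SEC is assumed to be defined):

/* Illustrative only; the real helper lives in the arch_timer.h header. */
static inline uint64_t approx_cycles_to_usec(uint64_t cycles)
{
	/* CNTFRQ_EL0 reports the system counter frequency in Hz. */
	return (cycles * USEC_PER_SEC) / read_sysreg(cntfrq_el0);
}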
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+       unsigned int intid = gic_get_and_ack_irq();
+       uint32_t cpu = guest_get_vcpuid();
+       struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+       guest_validate_irq(intid, shared_data);
+
+       gic_set_eoi(intid);
+}
+
+static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
+                               enum guest_stage stage)
+{
+       uint32_t irq_iter, config_iter;
+
+       shared_data->guest_stage = stage;
+       shared_data->nr_iter = 0;
+
+       for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+               /* Setup the next interrupt */
+               guest_configure_timer_action(shared_data);
+
+               /* Setup a timeout for the interrupt to arrive */
+               udelay(msecs_to_usecs(test_args.timer_period_ms) +
+                       test_args.timer_err_margin_us);
+
+               irq_iter = READ_ONCE(shared_data->nr_iter);
+               __GUEST_ASSERT(config_iter + 1 == irq_iter,
+                               "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
+                               "  Guest timer interrupt was not triggered within the specified\n"
+                               "  interval, try to increase the error margin by [-e] option.\n",
+                               config_iter + 1, irq_iter);
+       }
+}
+
+static void guest_code(void)
+{
+       uint32_t cpu = guest_get_vcpuid();
+       struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+       local_irq_disable();
+
+       gic_init(GIC_V3, test_args.nr_vcpus);
+
+       timer_set_ctl(VIRTUAL, CTL_IMASK);
+       timer_set_ctl(PHYSICAL, CTL_IMASK);
+
+       gic_irq_enable(vtimer_irq);
+       gic_irq_enable(ptimer_irq);
+       local_irq_enable();
+
+       guest_run_stage(shared_data, GUEST_STAGE_VTIMER_CVAL);
+       guest_run_stage(shared_data, GUEST_STAGE_VTIMER_TVAL);
+       guest_run_stage(shared_data, GUEST_STAGE_PTIMER_CVAL);
+       guest_run_stage(shared_data, GUEST_STAGE_PTIMER_TVAL);
+
+       GUEST_DONE();
+}
+
+static void test_init_timer_irq(struct kvm_vm *vm)
+{
+       /* The timer intid should be the same for all vCPUs, so query only vCPU-0 */
+       vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
+                            KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
+       vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
+                            KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
+
+       sync_global_to_guest(vm, ptimer_irq);
+       sync_global_to_guest(vm, vtimer_irq);
+
+       pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
+}
+
+static int gic_fd;
+
+struct kvm_vm *test_vm_create(void)
+{
+       struct kvm_vm *vm;
+       unsigned int i;
+       int nr_vcpus = test_args.nr_vcpus;
+
+       vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+
+       vm_init_descriptor_tables(vm);
+       vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
+
+       if (!test_args.reserved) {
+               if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET)) {
+                       struct kvm_arm_counter_offset offset = {
+                               .counter_offset = test_args.counter_offset,
+                               .reserved = 0,
+                       };
+                       vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &offset);
+               } else
+                       TEST_FAIL("no support for global offset");
+       }
+
+       for (i = 0; i < nr_vcpus; i++)
+               vcpu_init_descriptor_tables(vcpus[i]);
+
+       test_init_timer_irq(vm);
+       gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);
+       __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3");
+
+       /* Make all the test's cmdline args visible to the guest */
+       sync_global_to_guest(vm, test_args);
+
+       return vm;
+}
+
+void test_vm_cleanup(struct kvm_vm *vm)
+{
+       close(gic_fd);
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
new file mode 100644 (file)
index 0000000..a36a7e2
--- /dev/null
@@ -0,0 +1,1062 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * arch_timer_edge_cases.c - Tests the aarch64 timer IRQ functionality.
+ *
+ * The test validates some edge cases related to the arch-timer:
+ * - timers above the max TVAL value.
+ * - timers in the past.
+ * - moving counters ahead and behind pending timers.
+ * - reprogramming timers.
+ * - timers fired multiple times.
+ * - masking/unmasking using the timer control mask.
+ *
+ * Copyright (c) 2021, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+#include <pthread.h>
+#include <sys/sysinfo.h>
+
+#include "arch_timer.h"
+#include "gic.h"
+#include "vgic.h"
+
+static const uint64_t CVAL_MAX = ~0ULL;
+/* tval is a signed 32-bit int. */
+static const int32_t TVAL_MAX = INT32_MAX;
+static const int32_t TVAL_MIN = INT32_MIN;
+
+/* How long to wait before declaring that no IRQ will arrive. */
+static const uint32_t TIMEOUT_NO_IRQ_US = 50000;
+
+/* A nice counter value to use as the starting one for most tests. */
+static const uint64_t DEF_CNT = (CVAL_MAX / 2);
+
+/* Number of runs. */
+static const uint32_t NR_TEST_ITERS_DEF = 5;
+
+/* Default wait test time in ms. */
+static const uint32_t WAIT_TEST_MS = 10;
+
+/* Default "long" wait test time in ms. */
+static const uint32_t LONG_WAIT_TEST_MS = 100;
+
+/* Shared with IRQ handler. */
+struct test_vcpu_shared_data {
+       atomic_t handled;
+       atomic_t spurious;
+} shared_data;
+
+struct test_args {
+       /* Virtual or physical timer and counter tests. */
+       enum arch_timer timer;
+       /* Delay used for most timer tests. */
+       uint64_t wait_ms;
+       /* Delay used in the test_long_timer_delays test. */
+       uint64_t long_wait_ms;
+       /* Number of iterations. */
+       int iterations;
+       /* Whether to test the physical timer. */
+       bool test_physical;
+       /* Whether to test the virtual timer. */
+       bool test_virtual;
+};
+
+struct test_args test_args = {
+       .wait_ms = WAIT_TEST_MS,
+       .long_wait_ms = LONG_WAIT_TEST_MS,
+       .iterations = NR_TEST_ITERS_DEF,
+       .test_physical = true,
+       .test_virtual = true,
+};
+
+static int vtimer_irq, ptimer_irq;
+
+enum sync_cmd {
+       SET_COUNTER_VALUE,
+       USERSPACE_USLEEP,
+       USERSPACE_SCHED_YIELD,
+       USERSPACE_MIGRATE_SELF,
+       NO_USERSPACE_CMD,
+};
+
+typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec);
+
+static void sleep_poll(enum arch_timer timer, uint64_t usec);
+static void sleep_sched_poll(enum arch_timer timer, uint64_t usec);
+static void sleep_in_userspace(enum arch_timer timer, uint64_t usec);
+static void sleep_migrate(enum arch_timer timer, uint64_t usec);
+
+sleep_method_t sleep_method[] = {
+       sleep_poll,
+       sleep_sched_poll,
+       sleep_migrate,
+       sleep_in_userspace,
+};
+
+typedef void (*irq_wait_method_t)(void);
+
+static void wait_for_non_spurious_irq(void);
+static void wait_poll_for_irq(void);
+static void wait_sched_poll_for_irq(void);
+static void wait_migrate_poll_for_irq(void);
+
+irq_wait_method_t irq_wait_method[] = {
+       wait_for_non_spurious_irq,
+       wait_poll_for_irq,
+       wait_sched_poll_for_irq,
+       wait_migrate_poll_for_irq,
+};
+
+enum timer_view {
+       TIMER_CVAL,
+       TIMER_TVAL,
+};
+
+static void assert_irqs_handled(uint32_t n)
+{
+       int h = atomic_read(&shared_data.handled);
+
+       __GUEST_ASSERT(h == n, "Handled %d IRQS but expected %d", h, n);
+}
+
+static void userspace_cmd(uint64_t cmd)
+{
+       GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0);
+}
+
+static void userspace_migrate_vcpu(void)
+{
+       userspace_cmd(USERSPACE_MIGRATE_SELF);
+}
+
+static void userspace_sleep(uint64_t usecs)
+{
+       GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0);
+}
+
+static void set_counter(enum arch_timer timer, uint64_t counter)
+{
+       GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0);
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+       unsigned int intid = gic_get_and_ack_irq();
+       enum arch_timer timer;
+       uint64_t cnt, cval;
+       uint32_t ctl;
+       bool timer_condition, istatus;
+
+       if (intid == IAR_SPURIOUS) {
+               atomic_inc(&shared_data.spurious);
+               goto out;
+       }
+
+       if (intid == ptimer_irq)
+               timer = PHYSICAL;
+       else if (intid == vtimer_irq)
+               timer = VIRTUAL;
+       else
+               goto out;
+
+       ctl = timer_get_ctl(timer);
+       cval = timer_get_cval(timer);
+       cnt = timer_get_cntct(timer);
+       timer_condition = cnt >= cval;
+       istatus = (ctl & CTL_ISTATUS) && (ctl & CTL_ENABLE);
+       GUEST_ASSERT_EQ(timer_condition, istatus);
+
+       /* Disable and mask the timer. */
+       timer_set_ctl(timer, CTL_IMASK);
+
+       atomic_inc(&shared_data.handled);
+
+out:
+       gic_set_eoi(intid);
+}
+
+static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles,
+                        uint32_t ctl)
+{
+       atomic_set(&shared_data.handled, 0);
+       atomic_set(&shared_data.spurious, 0);
+       timer_set_cval(timer, cval_cycles);
+       timer_set_ctl(timer, ctl);
+}
+
+static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles,
+                        uint32_t ctl)
+{
+       atomic_set(&shared_data.handled, 0);
+       atomic_set(&shared_data.spurious, 0);
+       timer_set_ctl(timer, ctl);
+       timer_set_tval(timer, tval_cycles);
+}
+
+static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl,
+                        enum timer_view tv)
+{
+       switch (tv) {
+       case TIMER_CVAL:
+               set_cval_irq(timer, xval, ctl);
+               break;
+       case TIMER_TVAL:
+               set_tval_irq(timer, xval, ctl);
+               break;
+       default:
+               GUEST_FAIL("Could not get timer %d", timer);
+       }
+}
+
+/*
+ * Note that this can theoretically hang forever, so we rely on having
+ * a timeout mechanism in the "runner", like:
+ * tools/testing/selftests/kselftest/runner.sh.
+ */
+static void wait_for_non_spurious_irq(void)
+{
+       int h;
+
+       local_irq_disable();
+
+       for (h = atomic_read(&shared_data.handled); h == atomic_read(&shared_data.handled);) {
+               wfi();
+               local_irq_enable();
+               isb(); /* handle IRQ */
+               local_irq_disable();
+       }
+}
+
+/*
+ * Wait for a non-spurious IRQ by polling in the guest or in
+ * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD).
+ *
+ * Note that this can theoretically hang forever, so we rely on having
+ * a timeout mechanism in the "runner", like:
+ * tools/testing/selftests/kselftest/runner.sh.
+ */
+static void poll_for_non_spurious_irq(enum sync_cmd usp_cmd)
+{
+       int h;
+
+       local_irq_disable();
+
+       h = atomic_read(&shared_data.handled);
+
+       local_irq_enable();
+       while (h == atomic_read(&shared_data.handled)) {
+               if (usp_cmd == NO_USERSPACE_CMD)
+                       cpu_relax();
+               else
+                       userspace_cmd(usp_cmd);
+       }
+       local_irq_disable();
+}
+
+static void wait_poll_for_irq(void)
+{
+       poll_for_non_spurious_irq(NO_USERSPACE_CMD);
+}
+
+static void wait_sched_poll_for_irq(void)
+{
+       poll_for_non_spurious_irq(USERSPACE_SCHED_YIELD);
+}
+
+static void wait_migrate_poll_for_irq(void)
+{
+       poll_for_non_spurious_irq(USERSPACE_MIGRATE_SELF);
+}
+
+/*
+ * Sleep for usec microseconds by polling in the guest or in
+ * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD).
+ */
+static void guest_poll(enum arch_timer test_timer, uint64_t usec,
+                      enum sync_cmd usp_cmd)
+{
+       uint64_t cycles = usec_to_cycles(usec);
+       /* Whichever timer we are testing with, sleep with the other. */
+       enum arch_timer sleep_timer = 1 - test_timer;
+       uint64_t start = timer_get_cntct(sleep_timer);
+
+       while ((timer_get_cntct(sleep_timer) - start) < cycles) {
+               if (usp_cmd == NO_USERSPACE_CMD)
+                       cpu_relax();
+               else
+                       userspace_cmd(usp_cmd);
+       }
+}
+
+static void sleep_poll(enum arch_timer timer, uint64_t usec)
+{
+       guest_poll(timer, usec, NO_USERSPACE_CMD);
+}
+
+static void sleep_sched_poll(enum arch_timer timer, uint64_t usec)
+{
+       guest_poll(timer, usec, USERSPACE_SCHED_YIELD);
+}
+
+static void sleep_migrate(enum arch_timer timer, uint64_t usec)
+{
+       guest_poll(timer, usec, USERSPACE_MIGRATE_SELF);
+}
+
+static void sleep_in_userspace(enum arch_timer timer, uint64_t usec)
+{
+       userspace_sleep(usec);
+}
+
+/*
+ * Reset the timer state to some nice values like the counter not being close
+ * to the edge, and the control register masked and disabled.
+ */
+static void reset_timer_state(enum arch_timer timer, uint64_t cnt)
+{
+       set_counter(timer, cnt);
+       timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_timer_xval(enum arch_timer timer, uint64_t xval,
+                           enum timer_view tv, irq_wait_method_t wm, bool reset_state,
+                           uint64_t reset_cnt)
+{
+       local_irq_disable();
+
+       if (reset_state)
+               reset_timer_state(timer, reset_cnt);
+
+       set_xval_irq(timer, xval, CTL_ENABLE, tv);
+
+       /* This method re-enables IRQs to handle the one we're looking for. */
+       wm();
+
+       assert_irqs_handled(1);
+       local_irq_enable();
+}
+
+/*
+ * The test_timer_* functions will program the timer, wait for it, and assert
+ * the firing of the correct IRQ.
+ *
+ * These functions don't have a timeout and return as soon as they receive an
+ * IRQ. They can hang (forever), so we rely on having a timeout mechanism in
+ * the "runner", like: tools/testing/selftests/kselftest/runner.sh.
+ */
+
+static void test_timer_cval(enum arch_timer timer, uint64_t cval,
+                           irq_wait_method_t wm, bool reset_state,
+                           uint64_t reset_cnt)
+{
+       test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt);
+}
+
+static void test_timer_tval(enum arch_timer timer, int32_t tval,
+                           irq_wait_method_t wm, bool reset_state,
+                           uint64_t reset_cnt)
+{
+       test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state,
+                       reset_cnt);
+}
+
+static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval,
+                                  uint64_t usec, enum timer_view timer_view,
+                                  sleep_method_t guest_sleep)
+{
+       local_irq_disable();
+
+       set_xval_irq(timer, xval, CTL_ENABLE | CTL_IMASK, timer_view);
+       guest_sleep(timer, usec);
+
+       local_irq_enable();
+       isb();
+
+       /* Assume success (no IRQ) after waiting usec microseconds */
+       assert_irqs_handled(0);
+}
+
+static void test_cval_no_irq(enum arch_timer timer, uint64_t cval,
+                            uint64_t usec, sleep_method_t wm)
+{
+       test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm);
+}
+
+static void test_tval_no_irq(enum arch_timer timer, int32_t tval, uint64_t usec,
+                            sleep_method_t wm)
+{
+       /* tval will be cast to an int32_t in test_xval_check_no_irq */
+       test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm);
+}
+
+/* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */
+static void test_timer_control_mask_then_unmask(enum arch_timer timer)
+{
+       reset_timer_state(timer, DEF_CNT);
+       set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
+
+       /* Unmask the timer, and then get an IRQ. */
+       local_irq_disable();
+       timer_set_ctl(timer, CTL_ENABLE);
+       /* This method re-enables IRQs to handle the one we're looking for. */
+       wait_for_non_spurious_irq();
+
+       assert_irqs_handled(1);
+       local_irq_enable();
+}
+
+/* Check that timer control masks actually mask a timer being fired. */
+static void test_timer_control_masks(enum arch_timer timer)
+{
+       reset_timer_state(timer, DEF_CNT);
+
+       /* Local IRQs are not masked at this point. */
+
+       set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
+
+       /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
+       sleep_poll(timer, TIMEOUT_NO_IRQ_US);
+
+       assert_irqs_handled(0);
+       timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_fire_a_timer_multiple_times(enum arch_timer timer,
+                                            irq_wait_method_t wm, int num)
+{
+       int i;
+
+       local_irq_disable();
+       reset_timer_state(timer, DEF_CNT);
+
+       set_tval_irq(timer, 0, CTL_ENABLE);
+
+       for (i = 1; i <= num; i++) {
+               /* This method re-enables IRQs to handle the one we're looking for. */
+               wm();
+
+               /*
+                * The IRQ handler masked and disabled the timer.
+                * Enable and unmask it again.
+                */
+               timer_set_ctl(timer, CTL_ENABLE);
+
+               assert_irqs_handled(i);
+       }
+
+       local_irq_enable();
+}
+
+static void test_timers_fired_multiple_times(enum arch_timer timer)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++)
+               test_fire_a_timer_multiple_times(timer, irq_wait_method[i], 10);
+}
+
+/*
+ * Set a timer for tval=delta_1_ms then reprogram it to
+ * tval=delta_2_ms. Check that the timer fires. There is no
+ * timeout for the wait: we use the wfi instruction.
+ */
+static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm,
+                                    int32_t delta_1_ms, int32_t delta_2_ms)
+{
+       local_irq_disable();
+       reset_timer_state(timer, DEF_CNT);
+
+       /* Program the timer to DEF_CNT + delta_1_ms. */
+       set_tval_irq(timer, msec_to_cycles(delta_1_ms), CTL_ENABLE);
+
+       /* Reprogram the timer to DEF_CNT + delta_2_ms. */
+       timer_set_tval(timer, msec_to_cycles(delta_2_ms));
+
+       /* This method re-enables IRQs to handle the one we're looking for. */
+       wm();
+
+       /* The IRQ should arrive at DEF_CNT + delta_2_ms (or after). */
+       GUEST_ASSERT(timer_get_cntct(timer) >=
+                    DEF_CNT + msec_to_cycles(delta_2_ms));
+
+       local_irq_enable();
+       assert_irqs_handled(1);
+};
+
+static void test_reprogram_timers(enum arch_timer timer)
+{
+       int i;
+       uint64_t base_wait = test_args.wait_ms;
+
+       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+               /*
+                * Ensure reprogramming works whether going from a
+                * longer time to a shorter or vice versa.
+                */
+               test_reprogramming_timer(timer, irq_wait_method[i], 2 * base_wait,
+                                        base_wait);
+               test_reprogramming_timer(timer, irq_wait_method[i], base_wait,
+                                        2 * base_wait);
+       }
+}
+
+static void test_basic_functionality(enum arch_timer timer)
+{
+       int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms);
+       uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms);
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+               irq_wait_method_t wm = irq_wait_method[i];
+
+               test_timer_cval(timer, cval, wm, true, DEF_CNT);
+               test_timer_tval(timer, tval, wm, true, DEF_CNT);
+       }
+}
+
+/*
+ * This test checks basic timer behavior without actually firing timers, things
+ * like: the relationship between cval and tval, tval down-counting.
+ */
+static void timers_sanity_checks(enum arch_timer timer, bool use_sched)
+{
+       reset_timer_state(timer, DEF_CNT);
+
+       local_irq_disable();
+
+       /* cval in the past */
+       timer_set_cval(timer,
+                      timer_get_cntct(timer) -
+                      msec_to_cycles(test_args.wait_ms));
+       if (use_sched)
+               userspace_migrate_vcpu();
+       GUEST_ASSERT(timer_get_tval(timer) < 0);
+
+       /* tval in the past */
+       timer_set_tval(timer, -1);
+       if (use_sched)
+               userspace_migrate_vcpu();
+       GUEST_ASSERT(timer_get_cval(timer) < timer_get_cntct(timer));
+
+       /*
+        * tval larger than TVAL_MAX. This requires programming with
+        * timer_set_cval instead so the value is expressible.
+        */
+       timer_set_cval(timer,
+                      timer_get_cntct(timer) + TVAL_MAX +
+                      msec_to_cycles(test_args.wait_ms));
+       if (use_sched)
+               userspace_migrate_vcpu();
+       GUEST_ASSERT(timer_get_tval(timer) <= 0);
+
+       /*
+        * tval larger than 2 * TVAL_MAX.
+        * A delta of twice TVAL_MAX wraps the 32-bit TVAL all the way around.
+        */
+       timer_set_cval(timer,
+                      timer_get_cntct(timer) + 2ULL * TVAL_MAX +
+                      msec_to_cycles(test_args.wait_ms));
+       if (use_sched)
+               userspace_migrate_vcpu();
+       GUEST_ASSERT(timer_get_tval(timer) <=
+                      msec_to_cycles(test_args.wait_ms));
+
+       /* Negative tval that rolls over from 0. */
+       set_counter(timer, msec_to_cycles(1));
+       timer_set_tval(timer, -1 * msec_to_cycles(test_args.wait_ms));
+       if (use_sched)
+               userspace_migrate_vcpu();
+       GUEST_ASSERT(timer_get_cval(timer) >= (CVAL_MAX - msec_to_cycles(test_args.wait_ms)));
+
+       /* tval should keep down-counting from 0 to -1. */
+       timer_set_tval(timer, 0);
+       sleep_poll(timer, 1);
+       GUEST_ASSERT(timer_get_tval(timer) < 0);
+
+       local_irq_enable();
+
+       /* Mask and disable any pending timer. */
+       timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_timers_sanity_checks(enum arch_timer timer)
+{
+       timers_sanity_checks(timer, false);
+       /* Check how KVM saves/restores these edge-case values. */
+       timers_sanity_checks(timer, true);
+}
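The checks above exercise the architectural relationship between CVAL and TVAL; as a summary of the arithmetic involved (per the Arm ARM, not specific to this test):

/*
 * Writing TVAL = t sets CVAL = CNT + sign_extend32(t); reading TVAL
 * returns (int32_t)(CVAL - CNT). For example, writing a negative TVAL
 * while CNT is very small makes CVAL wrap to just below CVAL_MAX,
 * which is what the "negative tval that rolls over from 0" check
 * above relies on.
 */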
+
+static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t wm)
+{
+       local_irq_disable();
+       reset_timer_state(timer, DEF_CNT);
+
+       set_cval_irq(timer,
+                    (uint64_t) TVAL_MAX +
+                    msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE);
+
+       set_counter(timer, TVAL_MAX);
+
+       /* This method re-enables IRQs to handle the one we're looking for. */
+       wm();
+
+       assert_irqs_handled(1);
+       local_irq_enable();
+}
+
+/* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */
+static void test_timers_above_tval_max(enum arch_timer timer)
+{
+       uint64_t cval;
+       int i;
+
+       /*
+        * Test that the system is not implementing cval in terms of
+        * tval.  If that was the case, setting a cval to "cval = now
+        * + TVAL_MAX + wait_ms" would wrap to "cval = now +
+        * wait_ms", and the timer would fire immediately. Test that it
+        * doesn't.
+        */
+       for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+               reset_timer_state(timer, DEF_CNT);
+               cval = timer_get_cntct(timer) + TVAL_MAX +
+                       msec_to_cycles(test_args.wait_ms);
+               test_cval_no_irq(timer, cval,
+                                msecs_to_usecs(test_args.wait_ms) +
+                                TIMEOUT_NO_IRQ_US, sleep_method[i]);
+       }
+
+       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+               /* Get the IRQ by moving the counter forward. */
+               test_set_cnt_after_tval_max(timer, irq_wait_method[i]);
+       }
+}
+
+/*
+ * Template function used by the test_move_counters_* tests.  It sets the
+ * counter to cnt_1, programs the [c|t]val, sets the counter to cnt_2, and
+ * then waits for an IRQ.
+ */
+static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1,
+                                   uint64_t xval, uint64_t cnt_2,
+                                   irq_wait_method_t wm, enum timer_view tv)
+{
+       local_irq_disable();
+
+       set_counter(timer, cnt_1);
+       timer_set_ctl(timer, CTL_IMASK);
+
+       set_xval_irq(timer, xval, CTL_ENABLE, tv);
+       set_counter(timer, cnt_2);
+       /* This method re-enables IRQs to handle the one we're looking for. */
+       wm();
+
+       assert_irqs_handled(1);
+       local_irq_enable();
+}
+
+/*
+ * Template function used by the test_move_counters_* tests.  It sets the
+ * counter to cnt_1, programs the [c|t]val, sets the counter to cnt_2, and
+ * then sleeps, asserting that no IRQ fires.
+ */
+static void test_set_cnt_after_xval_no_irq(enum arch_timer timer,
+                                          uint64_t cnt_1, uint64_t xval,
+                                          uint64_t cnt_2,
+                                          sleep_method_t guest_sleep,
+                                          enum timer_view tv)
+{
+       local_irq_disable();
+
+       set_counter(timer, cnt_1);
+       timer_set_ctl(timer, CTL_IMASK);
+
+       set_xval_irq(timer, xval, CTL_ENABLE, tv);
+       set_counter(timer, cnt_2);
+       guest_sleep(timer, TIMEOUT_NO_IRQ_US);
+
+       local_irq_enable();
+       isb();
+
+       /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
+       assert_irqs_handled(0);
+       timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1,
+                                   int32_t tval, uint64_t cnt_2,
+                                   irq_wait_method_t wm)
+{
+       test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL);
+}
+
+static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1,
+                                   uint64_t cval, uint64_t cnt_2,
+                                   irq_wait_method_t wm)
+{
+       test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL);
+}
+
+static void test_set_cnt_after_tval_no_irq(enum arch_timer timer,
+                                          uint64_t cnt_1, int32_t tval,
+                                          uint64_t cnt_2, sleep_method_t wm)
+{
+       test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm,
+                                      TIMER_TVAL);
+}
+
+static void test_set_cnt_after_cval_no_irq(enum arch_timer timer,
+                                          uint64_t cnt_1, uint64_t cval,
+                                          uint64_t cnt_2, sleep_method_t wm)
+{
+       test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm,
+                                      TIMER_CVAL);
+}
+
+/* Set a timer and then move the counter ahead of it. */
+static void test_move_counters_ahead_of_timers(enum arch_timer timer)
+{
+       int i;
+       int32_t tval;
+
+       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+               irq_wait_method_t wm = irq_wait_method[i];
+
+               test_set_cnt_after_cval(timer, 0, DEF_CNT, DEF_CNT + 1, wm);
+               test_set_cnt_after_cval(timer, CVAL_MAX, 1, 2, wm);
+
+               /* Move counter ahead of negative tval. */
+               test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm);
+               test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm);
+               tval = TVAL_MAX;
+               test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1,
+                                       wm);
+       }
+
+       for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+               sleep_method_t sm = sleep_method[i];
+
+               test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm);
+       }
+}
+
+/*
+ * Program a timer, mask it, and then change the tval or counter to cancel it.
+ * Unmask it and check that nothing fires.
+ */
+static void test_move_counters_behind_timers(enum arch_timer timer)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+               sleep_method_t sm = sleep_method[i];
+
+               test_set_cnt_after_cval_no_irq(timer, DEF_CNT, DEF_CNT - 1, 0,
+                                              sm);
+               test_set_cnt_after_tval_no_irq(timer, DEF_CNT, -1, 0, sm);
+       }
+}
+
+static void test_timers_in_the_past(enum arch_timer timer)
+{
+       int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms);
+       uint64_t cval;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+               irq_wait_method_t wm = irq_wait_method[i];
+
+               /* Set a timer wait_ms in the past. */
+               cval = DEF_CNT - msec_to_cycles(test_args.wait_ms);
+               test_timer_cval(timer, cval, wm, true, DEF_CNT);
+               test_timer_tval(timer, tval, wm, true, DEF_CNT);
+
+               /* Set a timer to counter=0 (in the past) */
+               test_timer_cval(timer, 0, wm, true, DEF_CNT);
+
+               /* Set a timer for tval=0 (now) */
+               test_timer_tval(timer, 0, wm, true, DEF_CNT);
+
+               /* Set a timer to as far in the past as possible */
+               test_timer_tval(timer, TVAL_MIN, wm, true, DEF_CNT);
+       }
+
+       /*
+        * Set the counter to wait_ms, and a tval to -wait_ms. There should be no
+        * IRQ as that tval means cval=CVAL_MAX-wait_ms.
+        */
+       for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+               sleep_method_t sm = sleep_method[i];
+
+               set_counter(timer, msec_to_cycles(test_args.wait_ms));
+               test_tval_no_irq(timer, tval, TIMEOUT_NO_IRQ_US, sm);
+       }
+}
+
+static void test_long_timer_delays(enum arch_timer timer)
+{
+       int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms);
+       uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms);
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+               irq_wait_method_t wm = irq_wait_method[i];
+
+               test_timer_cval(timer, cval, wm, true, DEF_CNT);
+               test_timer_tval(timer, tval, wm, true, DEF_CNT);
+       }
+}
+
+static void guest_run_iteration(enum arch_timer timer)
+{
+       test_basic_functionality(timer);
+       test_timers_sanity_checks(timer);
+
+       test_timers_above_tval_max(timer);
+       test_timers_in_the_past(timer);
+
+       test_move_counters_ahead_of_timers(timer);
+       test_move_counters_behind_timers(timer);
+       test_reprogram_timers(timer);
+
+       test_timers_fired_multiple_times(timer);
+
+       test_timer_control_mask_then_unmask(timer);
+       test_timer_control_masks(timer);
+}
+
+static void guest_code(enum arch_timer timer)
+{
+       int i;
+
+       local_irq_disable();
+
+       gic_init(GIC_V3, 1);
+
+       timer_set_ctl(VIRTUAL, CTL_IMASK);
+       timer_set_ctl(PHYSICAL, CTL_IMASK);
+
+       gic_irq_enable(vtimer_irq);
+       gic_irq_enable(ptimer_irq);
+       local_irq_enable();
+
+       for (i = 0; i < test_args.iterations; i++) {
+               GUEST_SYNC(i);
+               guest_run_iteration(timer);
+       }
+
+       test_long_timer_delays(timer);
+       GUEST_DONE();
+}
+
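+/*
+ * Return the next physical CPU in the task's affinity mask (wrapping around
+ * CPU_SETSIZE) so the vCPU thread can be bounced between pCPUs.
+ */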
+static uint32_t next_pcpu(void)
+{
+       uint32_t max = get_nprocs();
+       uint32_t cur = sched_getcpu();
+       uint32_t next = cur;
+       cpu_set_t cpuset;
+
+       TEST_ASSERT(max > 1, "Need at least two physical cpus");
+
+       sched_getaffinity(0, sizeof(cpuset), &cpuset);
+
+       do {
+               next = (next + 1) % CPU_SETSIZE;
+       } while (!CPU_ISSET(next, &cpuset));
+
+       return next;
+}
+
+static void migrate_self(uint32_t new_pcpu)
+{
+       int ret;
+       cpu_set_t cpuset;
+       pthread_t thread;
+
+       thread = pthread_self();
+
+       CPU_ZERO(&cpuset);
+       CPU_SET(new_pcpu, &cpuset);
+
+       pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu);
+
+       ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
+
+       TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n",
+                   new_pcpu, ret);
+}
+
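+/* Set the guest's physical or virtual counter via the KVM timer registers. */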
+static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt,
+                          enum arch_timer timer)
+{
+       if (timer == PHYSICAL)
+               vcpu_set_reg(vcpu, KVM_REG_ARM_PTIMER_CNT, cnt);
+       else
+               vcpu_set_reg(vcpu, KVM_REG_ARM_TIMER_CNT, cnt);
+}
+
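+/*
+ * Handle a GUEST_SYNC request: the guest passes a command and arguments in
+ * the ucall so userspace can set the counter, sleep, yield, or migrate.
+ */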
+static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
+{
+       enum sync_cmd cmd = uc->args[1];
+       uint64_t val = uc->args[2];
+       enum arch_timer timer = uc->args[3];
+
+       switch (cmd) {
+       case SET_COUNTER_VALUE:
+               kvm_set_cntxct(vcpu, val, timer);
+               break;
+       case USERSPACE_USLEEP:
+               usleep(val);
+               break;
+       case USERSPACE_SCHED_YIELD:
+               sched_yield();
+               break;
+       case USERSPACE_MIGRATE_SELF:
+               migrate_self(next_pcpu());
+               break;
+       default:
+               break;
+       }
+}
+
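+/*
+ * Run the vCPU, starting pinned to pCPU 0, until the guest signals that it
+ * is done (or aborts), servicing GUEST_SYNC requests along the way.
+ */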
+static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       /* Start on CPU 0 */
+       migrate_self(0);
+
+       while (true) {
+               vcpu_run(vcpu);
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       handle_sync(vcpu, &uc);
+                       break;
+               case UCALL_DONE:
+                       goto out;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       goto out;
+               default:
+                       TEST_FAIL("Unexpected guest exit\n");
+               }
+       }
+
+ out:
+       return;
+}
+
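+/* Get the vtimer/ptimer IRQs from KVM and propagate them to the guest. */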
+static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+       vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
+                            KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
+       vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
+                            KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
+
+       sync_global_to_guest(vm, ptimer_irq);
+       sync_global_to_guest(vm, vtimer_irq);
+
+       pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
+}
+
+static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
+                          enum arch_timer timer)
+{
+       *vm = vm_create_with_one_vcpu(vcpu, guest_code);
+       TEST_ASSERT(*vm, "Failed to create the test VM\n");
+
+       vm_init_descriptor_tables(*vm);
+       vm_install_exception_handler(*vm, VECTOR_IRQ_CURRENT,
+                                    guest_irq_handler);
+
+       vcpu_init_descriptor_tables(*vcpu);
+       vcpu_args_set(*vcpu, 1, timer);
+
+       test_init_timer_irq(*vm, *vcpu);
+       vgic_v3_setup(*vm, 1, 64);
+       sync_global_to_guest(*vm, test_args);
+}
+
+static void test_print_help(char *name)
+{
+       pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v] [-w wait_ms]\n",
+               name);
+       pr_info("\t-i: Number of iterations (default: %u)\n",
+               NR_TEST_ITERS_DEF);
+       pr_info("\t-b: Test both physical and virtual timers (default: true)\n");
+       pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n",
+               LONG_WAIT_TEST_MS);
+       pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n",
+               WAIT_TEST_MS);
+       pr_info("\t-p: Test physical timer (default: true)\n");
+       pr_info("\t-v: Test virtual timer (default: true)\n");
+       pr_info("\t-h: Print this help message\n");
+}
+
+static bool parse_args(int argc, char *argv[])
+{
+       int opt;
+
+       while ((opt = getopt(argc, argv, "bhi:l:pvw:")) != -1) {
+               switch (opt) {
+               case 'b':
+                       test_args.test_physical = true;
+                       test_args.test_virtual = true;
+                       break;
+               case 'i':
+                       test_args.iterations =
+                           atoi_positive("Number of iterations", optarg);
+                       break;
+               case 'l':
+                       test_args.long_wait_ms =
+                           atoi_positive("Long wait time", optarg);
+                       break;
+               case 'p':
+                       test_args.test_physical = true;
+                       test_args.test_virtual = false;
+                       break;
+               case 'v':
+                       test_args.test_virtual = true;
+                       test_args.test_physical = false;
+                       break;
+               case 'w':
+                       test_args.wait_ms = atoi_positive("Wait time", optarg);
+                       break;
+               case 'h':
+               default:
+                       goto err;
+               }
+       }
+
+       return true;
+
+ err:
+       test_print_help(argv[0]);
+       return false;
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       /* Tell stdout not to buffer its content */
+       setbuf(stdout, NULL);
+
+       if (!parse_args(argc, argv))
+               exit(KSFT_SKIP);
+
+       if (test_args.test_virtual) {
+               test_vm_create(&vm, &vcpu, VIRTUAL);
+               test_run(vm, vcpu);
+               kvm_vm_free(vm);
+       }
+
+       if (test_args.test_physical) {
+               test_vm_create(&vm, &vcpu, PHYSICAL);
+               test_run(vm, vcpu);
+               kvm_vm_free(vm);
+       }
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c
new file mode 100644 (file)
index 0000000..c7fb55c
--- /dev/null
@@ -0,0 +1,607 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+#include <linux/bitfield.h>
+
+#define MDSCR_KDE      (1 << 13)
+#define MDSCR_MDE      (1 << 15)
+#define MDSCR_SS       (1 << 0)
+
+#define DBGBCR_LEN8    (0xff << 5)
+#define DBGBCR_EXEC    (0x0 << 3)
+#define DBGBCR_EL1     (0x1 << 1)
+#define DBGBCR_E       (0x1 << 0)
+#define DBGBCR_LBN_SHIFT       16
+#define DBGBCR_BT_SHIFT                20
+#define DBGBCR_BT_ADDR_LINK_CTX        (0x1 << DBGBCR_BT_SHIFT)
+#define DBGBCR_BT_CTX_LINK     (0x3 << DBGBCR_BT_SHIFT)
+
+#define DBGWCR_LEN8    (0xff << 5)
+#define DBGWCR_RD      (0x1 << 3)
+#define DBGWCR_WR      (0x2 << 3)
+#define DBGWCR_EL1     (0x1 << 1)
+#define DBGWCR_E       (0x1 << 0)
+#define DBGWCR_LBN_SHIFT       16
+#define DBGWCR_WT_SHIFT                20
+#define DBGWCR_WT_LINK         (0x1 << DBGWCR_WT_SHIFT)
+
+#define SPSR_D         (1 << 9)
+#define SPSR_SS                (1 << 21)
+
+extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start, hw_bp_ctx;
+extern unsigned char iter_ss_begin, iter_ss_end;
+static volatile uint64_t sw_bp_addr, hw_bp_addr;
+static volatile uint64_t wp_addr, wp_data_addr;
+static volatile uint64_t svc_addr;
+static volatile uint64_t ss_addr[4], ss_idx;
+#define  PC(v)  ((uint64_t)&(v))
+
+#define GEN_DEBUG_WRITE_REG(reg_name)                  \
+static void write_##reg_name(int num, uint64_t val)    \
+{                                                      \
+       switch (num) {                                  \
+       case 0:                                         \
+               write_sysreg(val, reg_name##0_el1);     \
+               break;                                  \
+       case 1:                                         \
+               write_sysreg(val, reg_name##1_el1);     \
+               break;                                  \
+       case 2:                                         \
+               write_sysreg(val, reg_name##2_el1);     \
+               break;                                  \
+       case 3:                                         \
+               write_sysreg(val, reg_name##3_el1);     \
+               break;                                  \
+       case 4:                                         \
+               write_sysreg(val, reg_name##4_el1);     \
+               break;                                  \
+       case 5:                                         \
+               write_sysreg(val, reg_name##5_el1);     \
+               break;                                  \
+       case 6:                                         \
+               write_sysreg(val, reg_name##6_el1);     \
+               break;                                  \
+       case 7:                                         \
+               write_sysreg(val, reg_name##7_el1);     \
+               break;                                  \
+       case 8:                                         \
+               write_sysreg(val, reg_name##8_el1);     \
+               break;                                  \
+       case 9:                                         \
+               write_sysreg(val, reg_name##9_el1);     \
+               break;                                  \
+       case 10:                                        \
+               write_sysreg(val, reg_name##10_el1);    \
+               break;                                  \
+       case 11:                                        \
+               write_sysreg(val, reg_name##11_el1);    \
+               break;                                  \
+       case 12:                                        \
+               write_sysreg(val, reg_name##12_el1);    \
+               break;                                  \
+       case 13:                                        \
+               write_sysreg(val, reg_name##13_el1);    \
+               break;                                  \
+       case 14:                                        \
+               write_sysreg(val, reg_name##14_el1);    \
+               break;                                  \
+       case 15:                                        \
+               write_sysreg(val, reg_name##15_el1);    \
+               break;                                  \
+       default:                                        \
+               GUEST_ASSERT(0);                        \
+       }                                               \
+}
+
+/* Define write_dbgbcr()/write_dbgbvr()/write_dbgwcr()/write_dbgwvr() */
+GEN_DEBUG_WRITE_REG(dbgbcr)
+GEN_DEBUG_WRITE_REG(dbgbvr)
+GEN_DEBUG_WRITE_REG(dbgwcr)
+GEN_DEBUG_WRITE_REG(dbgwvr)
+
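+/*
+ * Mask debug exceptions, clear the OS (double) lock, and zero MDSCR_EL1,
+ * CONTEXTIDR_EL1, and all implemented breakpoint/watchpoint registers.
+ */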
+static void reset_debug_state(void)
+{
+       uint8_t brps, wrps, i;
+       uint64_t dfr0;
+
+       asm volatile("msr daifset, #8");
+
+       write_sysreg(0, osdlr_el1);
+       write_sysreg(0, oslar_el1);
+       isb();
+
+       write_sysreg(0, mdscr_el1);
+       write_sysreg(0, contextidr_el1);
+
+       /* Reset all bcr/bvr/wcr/wvr registers */
+       dfr0 = read_sysreg(id_aa64dfr0_el1);
+       brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0);
+       for (i = 0; i <= brps; i++) {
+               write_dbgbcr(i, 0);
+               write_dbgbvr(i, 0);
+       }
+       wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0);
+       for (i = 0; i <= wrps; i++) {
+               write_dbgwcr(i, 0);
+               write_dbgwvr(i, 0);
+       }
+
+       isb();
+}
+
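+/* Set the OS Lock and verify that OSLSR_EL1.OSLK reads back as set. */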
+static void enable_os_lock(void)
+{
+       write_sysreg(1, oslar_el1);
+       isb();
+
+       GUEST_ASSERT(read_sysreg(oslsr_el1) & 2);
+}
+
+static void enable_monitor_debug_exceptions(void)
+{
+       uint32_t mdscr;
+
+       asm volatile("msr daifclr, #8");
+
+       mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
+       write_sysreg(mdscr, mdscr_el1);
+       isb();
+}
+
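+/*
+ * Arm watchpoint 'wpn' on 8 bytes at 'addr' (reads and writes), then enable
+ * monitor debug exceptions.
+ */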
+static void install_wp(uint8_t wpn, uint64_t addr)
+{
+       uint32_t wcr;
+
+       wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
+       write_dbgwcr(wpn, wcr);
+       write_dbgwvr(wpn, addr);
+
+       isb();
+
+       enable_monitor_debug_exceptions();
+}
+
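+/* Arm hardware breakpoint 'bpn' on the instruction at 'addr'. */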
+static void install_hw_bp(uint8_t bpn, uint64_t addr)
+{
+       uint32_t bcr;
+
+       bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
+       write_dbgbcr(bpn, bcr);
+       write_dbgbvr(bpn, addr);
+       isb();
+
+       enable_monitor_debug_exceptions();
+}
+
+static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr,
+                          uint64_t ctx)
+{
+       uint32_t wcr;
+       uint64_t ctx_bcr;
+
+       /* Setup a context-aware breakpoint for Linked Context ID Match */
+       ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
+                 DBGBCR_BT_CTX_LINK;
+       write_dbgbcr(ctx_bp, ctx_bcr);
+       write_dbgbvr(ctx_bp, ctx);
+
+       /* Setup a linked watchpoint (linked to the context-aware breakpoint) */
+       wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E |
+             DBGWCR_WT_LINK | ((uint32_t)ctx_bp << DBGWCR_LBN_SHIFT);
+       write_dbgwcr(addr_wp, wcr);
+       write_dbgwvr(addr_wp, addr);
+       isb();
+
+       enable_monitor_debug_exceptions();
+}
+
+void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr,
+                      uint64_t ctx)
+{
+       uint32_t addr_bcr, ctx_bcr;
+
+       /* Setup a context-aware breakpoint for Linked Context ID Match */
+       ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
+                 DBGBCR_BT_CTX_LINK;
+       write_dbgbcr(ctx_bp, ctx_bcr);
+       write_dbgbvr(ctx_bp, ctx);
+
+       /*
+        * Setup a normal breakpoint for Linked Address Match, and link it
+        * to the context-aware breakpoint.
+        */
+       addr_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
+                  DBGBCR_BT_ADDR_LINK_CTX |
+                  ((uint32_t)ctx_bp << DBGBCR_LBN_SHIFT);
+       write_dbgbcr(addr_bp, addr_bcr);
+       write_dbgbvr(addr_bp, addr);
+       isb();
+
+       enable_monitor_debug_exceptions();
+}
+
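+/* Enable single-step (MDSCR_EL1.{KDE,SS}) and unmask debug exceptions. */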
+static void install_ss(void)
+{
+       uint32_t mdscr;
+
+       asm volatile("msr daifclr, #8");
+
+       mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS;
+       write_sysreg(mdscr, mdscr_el1);
+       isb();
+}
+
+static volatile char write_data;
+
+static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
+{
+       uint64_t ctx = 0xabcdef;        /* a random context number */
+
+       /* Software-breakpoint */
+       reset_debug_state();
+       asm volatile("sw_bp: brk #0");
+       GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp));
+
+       /* Hardware-breakpoint */
+       reset_debug_state();
+       install_hw_bp(bpn, PC(hw_bp));
+       asm volatile("hw_bp: nop");
+       GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp));
+
+       /* Hardware-breakpoint + svc */
+       reset_debug_state();
+       install_hw_bp(bpn, PC(bp_svc));
+       asm volatile("bp_svc: svc #0");
+       GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc));
+       GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4);
+
+       /* Hardware-breakpoint + software-breakpoint */
+       reset_debug_state();
+       install_hw_bp(bpn, PC(bp_brk));
+       asm volatile("bp_brk: brk #0");
+       GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk));
+       GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk));
+
+       /* Watchpoint */
+       reset_debug_state();
+       install_wp(wpn, PC(write_data));
+       write_data = 'x';
+       GUEST_ASSERT_EQ(write_data, 'x');
+       GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
+
+       /* Single-step */
+       reset_debug_state();
+       install_ss();
+       ss_idx = 0;
+       asm volatile("ss_start:\n"
+                    "mrs x0, esr_el1\n"
+                    "add x0, x0, #1\n"
+                    "msr daifset, #8\n"
+                    : : : "x0");
+       GUEST_ASSERT_EQ(ss_addr[0], PC(ss_start));
+       GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4);
+       GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8);
+
+       /* OS Lock does not block software-breakpoint */
+       reset_debug_state();
+       enable_os_lock();
+       sw_bp_addr = 0;
+       asm volatile("sw_bp2: brk #0");
+       GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp2));
+
+       /* OS Lock blocking hardware-breakpoint */
+       reset_debug_state();
+       enable_os_lock();
+       install_hw_bp(bpn, PC(hw_bp2));
+       hw_bp_addr = 0;
+       asm volatile("hw_bp2: nop");
+       GUEST_ASSERT_EQ(hw_bp_addr, 0);
+
+       /* OS Lock blocking watchpoint */
+       reset_debug_state();
+       enable_os_lock();
+       write_data = '\0';
+       wp_data_addr = 0;
+       install_wp(wpn, PC(write_data));
+       write_data = 'x';
+       GUEST_ASSERT_EQ(write_data, 'x');
+       GUEST_ASSERT_EQ(wp_data_addr, 0);
+
+       /* OS Lock blocking single-step */
+       reset_debug_state();
+       enable_os_lock();
+       ss_addr[0] = 0;
+       install_ss();
+       ss_idx = 0;
+       asm volatile("mrs x0, esr_el1\n\t"
+                    "add x0, x0, #1\n\t"
+                    "msr daifset, #8\n\t"
+                    : : : "x0");
+       GUEST_ASSERT_EQ(ss_addr[0], 0);
+
+       /* Linked hardware-breakpoint */
+       hw_bp_addr = 0;
+       reset_debug_state();
+       install_hw_bp_ctx(bpn, ctx_bpn, PC(hw_bp_ctx), ctx);
+       /* Set context id */
+       write_sysreg(ctx, contextidr_el1);
+       isb();
+       asm volatile("hw_bp_ctx: nop");
+       write_sysreg(0, contextidr_el1);
+       GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp_ctx));
+
+       /* Linked watchpoint */
+       reset_debug_state();
+       install_wp_ctx(wpn, ctx_bpn, PC(write_data), ctx);
+       /* Set context id */
+       write_sysreg(ctx, contextidr_el1);
+       isb();
+       write_data = 'x';
+       GUEST_ASSERT_EQ(write_data, 'x');
+       GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
+
+       GUEST_DONE();
+}
+
+static void guest_sw_bp_handler(struct ex_regs *regs)
+{
+       sw_bp_addr = regs->pc;
+       regs->pc += 4;
+}
+
+static void guest_hw_bp_handler(struct ex_regs *regs)
+{
+       hw_bp_addr = regs->pc;
+       regs->pstate |= SPSR_D;
+}
+
+static void guest_wp_handler(struct ex_regs *regs)
+{
+       wp_data_addr = read_sysreg(far_el1);
+       wp_addr = regs->pc;
+       regs->pstate |= SPSR_D;
+}
+
+static void guest_ss_handler(struct ex_regs *regs)
+{
+       __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%lu'", ss_idx);
+       ss_addr[ss_idx++] = regs->pc;
+       regs->pstate |= SPSR_SS;
+}
+
+static void guest_svc_handler(struct ex_regs *regs)
+{
+       svc_addr = regs->pc;
+}
+
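+/*
+ * Guest side of the single-step test: each iteration writes and reads back
+ * dbgbvr0_el1/dbgwvr0_el1 while userspace single-steps the window between
+ * iter_ss_begin and iter_ss_end.
+ */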
+static void guest_code_ss(int test_cnt)
+{
+       uint64_t i;
+       uint64_t bvr, wvr, w_bvr, w_wvr;
+
+       for (i = 0; i < test_cnt; i++) {
+               /* Bits [1:0] of dbg{b,w}vr are RES0 */
+               w_bvr = i << 2;
+               w_wvr = i << 2;
+
+               /*
+                * Enable Single Step execution.  Note!  This _must_ be a bare
+                * ucall as the ucall() path uses atomic operations to manage
+                * the ucall structures, and the built-in "atomics" are usually
+                * implemented via exclusive access instructions.  The exclusive
+                * monitor is cleared on ERET, and so taking debug exceptions
+                * during a LDREX=>STREX sequence will prevent forward progress
+                * and hang the guest/test.
+                */
+               GUEST_UCALL_NONE();
+
+               /*
+                * Userspace will verify that the pc is as expected during
+                * single step execution between iter_ss_begin and iter_ss_end.
+                */
+               asm volatile("iter_ss_begin:nop\n");
+
+               write_sysreg(w_bvr, dbgbvr0_el1);
+               write_sysreg(w_wvr, dbgwvr0_el1);
+               bvr = read_sysreg(dbgbvr0_el1);
+               wvr = read_sysreg(dbgwvr0_el1);
+
+               /* Userspace disables Single Step when the end is nigh. */
+               asm volatile("iter_ss_end:\n");
+
+               GUEST_ASSERT_EQ(bvr, w_bvr);
+               GUEST_ASSERT_EQ(wvr, w_wvr);
+       }
+       GUEST_DONE();
+}
+
+static int debug_version(uint64_t id_aa64dfr0)
+{
+       return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0);
+}
+
+static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(vcpu);
+
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_ELx_EC_BRK64, guest_sw_bp_handler);
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_ELx_EC_BREAKPT_CUR, guest_hw_bp_handler);
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_ELx_EC_WATCHPT_CUR, guest_wp_handler);
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_ELx_EC_SOFTSTP_CUR, guest_ss_handler);
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_ELx_EC_SVC64, guest_svc_handler);
+
+       /* Specify bpn/wpn/ctx_bpn to be tested */
+       vcpu_args_set(vcpu, 3, bpn, wpn, ctx_bpn);
+       pr_debug("Use bpn#%d, wpn#%d and ctx_bpn#%d\n", bpn, wpn, ctx_bpn);
+
+       vcpu_run(vcpu);
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       case UCALL_DONE:
+               goto done;
+       default:
+               TEST_FAIL("Unknown ucall %lu", uc.cmd);
+       }
+
+done:
+       kvm_vm_free(vm);
+}
+
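+/*
+ * Userspace side of the single-step test: enable KVM_GUESTDBG_SINGLESTEP
+ * around each guest iteration and verify that every KVM_EXIT_DEBUG within
+ * the iter_ss_begin..iter_ss_end window lands on the expected PC.
+ */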
+void test_single_step_from_userspace(int test_cnt)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       struct kvm_run *run;
+       uint64_t pc, cmd;
+       uint64_t test_pc = 0;
+       bool ss_enable = false;
+       struct kvm_guest_debug debug = {};
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code_ss);
+       run = vcpu->run;
+       vcpu_args_set(vcpu, 1, test_cnt);
+
+       while (1) {
+               vcpu_run(vcpu);
+               if (run->exit_reason != KVM_EXIT_DEBUG) {
+                       cmd = get_ucall(vcpu, &uc);
+                       if (cmd == UCALL_ABORT) {
+                               REPORT_GUEST_ASSERT(uc);
+                               /* NOT REACHED */
+                       } else if (cmd == UCALL_DONE) {
+                               break;
+                       }
+
+                       TEST_ASSERT(cmd == UCALL_NONE,
+                                   "Unexpected ucall cmd 0x%lx", cmd);
+
+                       debug.control = KVM_GUESTDBG_ENABLE |
+                                       KVM_GUESTDBG_SINGLESTEP;
+                       ss_enable = true;
+                       vcpu_guest_debug_set(vcpu, &debug);
+                       continue;
+               }
+
+               TEST_ASSERT(ss_enable, "Unexpected KVM_EXIT_DEBUG");
+
+               /* Check if the current pc is expected. */
+               pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
+               TEST_ASSERT(!test_pc || pc == test_pc,
+                           "Unexpected pc 0x%lx (expected 0x%lx)",
+                           pc, test_pc);
+
+               if ((pc + 4) == (uint64_t)&iter_ss_end) {
+                       test_pc = 0;
+                       debug.control = KVM_GUESTDBG_ENABLE;
+                       ss_enable = false;
+                       vcpu_guest_debug_set(vcpu, &debug);
+                       continue;
+               }
+
+               /*
+                * If the current pc is between iter_ss_begin and
+                * iter_ss_end, the pc for the next KVM_EXIT_DEBUG should
+                * be the current pc + 4.
+                */
+               if ((pc >= (uint64_t)&iter_ss_begin) &&
+                   (pc < (uint64_t)&iter_ss_end))
+                       test_pc = pc + 4;
+               else
+                       test_pc = 0;
+       }
+
+       kvm_vm_free(vm);
+}
+
+/*
+ * Run debug testing using the various breakpoint#, watchpoint# and
+ * context-aware breakpoint# with the given ID_AA64DFR0_EL1 configuration.
+ */
+void test_guest_debug_exceptions_all(uint64_t aa64dfr0)
+{
+       uint8_t brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base;
+       int b, w, c;
+
+       /* Number of breakpoints */
+       brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1;
+       __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required");
+
+       /* Number of watchpoints */
+       wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1;
+
+       /* Number of context aware breakpoints */
+       ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1;
+
+       pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__,
+                brp_num, wrp_num, ctx_brp_num);
+
+       /* Number of normal (non-context aware) breakpoints */
+       normal_brp_num = brp_num - ctx_brp_num;
+
+       /* Lowest context aware breakpoint number */
+       ctx_brp_base = normal_brp_num;
+
+       /* Run tests with all supported breakpoints/watchpoints */
+       for (c = ctx_brp_base; c < ctx_brp_base + ctx_brp_num; c++) {
+               for (b = 0; b < normal_brp_num; b++) {
+                       for (w = 0; w < wrp_num; w++)
+                               test_guest_debug_exceptions(b, w, c);
+               }
+       }
+}
+
+static void help(char *name)
+{
+       puts("");
+       printf("Usage: %s [-h] [-i iterations of the single step test]\n", name);
+       puts("");
+       exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       int opt;
+       int ss_iteration = 10000;
+       uint64_t aa64dfr0;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       aa64dfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
+       __TEST_REQUIRE(debug_version(aa64dfr0) >= 6,
+                      "Armv8 debug architecture not supported.");
+       kvm_vm_free(vm);
+
+       while ((opt = getopt(argc, argv, "i:")) != -1) {
+               switch (opt) {
+               case 'i':
+                       ss_iteration = atoi_positive("Number of iterations", optarg);
+                       break;
+               case 'h':
+               default:
+                       help(argv[0]);
+                       break;
+               }
+       }
+
+       test_guest_debug_exceptions_all(aa64dfr0);
+       test_single_step_from_userspace(ss_iteration);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c
new file mode 100644 (file)
index 0000000..d43fb3f
--- /dev/null
@@ -0,0 +1,771 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check for KVM_GET_REG_LIST regressions.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ *
+ * While the blessed list should be created from the oldest possible
+ * kernel, we can't go older than v5.2 because that's the first
+ * release which includes df205b5c6328 ("KVM: arm64: Filter out invalid
+ * core register IDs in KVM_GET_REG_LIST"). Without that commit the core
+ * registers won't match expectations.
+ */
+#include <stdio.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+
+struct feature_id_reg {
+       __u64 reg;
+       __u64 id_reg;
+       __u64 feat_shift;
+       __u64 feat_min;
+};
+
+static struct feature_id_reg feat_id_regs[] = {
+       {
+               ARM64_SYS_REG(3, 0, 2, 0, 3),   /* TCR2_EL1 */
+               ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
+               0,
+               1
+       },
+       {
+               ARM64_SYS_REG(3, 0, 10, 2, 2),  /* PIRE0_EL1 */
+               ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
+               8,
+               1
+       },
+       {
+               ARM64_SYS_REG(3, 0, 10, 2, 3),  /* PIR_EL1 */
+               ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
+               8,
+               1
+       },
+       {
+               ARM64_SYS_REG(3, 0, 10, 2, 4),  /* POR_EL1 */
+               ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
+               16,
+               1
+       },
+       {
+               ARM64_SYS_REG(3, 3, 10, 2, 4),  /* POR_EL0 */
+               ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
+               16,
+               1
+       }
+};
+
+bool filter_reg(__u64 reg)
+{
+       /*
+        * DEMUX register presence depends on the host's CLIDR_EL1.
+        * This means there's no set of them that we can bless.
+        */
+       if ((reg & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+               return true;
+
+       return false;
+}
+
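+/*
+ * A register gated by a feature (see feat_id_regs[]) is only expected when
+ * the associated ID register field is at or above the required minimum;
+ * registers not in the table are always considered supported.
+ */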
+static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg)
+{
+       int i, ret;
+       __u64 data, feat_val;
+
+       for (i = 0; i < ARRAY_SIZE(feat_id_regs); i++) {
+               if (feat_id_regs[i].reg == reg) {
+                       ret = __vcpu_get_reg(vcpu, feat_id_regs[i].id_reg, &data);
+                       if (ret < 0)
+                               return false;
+
+                       feat_val = ((data >> feat_id_regs[i].feat_shift) & 0xf);
+                       return feat_val >= feat_id_regs[i].feat_min;
+               }
+       }
+
+       return true;
+}
+
+bool check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg)
+{
+       return check_supported_feat_reg(vcpu, reg);
+}
+
+bool check_reject_set(int err)
+{
+       return err == EPERM;
+}
+
+void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
+{
+       struct vcpu_reg_sublist *s;
+       int feature;
+
+       for_each_sublist(c, s) {
+               if (s->finalize) {
+                       feature = s->feature;
+                       vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature);
+               }
+       }
+}
+
+#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
+
+#define CORE_REGS_XX_NR_WORDS  2
+#define CORE_SPSR_XX_NR_WORDS  2
+#define CORE_FPREGS_XX_NR_WORDS        4
+
+static const char *core_id_to_str(const char *prefix, __u64 id)
+{
+       __u64 core_off = id & ~REG_MASK, idx;
+
+       /*
+        * core_off is the offset into struct kvm_regs
+        */
+       switch (core_off) {
+       case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
+            KVM_REG_ARM_CORE_REG(regs.regs[30]):
+               idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
+               TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", prefix, idx);
+               return strdup_printf("KVM_REG_ARM_CORE_REG(regs.regs[%lld])", idx);
+       case KVM_REG_ARM_CORE_REG(regs.sp):
+               return "KVM_REG_ARM_CORE_REG(regs.sp)";
+       case KVM_REG_ARM_CORE_REG(regs.pc):
+               return "KVM_REG_ARM_CORE_REG(regs.pc)";
+       case KVM_REG_ARM_CORE_REG(regs.pstate):
+               return "KVM_REG_ARM_CORE_REG(regs.pstate)";
+       case KVM_REG_ARM_CORE_REG(sp_el1):
+               return "KVM_REG_ARM_CORE_REG(sp_el1)";
+       case KVM_REG_ARM_CORE_REG(elr_el1):
+               return "KVM_REG_ARM_CORE_REG(elr_el1)";
+       case KVM_REG_ARM_CORE_REG(spsr[0]) ...
+            KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
+               idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
+               TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", prefix, idx);
+               return strdup_printf("KVM_REG_ARM_CORE_REG(spsr[%lld])", idx);
+       case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
+            KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
+               idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
+               TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", prefix, idx);
+               return strdup_printf("KVM_REG_ARM_CORE_REG(fp_regs.vregs[%lld])", idx);
+       case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
+               return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
+       case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
+               return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
+       }
+
+       TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id);
+       return NULL;
+}
+
+static const char *sve_id_to_str(const char *prefix, __u64 id)
+{
+       __u64 sve_off, n, i;
+
+       if (id == KVM_REG_ARM64_SVE_VLS)
+               return "KVM_REG_ARM64_SVE_VLS";
+
+       sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
+       i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);
+
+       TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", prefix, id);
+
+       switch (sve_off) {
+       case KVM_REG_ARM64_SVE_ZREG_BASE ...
+            KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
+               n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
+               TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
+                           "%s: Unexpected bits set in SVE ZREG id: 0x%llx", prefix, id);
+               return strdup_printf("KVM_REG_ARM64_SVE_ZREG(%lld, 0)", n);
+       case KVM_REG_ARM64_SVE_PREG_BASE ...
+            KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
+               n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
+               TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
+                           "%s: Unexpected bits set in SVE PREG id: 0x%llx", prefix, id);
+               return strdup_printf("KVM_REG_ARM64_SVE_PREG(%lld, 0)", n);
+       case KVM_REG_ARM64_SVE_FFR_BASE:
+               TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
+                           "%s: Unexpected bits set in SVE FFR id: 0x%llx", prefix, id);
+               return "KVM_REG_ARM64_SVE_FFR(0)";
+       }
+
+       return NULL;
+}
+
+void print_reg(const char *prefix, __u64 id)
+{
+       unsigned op0, op1, crn, crm, op2;
+       const char *reg_size = NULL;
+
+       TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
+                   "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", prefix, id);
+
+       switch (id & KVM_REG_SIZE_MASK) {
+       case KVM_REG_SIZE_U8:
+               reg_size = "KVM_REG_SIZE_U8";
+               break;
+       case KVM_REG_SIZE_U16:
+               reg_size = "KVM_REG_SIZE_U16";
+               break;
+       case KVM_REG_SIZE_U32:
+               reg_size = "KVM_REG_SIZE_U32";
+               break;
+       case KVM_REG_SIZE_U64:
+               reg_size = "KVM_REG_SIZE_U64";
+               break;
+       case KVM_REG_SIZE_U128:
+               reg_size = "KVM_REG_SIZE_U128";
+               break;
+       case KVM_REG_SIZE_U256:
+               reg_size = "KVM_REG_SIZE_U256";
+               break;
+       case KVM_REG_SIZE_U512:
+               reg_size = "KVM_REG_SIZE_U512";
+               break;
+       case KVM_REG_SIZE_U1024:
+               reg_size = "KVM_REG_SIZE_U1024";
+               break;
+       case KVM_REG_SIZE_U2048:
+               reg_size = "KVM_REG_SIZE_U2048";
+               break;
+       default:
+               TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
+                         prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+       }
+
+       switch (id & KVM_REG_ARM_COPROC_MASK) {
+       case KVM_REG_ARM_CORE:
+               printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(prefix, id));
+               break;
+       case KVM_REG_ARM_DEMUX:
+               TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
+                           "%s: Unexpected bits set in DEMUX reg id: 0x%llx", prefix, id);
+               printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
+                      reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
+               break;
+       case KVM_REG_ARM64_SYSREG:
+               op0 = (id & KVM_REG_ARM64_SYSREG_OP0_MASK) >> KVM_REG_ARM64_SYSREG_OP0_SHIFT;
+               op1 = (id & KVM_REG_ARM64_SYSREG_OP1_MASK) >> KVM_REG_ARM64_SYSREG_OP1_SHIFT;
+               crn = (id & KVM_REG_ARM64_SYSREG_CRN_MASK) >> KVM_REG_ARM64_SYSREG_CRN_SHIFT;
+               crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
+               op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
+               TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
+                           "%s: Unexpected bits set in SYSREG reg id: 0x%llx", prefix, id);
+               printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
+               break;
+       case KVM_REG_ARM_FW:
+               TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
+                           "%s: Unexpected bits set in FW reg id: 0x%llx", prefix, id);
+               printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
+               break;
+       case KVM_REG_ARM_FW_FEAT_BMAP:
+               TEST_ASSERT(id == KVM_REG_ARM_FW_FEAT_BMAP_REG(id & 0xffff),
+                           "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", prefix, id);
+               printf("\tKVM_REG_ARM_FW_FEAT_BMAP_REG(%lld),\n", id & 0xffff);
+               break;
+       case KVM_REG_ARM64_SVE:
+               printf("\t%s,\n", sve_id_to_str(prefix, id));
+               break;
+       default:
+               TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx",
+                         prefix, (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
+       }
+}
+
+/*
+ * The original blessed list was primed with the output of kernel version
+ * v4.15 with --core-reg-fixup and then later updated with new registers.
+ * (The --core-reg-fixup option and its fixup function have been removed
+ * from the test, as it's unlikely this type of test will be run on a kernel
+ * older than v5.2.)
+ *
+ * The blessed list is up to date with kernel version v6.4 (or so we hope).
+ */
+static __u64 base_regs[] = {
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[3]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[4]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[5]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[6]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[7]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[8]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[9]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[10]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[11]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[12]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[13]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[14]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[15]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[16]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[17]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[18]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[19]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[20]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[21]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[22]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[23]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[24]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[25]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[26]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[27]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[28]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[29]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[30]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.sp),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pstate),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(elr_el1),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[0]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[1]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr),
+       KVM_REG_ARM_FW_REG(0),          /* KVM_REG_ARM_PSCI_VERSION */
+       KVM_REG_ARM_FW_REG(1),          /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 */
+       KVM_REG_ARM_FW_REG(2),          /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 */
+       KVM_REG_ARM_FW_REG(3),          /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 */
+       KVM_REG_ARM_FW_FEAT_BMAP_REG(0),        /* KVM_REG_ARM_STD_BMAP */
+       KVM_REG_ARM_FW_FEAT_BMAP_REG(1),        /* KVM_REG_ARM_STD_HYP_BMAP */
+       KVM_REG_ARM_FW_FEAT_BMAP_REG(2),        /* KVM_REG_ARM_VENDOR_HYP_BMAP */
+       ARM64_SYS_REG(3, 3, 14, 3, 1),  /* CNTV_CTL_EL0 */
+       ARM64_SYS_REG(3, 3, 14, 3, 2),  /* CNTV_CVAL_EL0 */
+       ARM64_SYS_REG(3, 3, 14, 0, 2),
+       ARM64_SYS_REG(3, 0, 0, 0, 0),   /* MIDR_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 0, 6),   /* REVIDR_EL1 */
+       ARM64_SYS_REG(3, 1, 0, 0, 1),   /* CLIDR_EL1 */
+       ARM64_SYS_REG(3, 1, 0, 0, 7),   /* AIDR_EL1 */
+       ARM64_SYS_REG(3, 3, 0, 0, 1),   /* CTR_EL0 */
+       ARM64_SYS_REG(2, 0, 0, 0, 4),
+       ARM64_SYS_REG(2, 0, 0, 0, 5),
+       ARM64_SYS_REG(2, 0, 0, 0, 6),
+       ARM64_SYS_REG(2, 0, 0, 0, 7),
+       ARM64_SYS_REG(2, 0, 0, 1, 4),
+       ARM64_SYS_REG(2, 0, 0, 1, 5),
+       ARM64_SYS_REG(2, 0, 0, 1, 6),
+       ARM64_SYS_REG(2, 0, 0, 1, 7),
+       ARM64_SYS_REG(2, 0, 0, 2, 0),   /* MDCCINT_EL1 */
+       ARM64_SYS_REG(2, 0, 0, 2, 2),   /* MDSCR_EL1 */
+       ARM64_SYS_REG(2, 0, 0, 2, 4),
+       ARM64_SYS_REG(2, 0, 0, 2, 5),
+       ARM64_SYS_REG(2, 0, 0, 2, 6),
+       ARM64_SYS_REG(2, 0, 0, 2, 7),
+       ARM64_SYS_REG(2, 0, 0, 3, 4),
+       ARM64_SYS_REG(2, 0, 0, 3, 5),
+       ARM64_SYS_REG(2, 0, 0, 3, 6),
+       ARM64_SYS_REG(2, 0, 0, 3, 7),
+       ARM64_SYS_REG(2, 0, 0, 4, 4),
+       ARM64_SYS_REG(2, 0, 0, 4, 5),
+       ARM64_SYS_REG(2, 0, 0, 4, 6),
+       ARM64_SYS_REG(2, 0, 0, 4, 7),
+       ARM64_SYS_REG(2, 0, 0, 5, 4),
+       ARM64_SYS_REG(2, 0, 0, 5, 5),
+       ARM64_SYS_REG(2, 0, 0, 5, 6),
+       ARM64_SYS_REG(2, 0, 0, 5, 7),
+       ARM64_SYS_REG(2, 0, 0, 6, 4),
+       ARM64_SYS_REG(2, 0, 0, 6, 5),
+       ARM64_SYS_REG(2, 0, 0, 6, 6),
+       ARM64_SYS_REG(2, 0, 0, 6, 7),
+       ARM64_SYS_REG(2, 0, 0, 7, 4),
+       ARM64_SYS_REG(2, 0, 0, 7, 5),
+       ARM64_SYS_REG(2, 0, 0, 7, 6),
+       ARM64_SYS_REG(2, 0, 0, 7, 7),
+       ARM64_SYS_REG(2, 0, 0, 8, 4),
+       ARM64_SYS_REG(2, 0, 0, 8, 5),
+       ARM64_SYS_REG(2, 0, 0, 8, 6),
+       ARM64_SYS_REG(2, 0, 0, 8, 7),
+       ARM64_SYS_REG(2, 0, 0, 9, 4),
+       ARM64_SYS_REG(2, 0, 0, 9, 5),
+       ARM64_SYS_REG(2, 0, 0, 9, 6),
+       ARM64_SYS_REG(2, 0, 0, 9, 7),
+       ARM64_SYS_REG(2, 0, 0, 10, 4),
+       ARM64_SYS_REG(2, 0, 0, 10, 5),
+       ARM64_SYS_REG(2, 0, 0, 10, 6),
+       ARM64_SYS_REG(2, 0, 0, 10, 7),
+       ARM64_SYS_REG(2, 0, 0, 11, 4),
+       ARM64_SYS_REG(2, 0, 0, 11, 5),
+       ARM64_SYS_REG(2, 0, 0, 11, 6),
+       ARM64_SYS_REG(2, 0, 0, 11, 7),
+       ARM64_SYS_REG(2, 0, 0, 12, 4),
+       ARM64_SYS_REG(2, 0, 0, 12, 5),
+       ARM64_SYS_REG(2, 0, 0, 12, 6),
+       ARM64_SYS_REG(2, 0, 0, 12, 7),
+       ARM64_SYS_REG(2, 0, 0, 13, 4),
+       ARM64_SYS_REG(2, 0, 0, 13, 5),
+       ARM64_SYS_REG(2, 0, 0, 13, 6),
+       ARM64_SYS_REG(2, 0, 0, 13, 7),
+       ARM64_SYS_REG(2, 0, 0, 14, 4),
+       ARM64_SYS_REG(2, 0, 0, 14, 5),
+       ARM64_SYS_REG(2, 0, 0, 14, 6),
+       ARM64_SYS_REG(2, 0, 0, 14, 7),
+       ARM64_SYS_REG(2, 0, 0, 15, 4),
+       ARM64_SYS_REG(2, 0, 0, 15, 5),
+       ARM64_SYS_REG(2, 0, 0, 15, 6),
+       ARM64_SYS_REG(2, 0, 0, 15, 7),
+       ARM64_SYS_REG(2, 0, 1, 1, 4),   /* OSLSR_EL1 */
+       ARM64_SYS_REG(2, 4, 0, 7, 0),   /* DBGVCR32_EL2 */
+       ARM64_SYS_REG(3, 0, 0, 0, 5),   /* MPIDR_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 0),   /* ID_PFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 1),   /* ID_PFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 2),   /* ID_DFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 3),   /* ID_AFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 4),   /* ID_MMFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 5),   /* ID_MMFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 6),   /* ID_MMFR2_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 7),   /* ID_MMFR3_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 0),   /* ID_ISAR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 1),   /* ID_ISAR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 2),   /* ID_ISAR2_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 3),   /* ID_ISAR3_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 4),   /* ID_ISAR4_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 5),   /* ID_ISAR5_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 6),   /* ID_MMFR4_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 7),   /* ID_ISAR6_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 0),   /* MVFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 1),   /* MVFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 2),   /* MVFR2_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 3),
+       ARM64_SYS_REG(3, 0, 0, 3, 4),   /* ID_PFR2_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 5),   /* ID_DFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 6),   /* ID_MMFR5_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 7),
+       ARM64_SYS_REG(3, 0, 0, 4, 0),   /* ID_AA64PFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 4, 1),   /* ID_AA64PFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 4, 2),   /* ID_AA64PFR2_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 4, 3),
+       ARM64_SYS_REG(3, 0, 0, 4, 4),   /* ID_AA64ZFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 4, 5),   /* ID_AA64SMFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 4, 6),
+       ARM64_SYS_REG(3, 0, 0, 4, 7),
+       ARM64_SYS_REG(3, 0, 0, 5, 0),   /* ID_AA64DFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 5, 1),   /* ID_AA64DFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 5, 2),
+       ARM64_SYS_REG(3, 0, 0, 5, 3),
+       ARM64_SYS_REG(3, 0, 0, 5, 4),   /* ID_AA64AFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 5, 5),   /* ID_AA64AFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 5, 6),
+       ARM64_SYS_REG(3, 0, 0, 5, 7),
+       ARM64_SYS_REG(3, 0, 0, 6, 0),   /* ID_AA64ISAR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 6, 1),   /* ID_AA64ISAR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 6, 2),   /* ID_AA64ISAR2_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 6, 3),
+       ARM64_SYS_REG(3, 0, 0, 6, 4),
+       ARM64_SYS_REG(3, 0, 0, 6, 5),
+       ARM64_SYS_REG(3, 0, 0, 6, 6),
+       ARM64_SYS_REG(3, 0, 0, 6, 7),
+       ARM64_SYS_REG(3, 0, 0, 7, 0),   /* ID_AA64MMFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 7, 1),   /* ID_AA64MMFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 7, 2),   /* ID_AA64MMFR2_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 7, 4),   /* ID_AA64MMFR4_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 7, 5),
+       ARM64_SYS_REG(3, 0, 0, 7, 6),
+       ARM64_SYS_REG(3, 0, 0, 7, 7),
+       ARM64_SYS_REG(3, 0, 1, 0, 0),   /* SCTLR_EL1 */
+       ARM64_SYS_REG(3, 0, 1, 0, 1),   /* ACTLR_EL1 */
+       ARM64_SYS_REG(3, 0, 1, 0, 2),   /* CPACR_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 0, 0),   /* TTBR0_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 0, 1),   /* TTBR1_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 0, 2),   /* TCR_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 0, 3),   /* TCR2_EL1 */
+       ARM64_SYS_REG(3, 0, 5, 1, 0),   /* AFSR0_EL1 */
+       ARM64_SYS_REG(3, 0, 5, 1, 1),   /* AFSR1_EL1 */
+       ARM64_SYS_REG(3, 0, 5, 2, 0),   /* ESR_EL1 */
+       ARM64_SYS_REG(3, 0, 6, 0, 0),   /* FAR_EL1 */
+       ARM64_SYS_REG(3, 0, 7, 4, 0),   /* PAR_EL1 */
+       ARM64_SYS_REG(3, 0, 10, 2, 0),  /* MAIR_EL1 */
+       ARM64_SYS_REG(3, 0, 10, 2, 2),  /* PIRE0_EL1 */
+       ARM64_SYS_REG(3, 0, 10, 2, 3),  /* PIR_EL1 */
+       ARM64_SYS_REG(3, 0, 10, 2, 4),  /* POR_EL1 */
+       ARM64_SYS_REG(3, 0, 10, 3, 0),  /* AMAIR_EL1 */
+       ARM64_SYS_REG(3, 0, 12, 0, 0),  /* VBAR_EL1 */
+       ARM64_SYS_REG(3, 0, 12, 1, 1),  /* DISR_EL1 */
+       ARM64_SYS_REG(3, 0, 13, 0, 1),  /* CONTEXTIDR_EL1 */
+       ARM64_SYS_REG(3, 0, 13, 0, 4),  /* TPIDR_EL1 */
+       ARM64_SYS_REG(3, 0, 14, 1, 0),  /* CNTKCTL_EL1 */
+       ARM64_SYS_REG(3, 2, 0, 0, 0),   /* CSSELR_EL1 */
+       ARM64_SYS_REG(3, 3, 10, 2, 4),  /* POR_EL0 */
+       ARM64_SYS_REG(3, 3, 13, 0, 2),  /* TPIDR_EL0 */
+       ARM64_SYS_REG(3, 3, 13, 0, 3),  /* TPIDRRO_EL0 */
+       ARM64_SYS_REG(3, 3, 14, 0, 1),  /* CNTPCT_EL0 */
+       ARM64_SYS_REG(3, 3, 14, 2, 1),  /* CNTP_CTL_EL0 */
+       ARM64_SYS_REG(3, 3, 14, 2, 2),  /* CNTP_CVAL_EL0 */
+       ARM64_SYS_REG(3, 4, 3, 0, 0),   /* DACR32_EL2 */
+       ARM64_SYS_REG(3, 4, 5, 0, 1),   /* IFSR32_EL2 */
+       ARM64_SYS_REG(3, 4, 5, 3, 0),   /* FPEXC32_EL2 */
+};
+
+static __u64 pmu_regs[] = {
+       ARM64_SYS_REG(3, 0, 9, 14, 1),  /* PMINTENSET_EL1 */
+       ARM64_SYS_REG(3, 0, 9, 14, 2),  /* PMINTENCLR_EL1 */
+       ARM64_SYS_REG(3, 3, 9, 12, 0),  /* PMCR_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 12, 1),  /* PMCNTENSET_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 12, 2),  /* PMCNTENCLR_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 12, 3),  /* PMOVSCLR_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 12, 4),  /* PMSWINC_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 12, 5),  /* PMSELR_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 13, 0),  /* PMCCNTR_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 14, 0),  /* PMUSERENR_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 14, 3),  /* PMOVSSET_EL0 */
+       ARM64_SYS_REG(3, 3, 14, 8, 0),
+       ARM64_SYS_REG(3, 3, 14, 8, 1),
+       ARM64_SYS_REG(3, 3, 14, 8, 2),
+       ARM64_SYS_REG(3, 3, 14, 8, 3),
+       ARM64_SYS_REG(3, 3, 14, 8, 4),
+       ARM64_SYS_REG(3, 3, 14, 8, 5),
+       ARM64_SYS_REG(3, 3, 14, 8, 6),
+       ARM64_SYS_REG(3, 3, 14, 8, 7),
+       ARM64_SYS_REG(3, 3, 14, 9, 0),
+       ARM64_SYS_REG(3, 3, 14, 9, 1),
+       ARM64_SYS_REG(3, 3, 14, 9, 2),
+       ARM64_SYS_REG(3, 3, 14, 9, 3),
+       ARM64_SYS_REG(3, 3, 14, 9, 4),
+       ARM64_SYS_REG(3, 3, 14, 9, 5),
+       ARM64_SYS_REG(3, 3, 14, 9, 6),
+       ARM64_SYS_REG(3, 3, 14, 9, 7),
+       ARM64_SYS_REG(3, 3, 14, 10, 0),
+       ARM64_SYS_REG(3, 3, 14, 10, 1),
+       ARM64_SYS_REG(3, 3, 14, 10, 2),
+       ARM64_SYS_REG(3, 3, 14, 10, 3),
+       ARM64_SYS_REG(3, 3, 14, 10, 4),
+       ARM64_SYS_REG(3, 3, 14, 10, 5),
+       ARM64_SYS_REG(3, 3, 14, 10, 6),
+       ARM64_SYS_REG(3, 3, 14, 10, 7),
+       ARM64_SYS_REG(3, 3, 14, 11, 0),
+       ARM64_SYS_REG(3, 3, 14, 11, 1),
+       ARM64_SYS_REG(3, 3, 14, 11, 2),
+       ARM64_SYS_REG(3, 3, 14, 11, 3),
+       ARM64_SYS_REG(3, 3, 14, 11, 4),
+       ARM64_SYS_REG(3, 3, 14, 11, 5),
+       ARM64_SYS_REG(3, 3, 14, 11, 6),
+       ARM64_SYS_REG(3, 3, 14, 12, 0),
+       ARM64_SYS_REG(3, 3, 14, 12, 1),
+       ARM64_SYS_REG(3, 3, 14, 12, 2),
+       ARM64_SYS_REG(3, 3, 14, 12, 3),
+       ARM64_SYS_REG(3, 3, 14, 12, 4),
+       ARM64_SYS_REG(3, 3, 14, 12, 5),
+       ARM64_SYS_REG(3, 3, 14, 12, 6),
+       ARM64_SYS_REG(3, 3, 14, 12, 7),
+       ARM64_SYS_REG(3, 3, 14, 13, 0),
+       ARM64_SYS_REG(3, 3, 14, 13, 1),
+       ARM64_SYS_REG(3, 3, 14, 13, 2),
+       ARM64_SYS_REG(3, 3, 14, 13, 3),
+       ARM64_SYS_REG(3, 3, 14, 13, 4),
+       ARM64_SYS_REG(3, 3, 14, 13, 5),
+       ARM64_SYS_REG(3, 3, 14, 13, 6),
+       ARM64_SYS_REG(3, 3, 14, 13, 7),
+       ARM64_SYS_REG(3, 3, 14, 14, 0),
+       ARM64_SYS_REG(3, 3, 14, 14, 1),
+       ARM64_SYS_REG(3, 3, 14, 14, 2),
+       ARM64_SYS_REG(3, 3, 14, 14, 3),
+       ARM64_SYS_REG(3, 3, 14, 14, 4),
+       ARM64_SYS_REG(3, 3, 14, 14, 5),
+       ARM64_SYS_REG(3, 3, 14, 14, 6),
+       ARM64_SYS_REG(3, 3, 14, 14, 7),
+       ARM64_SYS_REG(3, 3, 14, 15, 0),
+       ARM64_SYS_REG(3, 3, 14, 15, 1),
+       ARM64_SYS_REG(3, 3, 14, 15, 2),
+       ARM64_SYS_REG(3, 3, 14, 15, 3),
+       ARM64_SYS_REG(3, 3, 14, 15, 4),
+       ARM64_SYS_REG(3, 3, 14, 15, 5),
+       ARM64_SYS_REG(3, 3, 14, 15, 6),
+       ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */
+};
+
+static __u64 vregs[] = {
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]),
+};
+
+static __u64 sve_regs[] = {
+       KVM_REG_ARM64_SVE_VLS,
+       KVM_REG_ARM64_SVE_ZREG(0, 0),
+       KVM_REG_ARM64_SVE_ZREG(1, 0),
+       KVM_REG_ARM64_SVE_ZREG(2, 0),
+       KVM_REG_ARM64_SVE_ZREG(3, 0),
+       KVM_REG_ARM64_SVE_ZREG(4, 0),
+       KVM_REG_ARM64_SVE_ZREG(5, 0),
+       KVM_REG_ARM64_SVE_ZREG(6, 0),
+       KVM_REG_ARM64_SVE_ZREG(7, 0),
+       KVM_REG_ARM64_SVE_ZREG(8, 0),
+       KVM_REG_ARM64_SVE_ZREG(9, 0),
+       KVM_REG_ARM64_SVE_ZREG(10, 0),
+       KVM_REG_ARM64_SVE_ZREG(11, 0),
+       KVM_REG_ARM64_SVE_ZREG(12, 0),
+       KVM_REG_ARM64_SVE_ZREG(13, 0),
+       KVM_REG_ARM64_SVE_ZREG(14, 0),
+       KVM_REG_ARM64_SVE_ZREG(15, 0),
+       KVM_REG_ARM64_SVE_ZREG(16, 0),
+       KVM_REG_ARM64_SVE_ZREG(17, 0),
+       KVM_REG_ARM64_SVE_ZREG(18, 0),
+       KVM_REG_ARM64_SVE_ZREG(19, 0),
+       KVM_REG_ARM64_SVE_ZREG(20, 0),
+       KVM_REG_ARM64_SVE_ZREG(21, 0),
+       KVM_REG_ARM64_SVE_ZREG(22, 0),
+       KVM_REG_ARM64_SVE_ZREG(23, 0),
+       KVM_REG_ARM64_SVE_ZREG(24, 0),
+       KVM_REG_ARM64_SVE_ZREG(25, 0),
+       KVM_REG_ARM64_SVE_ZREG(26, 0),
+       KVM_REG_ARM64_SVE_ZREG(27, 0),
+       KVM_REG_ARM64_SVE_ZREG(28, 0),
+       KVM_REG_ARM64_SVE_ZREG(29, 0),
+       KVM_REG_ARM64_SVE_ZREG(30, 0),
+       KVM_REG_ARM64_SVE_ZREG(31, 0),
+       KVM_REG_ARM64_SVE_PREG(0, 0),
+       KVM_REG_ARM64_SVE_PREG(1, 0),
+       KVM_REG_ARM64_SVE_PREG(2, 0),
+       KVM_REG_ARM64_SVE_PREG(3, 0),
+       KVM_REG_ARM64_SVE_PREG(4, 0),
+       KVM_REG_ARM64_SVE_PREG(5, 0),
+       KVM_REG_ARM64_SVE_PREG(6, 0),
+       KVM_REG_ARM64_SVE_PREG(7, 0),
+       KVM_REG_ARM64_SVE_PREG(8, 0),
+       KVM_REG_ARM64_SVE_PREG(9, 0),
+       KVM_REG_ARM64_SVE_PREG(10, 0),
+       KVM_REG_ARM64_SVE_PREG(11, 0),
+       KVM_REG_ARM64_SVE_PREG(12, 0),
+       KVM_REG_ARM64_SVE_PREG(13, 0),
+       KVM_REG_ARM64_SVE_PREG(14, 0),
+       KVM_REG_ARM64_SVE_PREG(15, 0),
+       KVM_REG_ARM64_SVE_FFR(0),
+       ARM64_SYS_REG(3, 0, 1, 2, 0),   /* ZCR_EL1 */
+};
+
+static __u64 sve_rejects_set[] = {
+       KVM_REG_ARM64_SVE_VLS,
+};
+
+static __u64 pauth_addr_regs[] = {
+       ARM64_SYS_REG(3, 0, 2, 1, 0),   /* APIAKEYLO_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 1, 1),   /* APIAKEYHI_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 1, 2),   /* APIBKEYLO_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 1, 3),   /* APIBKEYHI_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 2, 0),   /* APDAKEYLO_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 2, 1),   /* APDAKEYHI_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 2, 2),   /* APDBKEYLO_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 2, 3)    /* APDBKEYHI_EL1 */
+};
+
+static __u64 pauth_generic_regs[] = {
+       ARM64_SYS_REG(3, 0, 2, 3, 0),   /* APGAKEYLO_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 3, 1),   /* APGAKEYHI_EL1 */
+};
+
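+/*
+ * Each sublist pairs a register array with the KVM capability and vCPU
+ * feature (if any) required to expose those registers.
+ */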
+#define BASE_SUBLIST \
+       { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
+#define VREGS_SUBLIST \
+       { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), }
+#define PMU_SUBLIST \
+       { "pmu", .capability = KVM_CAP_ARM_PMU_V3, .feature = KVM_ARM_VCPU_PMU_V3, \
+         .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
+#define SVE_SUBLIST \
+       { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
+         .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \
+         .rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), }
+#define PAUTH_SUBLIST                                                  \
+       {                                                               \
+               .name           = "pauth_address",                      \
+               .capability     = KVM_CAP_ARM_PTRAUTH_ADDRESS,          \
+               .feature        = KVM_ARM_VCPU_PTRAUTH_ADDRESS,         \
+               .regs           = pauth_addr_regs,                      \
+               .regs_n         = ARRAY_SIZE(pauth_addr_regs),          \
+       },                                                              \
+       {                                                               \
+               .name           = "pauth_generic",                      \
+               .capability     = KVM_CAP_ARM_PTRAUTH_GENERIC,          \
+               .feature        = KVM_ARM_VCPU_PTRAUTH_GENERIC,         \
+               .regs           = pauth_generic_regs,                   \
+               .regs_n         = ARRAY_SIZE(pauth_generic_regs),       \
+       }
+
+static struct vcpu_reg_list vregs_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       VREGS_SUBLIST,
+       {0},
+       },
+};
+static struct vcpu_reg_list vregs_pmu_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       VREGS_SUBLIST,
+       PMU_SUBLIST,
+       {0},
+       },
+};
+static struct vcpu_reg_list sve_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       SVE_SUBLIST,
+       {0},
+       },
+};
+static struct vcpu_reg_list sve_pmu_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       SVE_SUBLIST,
+       PMU_SUBLIST,
+       {0},
+       },
+};
+static struct vcpu_reg_list pauth_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       VREGS_SUBLIST,
+       PAUTH_SUBLIST,
+       {0},
+       },
+};
+static struct vcpu_reg_list pauth_pmu_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       VREGS_SUBLIST,
+       PAUTH_SUBLIST,
+       PMU_SUBLIST,
+       {0},
+       },
+};
+
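+/*
+ * All vCPU configurations covered by the test. Note that the SVE configs
+ * carry the SVE sublist in place of the FP/SIMD vregs sublist.
+ */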
+struct vcpu_reg_list *vcpu_configs[] = {
+       &vregs_config,
+       &vregs_pmu_config,
+       &sve_config,
+       &sve_pmu_config,
+       &pauth_config,
+       &pauth_pmu_config,
+};
+int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
diff --git a/tools/testing/selftests/kvm/arm64/hypercalls.c b/tools/testing/selftests/kvm/arm64/hypercalls.c
new file mode 100644 (file)
index 0000000..ec54ec7
--- /dev/null
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* hypercalls: Check the arm64 pseudo-firmware bitmap register interface.
+ *
+ * The test validates the basic hypercall functionalities that are exposed
+ * via the pseudo-firmware bitmap registers. This includes the registers'
+ * read/write behavior before and after the VM has started, and whether the
+ * hypercalls are properly masked or unmasked to the guest when disabled or
+ * enabled from KVM userspace, respectively.
+ */
+#include <errno.h>
+#include <linux/arm-smccc.h>
+#include <asm/kvm.h>
+#include <kvm_util.h>
+
+#include "processor.h"
+
+#define FW_REG_ULIMIT_VAL(max_feat_bit) (GENMASK(max_feat_bit, 0))
+
+/* Last valid bits of the bitmapped firmware registers */
+#define KVM_REG_ARM_STD_BMAP_BIT_MAX           0
+#define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX       0
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX    1
+
+struct kvm_fw_reg_info {
+       uint64_t reg;           /* Register definition */
+       uint64_t max_feat_bit;  /* Bit that represents the upper limit of the feature-map */
+};
+
+#define FW_REG_INFO(r)                 \
+       {                                       \
+               .reg = r,                       \
+               .max_feat_bit = r##_BIT_MAX,    \
+       }
+
+static const struct kvm_fw_reg_info fw_reg_info[] = {
+       FW_REG_INFO(KVM_REG_ARM_STD_BMAP),
+       FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP),
+       FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP),
+};
+
+enum test_stage {
+       TEST_STAGE_REG_IFACE,
+       TEST_STAGE_HVC_IFACE_FEAT_DISABLED,
+       TEST_STAGE_HVC_IFACE_FEAT_ENABLED,
+       TEST_STAGE_HVC_IFACE_FALSE_INFO,
+       TEST_STAGE_END,
+};
+
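+/* Synced to the guest via sync_global_to_guest() as the test advances. */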
+static int stage = TEST_STAGE_REG_IFACE;
+
+struct test_hvc_info {
+       uint32_t func_id;
+       uint64_t arg1;
+};
+
+#define TEST_HVC_INFO(f, a1)   \
+       {                       \
+               .func_id = f,   \
+               .arg1 = a1,     \
+       }
+
+static const struct test_hvc_info hvc_info[] = {
+       /* KVM_REG_ARM_STD_BMAP */
+       TEST_HVC_INFO(ARM_SMCCC_TRNG_VERSION, 0),
+       TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_TRNG_RND64),
+       TEST_HVC_INFO(ARM_SMCCC_TRNG_GET_UUID, 0),
+       TEST_HVC_INFO(ARM_SMCCC_TRNG_RND32, 0),
+       TEST_HVC_INFO(ARM_SMCCC_TRNG_RND64, 0),
+
+       /* KVM_REG_ARM_STD_HYP_BMAP */
+       TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_HV_PV_TIME_FEATURES),
+       TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_HV_PV_TIME_ST),
+       TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_ST, 0),
+
+       /* KVM_REG_ARM_VENDOR_HYP_BMAP */
+       TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID,
+                       ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID),
+       TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0),
+       TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID, KVM_PTP_VIRT_COUNTER),
+};
+
+/* Feed false hypercall info to test the KVM behavior */
+static const struct test_hvc_info false_hvc_info[] = {
+       /* Feature support check against a different family of hypercalls */
+       TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID),
+       TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_TRNG_RND64),
+       TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_TRNG_RND64),
+};
+
+static void guest_test_hvc(const struct test_hvc_info *hc_info)
+{
+       unsigned int i;
+       struct arm_smccc_res res;
+       unsigned int hvc_info_arr_sz;
+
+       hvc_info_arr_sz =
+               hc_info == hvc_info ? ARRAY_SIZE(hvc_info) : ARRAY_SIZE(false_hvc_info);
+
+       for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) {
+               memset(&res, 0, sizeof(res));
+               smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res);
+
+               switch (stage) {
+               case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
+               case TEST_STAGE_HVC_IFACE_FALSE_INFO:
+                       __GUEST_ASSERT(res.a0 == SMCCC_RET_NOT_SUPPORTED,
+                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u",
+                                       res.a0, hc_info->func_id, hc_info->arg1, stage);
+                       break;
+               case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
+                       __GUEST_ASSERT(res.a0 != SMCCC_RET_NOT_SUPPORTED,
+                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u",
+                                       res.a0, hc_info->func_id, hc_info->arg1, stage);
+                       break;
+               default:
+                       GUEST_FAIL("Unexpected stage = %u", stage);
+               }
+       }
+}
+
+static void guest_code(void)
+{
+       while (stage != TEST_STAGE_END) {
+               switch (stage) {
+               case TEST_STAGE_REG_IFACE:
+                       break;
+               case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
+               case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
+                       guest_test_hvc(hvc_info);
+                       break;
+               case TEST_STAGE_HVC_IFACE_FALSE_INFO:
+                       guest_test_hvc(false_hvc_info);
+                       break;
+               default:
+                       GUEST_FAIL("Unexpected stage = %u", stage);
+               }
+
+               GUEST_SYNC(stage);
+       }
+
+       GUEST_DONE();
+}
+
+struct st_time {
+       uint32_t rev;
+       uint32_t attr;
+       uint64_t st_time;
+};
+
+#define STEAL_TIME_SIZE                ((sizeof(struct st_time) + 63) & ~63)
+#define ST_GPA_BASE            (1 << 30)
+
+static void steal_time_init(struct kvm_vcpu *vcpu)
+{
+       uint64_t st_ipa = (ulong)ST_GPA_BASE;
+       unsigned int gpages;
+
+       gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE);
+       vm_userspace_mem_region_add(vcpu->vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
+
+       vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PVTIME_CTRL,
+                            KVM_ARM_VCPU_PVTIME_IPA, &st_ipa);
+}
+
+static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu)
+{
+       uint64_t val;
+       unsigned int i;
+       int ret;
+
+       for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
+               const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
+
+               /* The first 'read' should return the upper limit of the supported features */
+               val = vcpu_get_reg(vcpu, reg_info->reg);
+               TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit),
+                       "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx",
+                       reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val);
+
+               /* Test a 'write' by disabling all the features of the register map */
+               ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
+               TEST_ASSERT(ret == 0,
+                       "Failed to clear all the features of reg: 0x%lx; ret: %d",
+                       reg_info->reg, errno);
+
+               val = vcpu_get_reg(vcpu, reg_info->reg);
+               TEST_ASSERT(val == 0,
+                       "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg);
+
+               /*
+                * Test enabling a feature that's not supported.
+                * Avoid this check if all the bits are occupied.
+                */
+               if (reg_info->max_feat_bit < 63) {
+                       ret = __vcpu_set_reg(vcpu, reg_info->reg, BIT(reg_info->max_feat_bit + 1));
+                       TEST_ASSERT(ret != 0 && errno == EINVAL,
+                       "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx",
+                       errno, reg_info->reg);
+               }
+       }
+}
+
+static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu)
+{
+       uint64_t val;
+       unsigned int i;
+       int ret;
+
+       for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
+               const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
+
+               /*
+                * Before starting the VM, the test clears all the bits.
+                * Check if that's still the case.
+                */
+               val = vcpu_get_reg(vcpu, reg_info->reg);
+               TEST_ASSERT(val == 0,
+                       "Expected all the features to be cleared for reg: 0x%lx",
+                       reg_info->reg);
+
+               /*
+                * Since the VM has run at least once, KVM shouldn't allow modification of
+                * the registers and should return EBUSY. Set the registers and check for
+                * the expected errno.
+                */
+               ret = __vcpu_set_reg(vcpu, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit));
+               TEST_ASSERT(ret != 0 && errno == EBUSY,
+               "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx",
+               errno, reg_info->reg);
+       }
+}
+
+static struct kvm_vm *test_vm_create(struct kvm_vcpu **vcpu)
+{
+       struct kvm_vm *vm;
+
+       vm = vm_create_with_one_vcpu(vcpu, guest_code);
+
+       steal_time_init(*vcpu);
+
+       return vm;
+}
+
+static void test_guest_stage(struct kvm_vm **vm, struct kvm_vcpu **vcpu)
+{
+       int prev_stage = stage;
+
+       pr_debug("Stage: %d\n", prev_stage);
+
+       /* Sync the stage early, the VM might be freed below. */
+       stage++;
+       sync_global_to_guest(*vm, stage);
+
+       switch (prev_stage) {
+       case TEST_STAGE_REG_IFACE:
+               test_fw_regs_after_vm_start(*vcpu);
+               break;
+       case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
+               /* Start a new VM so that all the features are now enabled by default */
+               kvm_vm_free(*vm);
+               *vm = test_vm_create(vcpu);
+               break;
+       case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
+       case TEST_STAGE_HVC_IFACE_FALSE_INFO:
+               break;
+       default:
+               TEST_FAIL("Unknown test stage: %d", prev_stage);
+       }
+}
+
+static void test_run(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       bool guest_done = false;
+
+       vm = test_vm_create(&vcpu);
+
+       test_fw_regs_before_vm_start(vcpu);
+
+       while (!guest_done) {
+               vcpu_run(vcpu);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       test_guest_stage(&vm, &vcpu);
+                       break;
+               case UCALL_DONE:
+                       guest_done = true;
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               default:
+                       TEST_FAIL("Unexpected guest exit");
+               }
+       }
+
+       kvm_vm_free(vm);
+}
+
+int main(void)
+{
+       test_run();
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/mmio_abort.c b/tools/testing/selftests/kvm/arm64/mmio_abort.c
new file mode 100644 (file)
index 0000000..8b7a80a
--- /dev/null
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * mmio_abort - Tests for userspace MMIO abort injection
+ *
+ * Copyright (c) 2024 Google LLC
+ */
+#include "processor.h"
+#include "test_util.h"
+
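+/* GPA that is identity-mapped in the guest but not backed by any memslot. */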
+#define MMIO_ADDR      0x8000000ULL
+
+static u64 expected_abort_pc;
+
+static void expect_sea_handler(struct ex_regs *regs)
+{
+       u64 esr = read_sysreg(esr_el1);
+
+       GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
+       GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
+       GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+
+       GUEST_DONE();
+}
+
+static void unexpected_dabt_handler(struct ex_regs *regs)
+{
+       GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc);
+}
+
+static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code,
+                                                 handler_fn dabt_handler)
+{
+       struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code);
+
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(*vcpu);
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler);
+
+       virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1);
+
+       return vm;
+}
+
+static void vcpu_inject_extabt(struct kvm_vcpu *vcpu)
+{
+       struct kvm_vcpu_events events = {};
+
+       events.exception.ext_dabt_pending = true;
+       vcpu_events_set(vcpu, &events);
+}
+
+static void vcpu_run_expect_done(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       vcpu_run(vcpu);
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       case UCALL_DONE:
+               break;
+       default:
+               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+       }
+}
+
+extern char test_mmio_abort_insn;
+
+static void test_mmio_abort_guest(void)
+{
+       WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn);
+
+       asm volatile("test_mmio_abort_insn:\n\t"
+                    "ldr x0, [%0]\n\t"
+                    : : "r" (MMIO_ADDR) : "x0", "memory");
+
+       GUEST_FAIL("MMIO instruction should not retire");
+}
+
+/*
+ * Test that KVM doesn't complete MMIO emulation when userspace has made an
+ * external abort pending for the instruction.
+ */
+static void test_mmio_abort(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest,
+                                                       expect_sea_handler);
+       struct kvm_run *run = vcpu->run;
+
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
+       TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
+       TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
+       TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
+
+       vcpu_inject_extabt(vcpu);
+       vcpu_run_expect_done(vcpu);
+       kvm_vm_free(vm);
+}
+
+extern char test_mmio_nisv_insn;
+
+static void test_mmio_nisv_guest(void)
+{
+       WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn);
+
+       asm volatile("test_mmio_nisv_insn:\n\t"
+                    "ldr x0, [%0], #8\n\t"
+                    : : "r" (MMIO_ADDR) : "x0", "memory");
+
+       GUEST_FAIL("MMIO instruction should not retire");
+}
+
+/*
+ * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace
+ * hasn't enabled KVM_CAP_ARM_NISV_TO_USER.
+ */
+static void test_mmio_nisv(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
+                                                       unexpected_dabt_handler);
+
+       TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN");
+       TEST_ASSERT_EQ(errno, ENOSYS);
+
+       kvm_vm_free(vm);
+}
+
+/*
+ * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA
+ * reaches the guest.
+ */
+static void test_mmio_nisv_abort(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
+                                                       expect_sea_handler);
+       struct kvm_run *run = vcpu->run;
+
+       vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1);
+
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV);
+       TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR);
+
+       vcpu_inject_extabt(vcpu);
+       vcpu_run_expect_done(vcpu);
+       kvm_vm_free(vm);
+}
+
+int main(void)
+{
+       test_mmio_abort();
+       test_mmio_nisv();
+       test_mmio_nisv_abort();
+}
diff --git a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c
new file mode 100644 (file)
index 0000000..ebd7043
--- /dev/null
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Check that, on a GICv3 system, not configuring GICv3 correctly
+// results in all of the sysregs generating an UNDEF exception.
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
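+/* Set by the UNDEF handler; cleared before each sysreg access below. */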
+static volatile bool handled;
+
+#define __check_sr_read(r)                                     \
+       ({                                                      \
+               uint64_t val;                                   \
+                                                               \
+               handled = false;                                \
+               dsb(sy);                                        \
+               val = read_sysreg_s(SYS_ ## r);                 \
+               val;                                            \
+       })
+
+#define __check_sr_write(r)                                    \
+       do {                                                    \
+               handled = false;                                \
+               dsb(sy);                                        \
+               write_sysreg_s(0, SYS_ ## r);                   \
+               isb();                                          \
+       } while(0)
+
+/* Fatal checks */
+#define check_sr_read(r)                                       \
+       do {                                                    \
+               __check_sr_read(r);                             \
+               __GUEST_ASSERT(handled, #r " no read trap");    \
+       } while(0)
+
+#define check_sr_write(r)                                      \
+       do {                                                    \
+               __check_sr_write(r);                            \
+               __GUEST_ASSERT(handled, #r " no write trap");   \
+       } while(0)
+
+#define check_sr_rw(r)                         \
+       do {                                    \
+               check_sr_read(r);               \
+               check_sr_write(r);              \
+       } while(0)
+
+static void guest_code(void)
+{
+       uint64_t val;
+
+       /*
+        * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having
+        * hidden the feature at runtime without any other userspace action.
+        */
+       __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC),
+                                read_sysreg(id_aa64pfr0_el1)) == 0,
+                      "GICv3 wrongly advertised");
+
+       /*
+        * Access all GICv3 registers, and fail if we don't get an UNDEF.
+        * Note that we happily access all the APxRn registers without
+        * checking their existence, as all we want to see is a failure.
+        */
+       check_sr_rw(ICC_PMR_EL1);
+       check_sr_read(ICC_IAR0_EL1);
+       check_sr_write(ICC_EOIR0_EL1);
+       check_sr_rw(ICC_HPPIR0_EL1);
+       check_sr_rw(ICC_BPR0_EL1);
+       check_sr_rw(ICC_AP0R0_EL1);
+       check_sr_rw(ICC_AP0R1_EL1);
+       check_sr_rw(ICC_AP0R2_EL1);
+       check_sr_rw(ICC_AP0R3_EL1);
+       check_sr_rw(ICC_AP1R0_EL1);
+       check_sr_rw(ICC_AP1R1_EL1);
+       check_sr_rw(ICC_AP1R2_EL1);
+       check_sr_rw(ICC_AP1R3_EL1);
+       check_sr_write(ICC_DIR_EL1);
+       check_sr_read(ICC_RPR_EL1);
+       check_sr_write(ICC_SGI1R_EL1);
+       check_sr_write(ICC_ASGI1R_EL1);
+       check_sr_write(ICC_SGI0R_EL1);
+       check_sr_read(ICC_IAR1_EL1);
+       check_sr_write(ICC_EOIR1_EL1);
+       check_sr_rw(ICC_HPPIR1_EL1);
+       check_sr_rw(ICC_BPR1_EL1);
+       check_sr_rw(ICC_CTLR_EL1);
+       check_sr_rw(ICC_IGRPEN0_EL1);
+       check_sr_rw(ICC_IGRPEN1_EL1);
+
+       /*
+        * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can
+        * be RAO/WI. Engage in non-fatal accesses, starting with a
+        * write of 0 to try to disable SRE, and see whether it
+        * sticks.
+        */
+       __check_sr_write(ICC_SRE_EL1);
+       if (!handled)
+               GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n");
+
+       val = __check_sr_read(ICC_SRE_EL1);
+       if (!handled) {
+               __GUEST_ASSERT((val & BIT(0)),
+                              "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n");
+               GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n");
+       }
+
+       GUEST_DONE();
+}
+
+static void guest_undef_handler(struct ex_regs *regs)
+{
+       /* Success, we've gracefully exploded! */
+       handled = true;
+       regs->pc += 4;
+}
+
+static void test_run_vcpu(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       do {
+               vcpu_run(vcpu);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               case UCALL_PRINTF:
+                       printf("%s", uc.buffer);
+                       break;
+               case UCALL_DONE:
+                       break;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       } while (uc.cmd != UCALL_DONE);
+}
+
+static void test_guest_no_gicv3(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       /* Create a VM without a GICv3 */
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(vcpu);
+
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_ELx_EC_UNKNOWN, guest_undef_handler);
+
+       test_run_vcpu(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       uint64_t pfr0;
+
+       vm = vm_create_with_one_vcpu(&vcpu, NULL);
+       pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+       __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0),
+                      "GICv3 not supported.");
+       kvm_vm_free(vm);
+
+       test_guest_no_gicv3();
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c
new file mode 100644 (file)
index 0000000..ec33a8f
--- /dev/null
@@ -0,0 +1,1135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * page_fault_test.c - Test stage 2 faults.
+ *
+ * This test tries different combinations of guest accesses (e.g., write,
+ * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on
+ * hugetlbfs with a hole). It checks that the expected handling method is
+ * called (e.g., uffd faults with the right address and write/read flag).
+ */
+#include <linux/bitmap.h>
+#include <fcntl.h>
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+#include <asm/sysreg.h>
+#include <linux/bitfield.h>
+#include "guest_modes.h"
+#include "userfaultfd_util.h"
+
+/* Guest virtual addresses that point to the test page and its PTE. */
+#define TEST_GVA                               0xc0000000
+#define TEST_EXEC_GVA                          (TEST_GVA + 0x8)
+#define TEST_PTE_GVA                           0xb0000000
+#define TEST_DATA                              0x0123456789ABCDEF
+
+static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA;
+
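+/* Commands the guest issues to the host via GUEST_SYNC(cmd). */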
+#define CMD_NONE                               (0)
+#define CMD_SKIP_TEST                          (1ULL << 1)
+#define CMD_HOLE_PT                            (1ULL << 2)
+#define CMD_HOLE_DATA                          (1ULL << 3)
+#define CMD_CHECK_WRITE_IN_DIRTY_LOG           (1ULL << 4)
+#define CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG                (1ULL << 5)
+#define CMD_CHECK_NO_WRITE_IN_DIRTY_LOG                (1ULL << 6)
+#define CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG     (1ULL << 7)
+#define CMD_SET_PTE_AF                         (1ULL << 8)
+
+#define PREPARE_FN_NR                          10
+#define CHECK_FN_NR                            10
+
+static struct event_cnt {
+       int mmio_exits;
+       int fail_vcpu_runs;
+       int uffd_faults;
+       /* uffd_faults is incremented from multiple threads. */
+       pthread_mutex_t uffd_faults_mutex;
+} events;
+
+struct test_desc {
+       const char *name;
+       uint64_t mem_mark_cmd;
+       /* Skip the test if any prepare function returns false */
+       bool (*guest_prepare[PREPARE_FN_NR])(void);
+       void (*guest_test)(void);
+       void (*guest_test_check[CHECK_FN_NR])(void);
+       uffd_handler_t uffd_pt_handler;
+       uffd_handler_t uffd_data_handler;
+       void (*dabt_handler)(struct ex_regs *regs);
+       void (*iabt_handler)(struct ex_regs *regs);
+       void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run);
+       void (*fail_vcpu_run_handler)(int ret);
+       uint32_t pt_memslot_flags;
+       uint32_t data_memslot_flags;
+       bool skip;
+       struct event_cnt expected_events;
+};
+
+struct test_params {
+       enum vm_mem_backing_src_type src_type;
+       struct test_desc *test_desc;
+};
+
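+/* Invalidate any TLB entries for vaddr (by VA, all ASIDs, inner shareable). */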
+static inline void flush_tlb_page(uint64_t vaddr)
+{
+       uint64_t page = vaddr >> 12;
+
+       dsb(ishst);
+       asm volatile("tlbi vaae1is, %0" :: "r" (page));
+       dsb(ish);
+       isb();
+}
+
+static void guest_write64(void)
+{
+       uint64_t val;
+
+       WRITE_ONCE(*guest_test_memory, TEST_DATA);
+       val = READ_ONCE(*guest_test_memory);
+       GUEST_ASSERT_EQ(val, TEST_DATA);
+}
+
+/* Check the system for atomic instructions. */
+static bool guest_check_lse(void)
+{
+       uint64_t isar0 = read_sysreg(id_aa64isar0_el1);
+       uint64_t atomic;
+
+       atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0);
+       return atomic >= 2;
+}
+
+static bool guest_check_dc_zva(void)
+{
+       uint64_t dczid = read_sysreg(dczid_el0);
+       uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid);
+
+       return dzp == 0;
+}
+
+/* Compare and swap instruction. */
+static void guest_cas(void)
+{
+       uint64_t val;
+
+       GUEST_ASSERT(guest_check_lse());
+       asm volatile(".arch_extension lse\n"
+                    "casal %0, %1, [%2]\n"
+                    :: "r" (0ul), "r" (TEST_DATA), "r" (guest_test_memory));
+       val = READ_ONCE(*guest_test_memory);
+       GUEST_ASSERT_EQ(val, TEST_DATA);
+}
+
+static void guest_read64(void)
+{
+       uint64_t val;
+
+       val = READ_ONCE(*guest_test_memory);
+       GUEST_ASSERT_EQ(val, 0);
+}
+
+/* Address translation instruction */
+static void guest_at(void)
+{
+       uint64_t par;
+
+       asm volatile("at s1e1r, %0" :: "r" (guest_test_memory));
+       isb();
+       par = read_sysreg(par_el1);
+
+       /* PAR_EL1.F (bit 0) indicates whether the AT was successful */
+       GUEST_ASSERT_EQ(par & 1, 0);
+}
+
+/*
+ * The size of the block written by "dc zva" is guaranteed to be between (2 <<
+ * 0) and (2 << 9), which is safe in our case as we need the write to happen
+ * for at least a word, and not more than a page.
+ */
+static void guest_dc_zva(void)
+{
+       uint16_t val;
+
+       asm volatile("dc zva, %0" :: "r" (guest_test_memory));
+       dsb(ish);
+       val = READ_ONCE(*guest_test_memory);
+       GUEST_ASSERT_EQ(val, 0);
+}
+
+/*
+ * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0).
+ * KVM must take special care with those: they should still count as accesses
+ * for dirty logging or user-faulting, but should be handled differently on
+ * MMIO.
+ */
+static void guest_ld_preidx(void)
+{
+       uint64_t val;
+       uint64_t addr = TEST_GVA - 8;
+
+       /*
+        * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is
+        * in a gap between memslots not backed by anything.
+        */
+       asm volatile("ldr %0, [%1, #8]!"
+                    : "=r" (val), "+r" (addr));
+       GUEST_ASSERT_EQ(val, 0);
+       GUEST_ASSERT_EQ(addr, TEST_GVA);
+}
+
+static void guest_st_preidx(void)
+{
+       uint64_t val = TEST_DATA;
+       uint64_t addr = TEST_GVA - 8;
+
+       asm volatile("str %0, [%1, #8]!"
+                    : "+r" (val), "+r" (addr));
+
+       GUEST_ASSERT_EQ(addr, TEST_GVA);
+       val = READ_ONCE(*guest_test_memory);
+}
+
+static bool guest_set_ha(void)
+{
+       uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1);
+       uint64_t hadbs, tcr;
+
+       /* Skip if HA is not supported. */
+       hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1);
+       if (hadbs == 0)
+               return false;
+
+       tcr = read_sysreg(tcr_el1) | TCR_EL1_HA;
+       write_sysreg(tcr, tcr_el1);
+       isb();
+
+       return true;
+}
+
+static bool guest_clear_pte_af(void)
+{
+       *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF;
+       flush_tlb_page(TEST_GVA);
+
+       return true;
+}
+
+static void guest_check_pte_af(void)
+{
+       dsb(ish);
+       GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF);
+}
+
+static void guest_check_write_in_dirty_log(void)
+{
+       GUEST_SYNC(CMD_CHECK_WRITE_IN_DIRTY_LOG);
+}
+
+static void guest_check_no_write_in_dirty_log(void)
+{
+       GUEST_SYNC(CMD_CHECK_NO_WRITE_IN_DIRTY_LOG);
+}
+
+static void guest_check_s1ptw_wr_in_dirty_log(void)
+{
+       GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG);
+}
+
+static void guest_check_no_s1ptw_wr_in_dirty_log(void)
+{
+       GUEST_SYNC(CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG);
+}
+
+static void guest_exec(void)
+{
+       int (*code)(void) = (int (*)(void))TEST_EXEC_GVA;
+       int ret;
+
+       ret = code();
+       GUEST_ASSERT_EQ(ret, 0x77);
+}
+
+static bool guest_prepare(struct test_desc *test)
+{
+       bool (*prepare_fn)(void);
+       int i;
+
+       for (i = 0; i < PREPARE_FN_NR; i++) {
+               prepare_fn = test->guest_prepare[i];
+               if (prepare_fn && !prepare_fn())
+                       return false;
+       }
+
+       return true;
+}
+
+static void guest_test_check(struct test_desc *test)
+{
+       void (*check_fn)(void);
+       int i;
+
+       for (i = 0; i < CHECK_FN_NR; i++) {
+               check_fn = test->guest_test_check[i];
+               if (check_fn)
+                       check_fn();
+       }
+}
+
+static void guest_code(struct test_desc *test)
+{
+       if (!guest_prepare(test))
+               GUEST_SYNC(CMD_SKIP_TEST);
+
+       GUEST_SYNC(test->mem_mark_cmd);
+
+       if (test->guest_test)
+               test->guest_test();
+
+       guest_test_check(test);
+       GUEST_DONE();
+}
+
+static void no_dabt_handler(struct ex_regs *regs)
+{
+       GUEST_FAIL("Unexpected dabt, far_el1 = 0x%lx", read_sysreg(far_el1));
+}
+
+static void no_iabt_handler(struct ex_regs *regs)
+{
+       GUEST_FAIL("Unexpected iabt, pc = 0x%lx", regs->pc);
+}
+
+static struct uffd_args {
+       char *copy;
+       void *hva;
+       uint64_t paging_size;
+} pt_args, data_args;
+
+/* Handle a MISSING userfaultfd fault by UFFDIO_COPYing the saved data back in. */
+static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg,
+                               struct uffd_args *args)
+{
+       uint64_t addr = msg->arg.pagefault.address;
+       uint64_t flags = msg->arg.pagefault.flags;
+       struct uffdio_copy copy;
+       int ret;
+
+       TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING,
+                   "The only expected UFFD mode is MISSING");
+       TEST_ASSERT_EQ(addr, (uint64_t)args->hva);
+
+       pr_debug("uffd fault: addr=%p write=%d\n",
+                (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE));
+
+       copy.src = (uint64_t)args->copy;
+       copy.dst = addr;
+       copy.len = args->paging_size;
+       copy.mode = 0;
+
+       ret = ioctl(uffd, UFFDIO_COPY, &copy);
+       if (ret == -1) {
+               pr_info("Failed UFFDIO_COPY in 0x%lx with errno: %d\n",
+                       addr, errno);
+               return ret;
+       }
+
+       pthread_mutex_lock(&events.uffd_faults_mutex);
+       events.uffd_faults += 1;
+       pthread_mutex_unlock(&events.uffd_faults_mutex);
+       return 0;
+}
+
+static int uffd_pt_handler(int mode, int uffd, struct uffd_msg *msg)
+{
+       return uffd_generic_handler(mode, uffd, msg, &pt_args);
+}
+
+static int uffd_data_handler(int mode, int uffd, struct uffd_msg *msg)
+{
+       return uffd_generic_handler(mode, uffd, msg, &data_args);
+}
+
+static void setup_uffd_args(struct userspace_mem_region *region,
+                           struct uffd_args *args)
+{
+       args->hva = (void *)region->region.userspace_addr;
+       args->paging_size = region->region.memory_size;
+
+       args->copy = malloc(args->paging_size);
+       TEST_ASSERT(args->copy, "Failed to allocate data copy.");
+       memcpy(args->copy, args->hva, args->paging_size);
+}
+
+static void setup_uffd(struct kvm_vm *vm, struct test_params *p,
+                      struct uffd_desc **pt_uffd, struct uffd_desc **data_uffd)
+{
+       struct test_desc *test = p->test_desc;
+       int uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
+
+       setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_PT), &pt_args);
+       setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_TEST_DATA), &data_args);
+
+       *pt_uffd = NULL;
+       if (test->uffd_pt_handler)
+               *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0,
+                                                   pt_args.hva,
+                                                   pt_args.paging_size,
+                                                   1, test->uffd_pt_handler);
+
+       *data_uffd = NULL;
+       if (test->uffd_data_handler)
+               *data_uffd = uffd_setup_demand_paging(uffd_mode, 0,
+                                                     data_args.hva,
+                                                     data_args.paging_size,
+                                                     1, test->uffd_data_handler);
+}
+
+static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd,
+                     struct uffd_desc *data_uffd)
+{
+       if (test->uffd_pt_handler)
+               uffd_stop_demand_paging(pt_uffd);
+       if (test->uffd_data_handler)
+               uffd_stop_demand_paging(data_uffd);
+
+       free(pt_args.copy);
+       free(data_args.copy);
+}
+
+static int uffd_no_handler(int mode, int uffd, struct uffd_msg *msg)
+{
+       TEST_FAIL("There was no UFFD fault expected.");
+       return -1;
+}
+
+/* Returns false if the test should be skipped. */
+static bool punch_hole_in_backing_store(struct kvm_vm *vm,
+                                       struct userspace_mem_region *region)
+{
+       void *hva = (void *)region->region.userspace_addr;
+       uint64_t paging_size = region->region.memory_size;
+       int ret, fd = region->fd;
+
+       if (fd != -1) {
+               ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                               0, paging_size);
+               TEST_ASSERT(ret == 0, "fallocate failed");
+       } else {
+               ret = madvise(hva, paging_size, MADV_DONTNEED);
+               TEST_ASSERT(ret == 0, "madvise failed");
+       }
+
+       return true;
+}
+
+static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run)
+{
+       struct userspace_mem_region *region;
+       void *hva;
+
+       region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+       hva = (void *)region->region.userspace_addr;
+
+       TEST_ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr);
+
+       memcpy(hva, run->mmio.data, run->mmio.len);
+       events.mmio_exits += 1;
+}
+
+static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run)
+{
+       uint64_t data;
+
+       memcpy(&data, run->mmio.data, sizeof(data));
+       pr_debug("addr=%lld len=%d w=%d data=%lx\n",
+                run->mmio.phys_addr, run->mmio.len,
+                run->mmio.is_write, data);
+       TEST_FAIL("There was no MMIO exit expected.");
+}
+
+static bool check_write_in_dirty_log(struct kvm_vm *vm,
+                                    struct userspace_mem_region *region,
+                                    uint64_t host_pg_nr)
+{
+       unsigned long *bmap;
+       bool first_page_dirty;
+       uint64_t size = region->region.memory_size;
+
+       /* getpagesize() is not always equal to vm->page_size */
+       bmap = bitmap_zalloc(size / getpagesize());
+       kvm_vm_get_dirty_log(vm, region->region.slot, bmap);
+       first_page_dirty = test_bit(host_pg_nr, bmap);
+       free(bmap);
+       return first_page_dirty;
+}
+
+/* Returns true to continue the test, and false if it should be skipped. */
+static bool handle_cmd(struct kvm_vm *vm, int cmd)
+{
+       struct userspace_mem_region *data_region, *pt_region;
+       bool continue_test = true;
+       uint64_t pte_gpa, pte_pg;
+
+       data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+       pt_region = vm_get_mem_region(vm, MEM_REGION_PT);
+       pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
+       pte_pg = (pte_gpa - pt_region->region.guest_phys_addr) / getpagesize();
+
+       if (cmd == CMD_SKIP_TEST)
+               continue_test = false;
+
+       if (cmd & CMD_HOLE_PT)
+               continue_test = punch_hole_in_backing_store(vm, pt_region);
+       if (cmd & CMD_HOLE_DATA)
+               continue_test = punch_hole_in_backing_store(vm, data_region);
+       if (cmd & CMD_CHECK_WRITE_IN_DIRTY_LOG)
+               TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0),
+                           "Missing write in dirty log");
+       if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG)
+               TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, pte_pg),
+                           "Missing s1ptw write in dirty log");
+       if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG)
+               TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0),
+                           "Unexpected write in dirty log");
+       if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG)
+               TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, pte_pg),
+                           "Unexpected s1ptw write in dirty log");
+
+       return continue_test;
+}
+
+void fail_vcpu_run_no_handler(int ret)
+{
+       TEST_FAIL("Unexpected vcpu run failure");
+}
+
+void fail_vcpu_run_mmio_no_syndrome_handler(int ret)
+{
+       TEST_ASSERT(errno == ENOSYS,
+                   "The mmio handler should have returned not implemented.");
+       events.fail_vcpu_runs += 1;
+}
+
+typedef uint32_t aarch64_insn_t;
+extern aarch64_insn_t __exec_test[2];
+
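+/* Returns 0x77; copied into the test-data memslot by load_exec_code_for_test(). */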
+noinline void __return_0x77(void)
+{
+       asm volatile("__exec_test: mov x0, #0x77\n"
+                    "ret\n");
+}
+
+/*
+ * Note that this function runs on the host before the test VM starts: there's
+ * no need to sync the D$ and I$ caches.
+ */
+static void load_exec_code_for_test(struct kvm_vm *vm)
+{
+       uint64_t *code;
+       struct userspace_mem_region *region;
+       void *hva;
+
+       region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+       hva = (void *)region->region.userspace_addr;
+
+       assert(TEST_EXEC_GVA > TEST_GVA);
+       code = hva + TEST_EXEC_GVA - TEST_GVA;
+       memcpy(code, __exec_test, sizeof(__exec_test));
+}
+
+static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+                                struct test_desc *test)
+{
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(vcpu);
+
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_ELx_EC_DABT_CUR, no_dabt_handler);
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_ELx_EC_IABT_CUR, no_iabt_handler);
+}
+
+static void setup_gva_maps(struct kvm_vm *vm)
+{
+       struct userspace_mem_region *region;
+       uint64_t pte_gpa;
+
+       region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+       /* Map TEST_GVA first. This will install a new PTE. */
+       virt_pg_map(vm, TEST_GVA, region->region.guest_phys_addr);
+       /* Then map TEST_PTE_GVA to the above PTE. */
+       pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
+       virt_pg_map(vm, TEST_PTE_GVA, pte_gpa);
+}
+
+enum pf_test_memslots {
+       CODE_AND_DATA_MEMSLOT,
+       PAGE_TABLE_MEMSLOT,
+       TEST_DATA_MEMSLOT,
+};
+
+/*
+ * Create a memslot for code and data at pfn=0, and test-data and PT ones
+ * at max_gfn.
+ */
+static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
+{
+       uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type);
+       uint64_t guest_page_size = vm->page_size;
+       uint64_t max_gfn = vm_compute_max_gfn(vm);
+       /* Enough for 2M of code when using 4K guest pages. */
+       uint64_t code_npages = 512;
+       uint64_t pt_size, data_size, data_gpa;
+
+       /*
+        * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using
+        * VM_MODE_P48V48_4K. Note that the .text takes ~1.6MBs.  That's 13
+        * pages. VM_MODE_P48V48_4K is the mode with most PT pages; let's use
+        * twice that just in case.
+        */
+       pt_size = 26 * guest_page_size;
+
+       /* memslot sizes and gpa's must be aligned to the backing page size */
+       pt_size = align_up(pt_size, backing_src_pagesz);
+       data_size = align_up(guest_page_size, backing_src_pagesz);
+       data_gpa = (max_gfn * guest_page_size) - data_size;
+       data_gpa = align_down(data_gpa, backing_src_pagesz);
+
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0,
+                                   CODE_AND_DATA_MEMSLOT, code_npages, 0);
+       vm->memslots[MEM_REGION_CODE] = CODE_AND_DATA_MEMSLOT;
+       vm->memslots[MEM_REGION_DATA] = CODE_AND_DATA_MEMSLOT;
+
+       vm_userspace_mem_region_add(vm, p->src_type, data_gpa - pt_size,
+                                   PAGE_TABLE_MEMSLOT, pt_size / guest_page_size,
+                                   p->test_desc->pt_memslot_flags);
+       vm->memslots[MEM_REGION_PT] = PAGE_TABLE_MEMSLOT;
+
+       vm_userspace_mem_region_add(vm, p->src_type, data_gpa, TEST_DATA_MEMSLOT,
+                                   data_size / guest_page_size,
+                                   p->test_desc->data_memslot_flags);
+       vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
+}
+
+static void setup_ucall(struct kvm_vm *vm)
+{
+       struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+
+       ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size);
+}
+
+static void setup_default_handlers(struct test_desc *test)
+{
+       if (!test->mmio_handler)
+               test->mmio_handler = mmio_no_handler;
+
+       if (!test->fail_vcpu_run_handler)
+               test->fail_vcpu_run_handler = fail_vcpu_run_no_handler;
+}
+
+static void check_event_counts(struct test_desc *test)
+{
+       TEST_ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);
+       TEST_ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits);
+       TEST_ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs);
+}
+
+static void print_test_banner(enum vm_guest_mode mode, struct test_params *p)
+{
+       struct test_desc *test = p->test_desc;
+
+       pr_debug("Test: %s\n", test->name);
+       pr_debug("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+       pr_debug("Testing memory backing src type: %s\n",
+                vm_mem_backing_src_alias(p->src_type)->name);
+}
+
+static void reset_event_counts(void)
+{
+       memset(&events, 0, sizeof(events));
+}
+
+/*
+ * This function either succeeds, skips the test (after setting test->skip), or
+ * fails with a TEST_FAIL that aborts all tests.
+ */
+static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+                         struct test_desc *test)
+{
+       struct kvm_run *run;
+       struct ucall uc;
+       int ret;
+
+       run = vcpu->run;
+
+       for (;;) {
+               ret = _vcpu_run(vcpu);
+               if (ret) {
+                       test->fail_vcpu_run_handler(ret);
+                       goto done;
+               }
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       if (!handle_cmd(vm, uc.args[1])) {
+                               test->skip = true;
+                               goto done;
+                       }
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               case UCALL_NONE:
+                       if (run->exit_reason == KVM_EXIT_MMIO)
+                               test->mmio_handler(vm, run);
+                       break;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+
+done:
+       pr_debug(test->skip ? "Skipped.\n" : "Done.\n");
+}
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+       struct test_params *p = (struct test_params *)arg;
+       struct test_desc *test = p->test_desc;
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+       struct uffd_desc *pt_uffd, *data_uffd;
+
+       print_test_banner(mode, p);
+
+       vm = ____vm_create(VM_SHAPE(mode));
+       setup_memslots(vm, p);
+       kvm_vm_elf_load(vm, program_invocation_name);
+       setup_ucall(vm);
+       vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+       setup_gva_maps(vm);
+
+       reset_event_counts();
+
+       /*
+        * Set some code in the data memslot for the guest to execute (only
+        * applicable to the EXEC tests). This has to be done before
+        * setup_uffd() as that function copies the memslot data for the uffd
+        * handler.
+        */
+       load_exec_code_for_test(vm);
+       setup_uffd(vm, p, &pt_uffd, &data_uffd);
+       setup_abort_handlers(vm, vcpu, test);
+       setup_default_handlers(test);
+       vcpu_args_set(vcpu, 1, test);
+
+       vcpu_run_loop(vm, vcpu, test);
+
+       kvm_vm_free(vm);
+       free_uffd(test, pt_uffd, data_uffd);
+
+       /*
+        * Make sure we check the events after the uffd threads have exited,
+        * which means they updated their respective event counters.
+        */
+       if (!test->skip)
+               check_event_counts(test);
+}
+
+static void help(char *name)
+{
+       puts("");
+       printf("usage: %s [-h] [-s mem-type]\n", name);
+       puts("");
+       guest_modes_help();
+       backing_src_help("-s");
+       puts("");
+}
+
+#define SNAME(s)                       #s
+#define SCAT2(a, b)                    SNAME(a ## _ ## b)
+#define SCAT3(a, b, c)                 SCAT2(a, SCAT2(b, c))
+#define SCAT4(a, b, c, d)              SCAT2(a, SCAT3(b, c, d))
+
+#define _CHECK(_test)                  _CHECK_##_test
+#define _PREPARE(_test)                        _PREPARE_##_test
+#define _PREPARE_guest_read64          NULL
+#define _PREPARE_guest_ld_preidx       NULL
+#define _PREPARE_guest_write64         NULL
+#define _PREPARE_guest_st_preidx       NULL
+#define _PREPARE_guest_exec            NULL
+#define _PREPARE_guest_at              NULL
+#define _PREPARE_guest_dc_zva          guest_check_dc_zva
+#define _PREPARE_guest_cas             guest_check_lse
+
+/* With or without access flag checks */
+#define _PREPARE_with_af               guest_set_ha, guest_clear_pte_af
+#define _PREPARE_no_af                 NULL
+#define _CHECK_with_af                 guest_check_pte_af
+#define _CHECK_no_af                   NULL
+
+/* Performs an access and checks that no faults were triggered. */
+#define TEST_ACCESS(_access, _with_af, _mark_cmd)                              \
+{                                                                              \
+       .name                   = SCAT3(_access, _with_af, #_mark_cmd),         \
+       .guest_prepare          = { _PREPARE(_with_af),                         \
+                                   _PREPARE(_access) },                        \
+       .mem_mark_cmd           = _mark_cmd,                                    \
+       .guest_test             = _access,                                      \
+       .guest_test_check       = { _CHECK(_with_af) },                         \
+       .expected_events        = { 0 },                                        \
+}
+
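+/*
+ * Performs an access after the memory marking command (_mark_cmd) and expects
+ * _uffd_faults userfaultfd faults to be handled.
+ */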
+#define TEST_UFFD(_access, _with_af, _mark_cmd,                                        \
+                 _uffd_data_handler, _uffd_pt_handler, _uffd_faults)           \
+{                                                                              \
+       .name                   = SCAT4(uffd, _access, _with_af, #_mark_cmd),   \
+       .guest_prepare          = { _PREPARE(_with_af),                         \
+                                   _PREPARE(_access) },                        \
+       .guest_test             = _access,                                      \
+       .mem_mark_cmd           = _mark_cmd,                                    \
+       .guest_test_check       = { _CHECK(_with_af) },                         \
+       .uffd_data_handler      = _uffd_data_handler,                           \
+       .uffd_pt_handler        = _uffd_pt_handler,                             \
+       .expected_events        = { .uffd_faults = _uffd_faults, },             \
+}
+
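+/* Performs an access while both memslots are tracked in the dirty log. */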
+#define TEST_DIRTY_LOG(_access, _with_af, _test_check, _pt_check)              \
+{                                                                              \
+       .name                   = SCAT3(dirty_log, _access, _with_af),          \
+       .data_memslot_flags     = KVM_MEM_LOG_DIRTY_PAGES,                      \
+       .pt_memslot_flags       = KVM_MEM_LOG_DIRTY_PAGES,                      \
+       .guest_prepare          = { _PREPARE(_with_af),                         \
+                                   _PREPARE(_access) },                        \
+       .guest_test             = _access,                                      \
+       .guest_test_check       = { _CHECK(_with_af), _test_check, _pt_check }, \
+       .expected_events        = { 0 },                                        \
+}
+
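+/* Combines dirty logging on both memslots with uffd handling of punched holes. */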
+#define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler,         \
+                               _uffd_faults, _test_check, _pt_check)           \
+{                                                                              \
+       .name                   = SCAT3(uffd_and_dirty_log, _access, _with_af), \
+       .data_memslot_flags     = KVM_MEM_LOG_DIRTY_PAGES,                      \
+       .pt_memslot_flags       = KVM_MEM_LOG_DIRTY_PAGES,                      \
+       .guest_prepare          = { _PREPARE(_with_af),                         \
+                                   _PREPARE(_access) },                        \
+       .guest_test             = _access,                                      \
+       .mem_mark_cmd           = CMD_HOLE_DATA | CMD_HOLE_PT,                  \
+       .guest_test_check       = { _CHECK(_with_af), _test_check, _pt_check }, \
+       .uffd_data_handler      = _uffd_data_handler,                           \
+       .uffd_pt_handler        = uffd_pt_handler,                              \
+       .expected_events        = { .uffd_faults = _uffd_faults, },             \
+}
+
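+/* Performs an access while both memslots are mapped read-only. */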
+#define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits)                   \
+{                                                                              \
+       .name                   = SCAT2(ro_memslot, _access),                   \
+       .data_memslot_flags     = KVM_MEM_READONLY,                             \
+       .pt_memslot_flags       = KVM_MEM_READONLY,                             \
+       .guest_prepare          = { _PREPARE(_access) },                        \
+       .guest_test             = _access,                                      \
+       .mmio_handler           = _mmio_handler,                                \
+       .expected_events        = { .mmio_exits = _mmio_exits },                \
+}
+
+#define TEST_RO_MEMSLOT_NO_SYNDROME(_access)                                   \
+{                                                                              \
+       .name                   = SCAT2(ro_memslot_no_syndrome, _access),       \
+       .data_memslot_flags     = KVM_MEM_READONLY,                             \
+       .pt_memslot_flags       = KVM_MEM_READONLY,                             \
+       .guest_prepare          = { _PREPARE(_access) },                        \
+       .guest_test             = _access,                                      \
+       .fail_vcpu_run_handler  = fail_vcpu_run_mmio_no_syndrome_handler,       \
+       .expected_events        = { .fail_vcpu_runs = 1 },                      \
+}
+
+#define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits,     \
+                                     _test_check)                              \
+{                                                                              \
+       .name                   = SCAT2(ro_memslot, _access),                   \
+       .data_memslot_flags     = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,   \
+       .pt_memslot_flags       = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,   \
+       .guest_prepare          = { _PREPARE(_access) },                        \
+       .guest_test             = _access,                                      \
+       .guest_test_check       = { _test_check },                              \
+       .mmio_handler           = _mmio_handler,                                \
+       .expected_events        = { .mmio_exits = _mmio_exits},                 \
+}
+
+#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(_access, _test_check)                \
+{                                                                              \
+       .name                   = SCAT2(ro_memslot_no_syn_and_dlog, _access),   \
+       .data_memslot_flags     = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,   \
+       .pt_memslot_flags       = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,   \
+       .guest_prepare          = { _PREPARE(_access) },                        \
+       .guest_test             = _access,                                      \
+       .guest_test_check       = { _test_check },                              \
+       .fail_vcpu_run_handler  = fail_vcpu_run_mmio_no_syndrome_handler,       \
+       .expected_events        = { .fail_vcpu_runs = 1 },                      \
+}
+
+#define TEST_RO_MEMSLOT_AND_UFFD(_access, _mmio_handler, _mmio_exits,          \
+                                _uffd_data_handler, _uffd_faults)              \
+{                                                                              \
+       .name                   = SCAT2(ro_memslot_uffd, _access),              \
+       .data_memslot_flags     = KVM_MEM_READONLY,                             \
+       .pt_memslot_flags       = KVM_MEM_READONLY,                             \
+       .mem_mark_cmd           = CMD_HOLE_DATA | CMD_HOLE_PT,                  \
+       .guest_prepare          = { _PREPARE(_access) },                        \
+       .guest_test             = _access,                                      \
+       .uffd_data_handler      = _uffd_data_handler,                           \
+       .uffd_pt_handler        = uffd_pt_handler,                              \
+       .mmio_handler           = _mmio_handler,                                \
+       .expected_events        = { .mmio_exits = _mmio_exits,                  \
+                                   .uffd_faults = _uffd_faults },              \
+}
+
+#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(_access, _uffd_data_handler,      \
+                                            _uffd_faults)                      \
+{                                                                              \
+       .name                   = SCAT2(ro_memslot_no_syndrome, _access),       \
+       .data_memslot_flags     = KVM_MEM_READONLY,                             \
+       .pt_memslot_flags       = KVM_MEM_READONLY,                             \
+       .mem_mark_cmd           = CMD_HOLE_DATA | CMD_HOLE_PT,                  \
+       .guest_prepare          = { _PREPARE(_access) },                        \
+       .guest_test             = _access,                                      \
+       .uffd_data_handler      = _uffd_data_handler,                           \
+       .uffd_pt_handler        = uffd_pt_handler,                              \
+       .fail_vcpu_run_handler  = fail_vcpu_run_mmio_no_syndrome_handler,       \
+       .expected_events        = { .fail_vcpu_runs = 1,                        \
+                                   .uffd_faults = _uffd_faults },              \
+}
+
+static struct test_desc tests[] = {
+
+       /* Check that HW is setting the Access Flag (AF) (sanity checks). */
+       TEST_ACCESS(guest_read64, with_af, CMD_NONE),
+       TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE),
+       TEST_ACCESS(guest_cas, with_af, CMD_NONE),
+       TEST_ACCESS(guest_write64, with_af, CMD_NONE),
+       TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE),
+       TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE),
+       TEST_ACCESS(guest_exec, with_af, CMD_NONE),
+
+       /*
+        * Punch a hole in the data backing store, and then try multiple
+        * accesses: reads should return zeroes, and writes should
+        * re-populate the page. The test also checks that no
+        * exception was generated in the guest.  Note that this
+        * reading/writing behavior is the same as reading/writing a
+        * punched page (with fallocate(FALLOC_FL_PUNCH_HOLE)) from
+        * userspace.
+        */
+       TEST_ACCESS(guest_read64, no_af, CMD_HOLE_DATA),
+       TEST_ACCESS(guest_cas, no_af, CMD_HOLE_DATA),
+       TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_DATA),
+       TEST_ACCESS(guest_write64, no_af, CMD_HOLE_DATA),
+       TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_DATA),
+       TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA),
+       TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA),
+
+       /*
+        * Punch holes in the data and PT backing stores and mark them for
+        * userfaultfd handling. This should result in 2 faults: the access
+        * on the data backing store, and its respective S1 page table walk
+        * (S1PTW).
+        */
+       TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+                 uffd_data_handler, uffd_pt_handler, 2),
+       TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+                 uffd_data_handler, uffd_pt_handler, 2),
+       TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+                 uffd_data_handler, uffd_pt_handler, 2),
+       /*
+        * Can't test guest_at with_af as it's IMPDEF whether the AF is set.
+        * The S1PTW fault should still be marked as a write.
+        */
+       TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+                 uffd_no_handler, uffd_pt_handler, 1),
+       TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+                 uffd_data_handler, uffd_pt_handler, 2),
+       TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+                 uffd_data_handler, uffd_pt_handler, 2),
+       TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+                 uffd_data_handler, uffd_pt_handler, 2),
+       TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+                 uffd_data_handler, uffd_pt_handler, 2),
+       TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+                 uffd_data_handler, uffd_pt_handler, 2),
+
+       /*
+        * Try accesses when the data and PT memory regions are both
+        * tracked for dirty logging.
+        */
+       TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log,
+                      guest_check_s1ptw_wr_in_dirty_log),
+       TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log,
+                      guest_check_no_s1ptw_wr_in_dirty_log),
+       TEST_DIRTY_LOG(guest_ld_preidx, with_af,
+                      guest_check_no_write_in_dirty_log,
+                      guest_check_s1ptw_wr_in_dirty_log),
+       TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log,
+                      guest_check_no_s1ptw_wr_in_dirty_log),
+       TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log,
+                      guest_check_s1ptw_wr_in_dirty_log),
+       TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log,
+                      guest_check_s1ptw_wr_in_dirty_log),
+       TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log,
+                      guest_check_s1ptw_wr_in_dirty_log),
+       TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log,
+                      guest_check_s1ptw_wr_in_dirty_log),
+       TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log,
+                      guest_check_s1ptw_wr_in_dirty_log),
+
+       /*
+        * Access when the data and PT memory regions are both marked for
+        * dirty logging and UFFD at the same time. The expected result is
+        * that writes should mark the dirty log and trigger a userfaultfd
+        * write fault.  Reads/execs should result in a read userfaultfd
+        * fault, and nothing in the dirty log.  Any S1PTW should result in
+        * a write in the dirty log and a userfaultfd write.
+        */
+       TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af,
+                               uffd_data_handler, 2,
+                               guest_check_no_write_in_dirty_log,
+                               guest_check_s1ptw_wr_in_dirty_log),
+       TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af,
+                               uffd_data_handler, 2,
+                               guest_check_no_write_in_dirty_log,
+                               guest_check_no_s1ptw_wr_in_dirty_log),
+       TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af,
+                               uffd_data_handler,
+                               2, guest_check_no_write_in_dirty_log,
+                               guest_check_s1ptw_wr_in_dirty_log),
+       TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, uffd_no_handler, 1,
+                               guest_check_no_write_in_dirty_log,
+                               guest_check_s1ptw_wr_in_dirty_log),
+       TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af,
+                               uffd_data_handler, 2,
+                               guest_check_no_write_in_dirty_log,
+                               guest_check_s1ptw_wr_in_dirty_log),
+       TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af,
+                               uffd_data_handler,
+                               2, guest_check_write_in_dirty_log,
+                               guest_check_s1ptw_wr_in_dirty_log),
+       TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af,
+                               uffd_data_handler, 2,
+                               guest_check_write_in_dirty_log,
+                               guest_check_s1ptw_wr_in_dirty_log),
+       TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af,
+                               uffd_data_handler,
+                               2, guest_check_write_in_dirty_log,
+                               guest_check_s1ptw_wr_in_dirty_log),
+       TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af,
+                               uffd_data_handler, 2,
+                               guest_check_write_in_dirty_log,
+                               guest_check_s1ptw_wr_in_dirty_log),
+       /*
+        * Access when both the PT and data regions are marked read-only
+        * (with KVM_MEM_READONLY). Writes with a syndrome result in an
+        * MMIO exit, writes with no syndrome (e.g., CAS) result in a
+        * failed vcpu run, and reads/execs with and without syndromes do
+        * not fault.
+        */
+       TEST_RO_MEMSLOT(guest_read64, 0, 0),
+       TEST_RO_MEMSLOT(guest_ld_preidx, 0, 0),
+       TEST_RO_MEMSLOT(guest_at, 0, 0),
+       TEST_RO_MEMSLOT(guest_exec, 0, 0),
+       TEST_RO_MEMSLOT(guest_write64, mmio_on_test_gpa_handler, 1),
+       TEST_RO_MEMSLOT_NO_SYNDROME(guest_dc_zva),
+       TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas),
+       TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx),
+
+       /*
+        * The PT and data regions are both read-only and marked
+        * for dirty logging at the same time. The expected result is that
+        * writes do not show up in the dirty log. The read-only
+        * handling is the same as if the memslot was not marked
+        * for dirty logging: writes with a syndrome result in an MMIO
+        * exit, and writes with no syndrome result in a failed vcpu run.
+        */
+       TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_read64, 0, 0,
+                                     guest_check_no_write_in_dirty_log),
+       TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_ld_preidx, 0, 0,
+                                     guest_check_no_write_in_dirty_log),
+       TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_at, 0, 0,
+                                     guest_check_no_write_in_dirty_log),
+       TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_exec, 0, 0,
+                                     guest_check_no_write_in_dirty_log),
+       TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_write64, mmio_on_test_gpa_handler,
+                                     1, guest_check_no_write_in_dirty_log),
+       TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_dc_zva,
+                                                 guest_check_no_write_in_dirty_log),
+       TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_cas,
+                                                 guest_check_no_write_in_dirty_log),
+       TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_st_preidx,
+                                                 guest_check_no_write_in_dirty_log),
+
+       /*
+        * The PT and data regions are both read-only, with holes punched in
+        * their backing stores and tracked by userfaultfd. The expected
+        * result is the
+        * union of both userfaultfd and read-only behaviors. For example,
+        * write accesses result in a userfaultfd write fault and an MMIO
+        * exit.  Writes with no syndrome result in a failed vcpu run and
+        * no userfaultfd write fault. Reads result in userfaultfd getting
+        * triggered.
+        */
+       TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, uffd_data_handler, 2),
+       TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, uffd_data_handler, 2),
+       TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, uffd_no_handler, 1),
+       TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, uffd_data_handler, 2),
+       TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1,
+                                uffd_data_handler, 2),
+       TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, uffd_data_handler, 2),
+       TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, uffd_no_handler, 1),
+       TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, uffd_no_handler, 1),
+
+       { 0 }
+};
+
+static void for_each_test_and_guest_mode(enum vm_mem_backing_src_type src_type)
+{
+       struct test_desc *t;
+
+       for (t = &tests[0]; t->name; t++) {
+               if (t->skip)
+                       continue;
+
+               struct test_params p = {
+                       .src_type = src_type,
+                       .test_desc = t,
+               };
+
+               for_each_guest_mode(run_test, &p);
+       }
+}
+
+int main(int argc, char *argv[])
+{
+       enum vm_mem_backing_src_type src_type;
+       int opt;
+
+       src_type = DEFAULT_VM_MEM_SRC;
+
+       while ((opt = getopt(argc, argv, "hm:s:")) != -1) {
+               switch (opt) {
+               case 'm':
+                       guest_modes_cmdline(optarg);
+                       break;
+               case 's':
+                       src_type = parse_backing_src_type(optarg);
+                       break;
+               case 'h':
+               default:
+                       help(argv[0]);
+                       exit(0);
+               }
+       }
+
+       for_each_test_and_guest_mode(src_type);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/psci_test.c b/tools/testing/selftests/kvm/arm64/psci_test.c
new file mode 100644 (file)
index 0000000..ab491ee
--- /dev/null
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * psci_test - Tests relating to KVM's PSCI implementation.
+ *
+ * Copyright (c) 2021 Google LLC.
+ *
+ * This test includes:
+ *  - A regression test for a race between KVM servicing the PSCI CPU_ON call
+ *    and userspace reading the targeted vCPU's registers.
+ *  - A test for KVM's handling of PSCI SYSTEM_SUSPEND and the associated
+ *    KVM_SYSTEM_EVENT_SUSPEND UAPI.
+ */
+
+#include <linux/kernel.h>
+#include <linux/psci.h>
+#include <asm/cputype.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
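+/* Arbitrary values the target vCPU should observe in PC/x0 after CPU_ON */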
+#define CPU_ON_ENTRY_ADDR 0xfeedf00dul
+#define CPU_ON_CONTEXT_ID 0xdeadc0deul
+
+static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr,
+                           uint64_t context_id)
+{
+       struct arm_smccc_res res;
+
+       smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id,
+                 0, 0, 0, 0, &res);
+
+       return res.a0;
+}
+
+static uint64_t psci_affinity_info(uint64_t target_affinity,
+                                  uint64_t lowest_affinity_level)
+{
+       struct arm_smccc_res res;
+
+       smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level,
+                 0, 0, 0, 0, 0, &res);
+
+       return res.a0;
+}
+
+static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id)
+{
+       struct arm_smccc_res res;
+
+       smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id,
+                 0, 0, 0, 0, 0, &res);
+
+       return res.a0;
+}
+
+static uint64_t psci_system_off2(uint64_t type, uint64_t cookie)
+{
+       struct arm_smccc_res res;
+
+       smccc_hvc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res);
+
+       return res.a0;
+}
+
+static uint64_t psci_features(uint32_t func_id)
+{
+       struct arm_smccc_res res;
+
+       smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res);
+
+       return res.a0;
+}
+
+static void vcpu_power_off(struct kvm_vcpu *vcpu)
+{
+       struct kvm_mp_state mp_state = {
+               .mp_state = KVM_MP_STATE_STOPPED,
+       };
+
+       vcpu_mp_state_set(vcpu, &mp_state);
+}
+
+static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source,
+                              struct kvm_vcpu **target)
+{
+       struct kvm_vcpu_init init;
+       struct kvm_vm *vm;
+
+       vm = vm_create(2);
+
+       vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+       init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
+
+       *source = aarch64_vcpu_add(vm, 0, &init, guest_code);
+       *target = aarch64_vcpu_add(vm, 1, &init, guest_code);
+
+       return vm;
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       vcpu_run(vcpu);
+       if (get_ucall(vcpu, &uc) == UCALL_ABORT)
+               REPORT_GUEST_ASSERT(uc);
+}
+
+static void assert_vcpu_reset(struct kvm_vcpu *vcpu)
+{
+       uint64_t obs_pc, obs_x0;
+
+       obs_pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
+       obs_x0 = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0]));
+
+       TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR,
+                   "unexpected target cpu pc: %lx (expected: %lx)",
+                   obs_pc, CPU_ON_ENTRY_ADDR);
+       TEST_ASSERT(obs_x0 == CPU_ON_CONTEXT_ID,
+                   "unexpected target context id: %lx (expected: %lx)",
+                   obs_x0, CPU_ON_CONTEXT_ID);
+}
+
+static void guest_test_cpu_on(uint64_t target_cpu)
+{
+       uint64_t target_state;
+
+       GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID));
+
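+       /* Poll AFFINITY_INFO until the target CPU reports as being on. */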
+       do {
+               target_state = psci_affinity_info(target_cpu, 0);
+
+               GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) ||
+                            (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF));
+       } while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON);
+
+       GUEST_DONE();
+}
+
+static void host_test_cpu_on(void)
+{
+       struct kvm_vcpu *source, *target;
+       uint64_t target_mpidr;
+       struct kvm_vm *vm;
+       struct ucall uc;
+
+       vm = setup_vm(guest_test_cpu_on, &source, &target);
+
+       /* Make sure the target vCPU is already off when executing the test. */
+       vcpu_power_off(target);
+
+       target_mpidr = vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1));
+       vcpu_args_set(source, 1, target_mpidr & MPIDR_HWID_BITMASK);
+       enter_guest(source);
+
+       if (get_ucall(source, &uc) != UCALL_DONE)
+               TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
+
+       assert_vcpu_reset(target);
+       kvm_vm_free(vm);
+}
+
+static void guest_test_system_suspend(void)
+{
+       uint64_t ret;
+
+       /* assert that SYSTEM_SUSPEND is discoverable */
+       GUEST_ASSERT(!psci_features(PSCI_1_0_FN_SYSTEM_SUSPEND));
+       GUEST_ASSERT(!psci_features(PSCI_1_0_FN64_SYSTEM_SUSPEND));
+
+       ret = psci_system_suspend(CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID);
+       GUEST_SYNC(ret);
+}
+
+static void host_test_system_suspend(void)
+{
+       struct kvm_vcpu *source, *target;
+       struct kvm_run *run;
+       struct kvm_vm *vm;
+
+       vm = setup_vm(guest_test_system_suspend, &source, &target);
+       vm_enable_cap(vm, KVM_CAP_ARM_SYSTEM_SUSPEND, 0);
+
+       vcpu_power_off(target);
+       run = source->run;
+
+       enter_guest(source);
+
+       TEST_ASSERT_KVM_EXIT_REASON(source, KVM_EXIT_SYSTEM_EVENT);
+       TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SUSPEND,
+                   "Unhandled system event: %u (expected: %u)",
+                   run->system_event.type, KVM_SYSTEM_EVENT_SUSPEND);
+
+       kvm_vm_free(vm);
+}
+
+static void guest_test_system_off2(void)
+{
+       uint64_t ret;
+
+       /* assert that SYSTEM_OFF2 is discoverable */
+       GUEST_ASSERT(psci_features(PSCI_1_3_FN_SYSTEM_OFF2) &
+                    PSCI_1_3_OFF_TYPE_HIBERNATE_OFF);
+       GUEST_ASSERT(psci_features(PSCI_1_3_FN64_SYSTEM_OFF2) &
+                    PSCI_1_3_OFF_TYPE_HIBERNATE_OFF);
+
+       /* With non-zero 'cookie' field, it should fail */
+       ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 1);
+       GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS);
+
+       /*
+        * This would normally never return, so KVM sets the return value
+        * to PSCI_RET_INTERNAL_FAILURE. The test case *does* return, so
+        * that it can test both values for HIBERNATE_OFF.
+        */
+       ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 0);
+       GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE);
+
+       /*
+        * Revision F.b of the PSCI v1.3 specification documents zero as an
+        * alias for HIBERNATE_OFF, since that's the value used in earlier
+        * revisions of the spec and some implementations in the field.
+        */
+       ret = psci_system_off2(0, 1);
+       GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS);
+
+       ret = psci_system_off2(0, 0);
+       GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE);
+
+       GUEST_DONE();
+}
+
+static void host_test_system_off2(void)
+{
+       struct kvm_vcpu *source, *target;
+       struct kvm_mp_state mps;
+       uint64_t psci_version = 0;
+       int nr_shutdowns = 0;
+       struct kvm_run *run;
+       struct ucall uc;
+
+       setup_vm(guest_test_system_off2, &source, &target);
+
+       psci_version = vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION);
+
+       TEST_ASSERT(psci_version >= PSCI_VERSION(1, 3),
+                   "Unexpected PSCI version %lu.%lu",
+                   PSCI_VERSION_MAJOR(psci_version),
+                   PSCI_VERSION_MINOR(psci_version));
+
+       vcpu_power_off(target);
+       run = source->run;
+
+       enter_guest(source);
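+       /*
+        * The guest issues two valid SYSTEM_OFF2 calls (HIBERNATE_OFF and its
+        * zero alias), so expect two shutdown exits before the final ucall.
+        */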
+       while (run->exit_reason == KVM_EXIT_SYSTEM_EVENT) {
+               TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SHUTDOWN,
+                           "Unhandled system event: %u (expected: %u)",
+                           run->system_event.type, KVM_SYSTEM_EVENT_SHUTDOWN);
+               TEST_ASSERT(run->system_event.ndata >= 1,
+                           "Unexpected amount of system event data: %u (expected, >= 1)",
+                           run->system_event.ndata);
+               TEST_ASSERT(run->system_event.data[0] & KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2,
+                           "PSCI_OFF2 flag not set. Flags %llu (expected %llu)",
+                           run->system_event.data[0], KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2);
+
+               nr_shutdowns++;
+
+               /* Restart the vCPU */
+               mps.mp_state = KVM_MP_STATE_RUNNABLE;
+               vcpu_mp_state_set(source, &mps);
+
+               enter_guest(source);
+       }
+
+       TEST_ASSERT(get_ucall(source, &uc) == UCALL_DONE, "Guest did not exit cleanly");
+       TEST_ASSERT(nr_shutdowns == 2, "Two shutdown events were expected, but saw %d", nr_shutdowns);
+}
+
+int main(void)
+{
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SYSTEM_SUSPEND));
+
+       host_test_cpu_on();
+       host_test_system_suspend();
+       host_test_system_off2();
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
new file mode 100644 (file)
index 0000000..bc6cf50
--- /dev/null
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * set_id_regs - Test for setting ID registers from userspace.
+ *
+ * Copyright (c) 2023 Google LLC.
+ *
+ * Test that KVM supports setting ID registers from userspace and handles the
+ * feature set correctly.
+ */
+
+#include <stdint.h>
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include <linux/bitfield.h>
+
+enum ftr_type {
+       FTR_EXACT,                      /* Use a predefined safe value */
+       FTR_LOWER_SAFE,                 /* Smaller value is safe */
+       FTR_HIGHER_SAFE,                /* Bigger value is safe */
+       FTR_HIGHER_OR_ZERO_SAFE,        /* Bigger value is safe, but 0 is biggest */
+       FTR_END,                        /* Mark the last ftr bits */
+};
+
+#define FTR_SIGNED     true    /* Value should be treated as signed */
+#define FTR_UNSIGNED   false   /* Value should be treated as unsigned */
+
+struct reg_ftr_bits {
+       char *name;
+       bool sign;
+       enum ftr_type type;
+       uint8_t shift;
+       uint64_t mask;
+       /*
+        * For FTR_EXACT, safe_val is used as the exact safe value.
+        * For FTR_LOWER_SAFE, safe_val is used as the minimal safe value.
+        */
+       int64_t safe_val;
+};
+
+struct test_feature_reg {
+       uint32_t reg;
+       const struct reg_ftr_bits *ftr_bits;
+};
+
+#define __REG_FTR_BITS(NAME, SIGNED, TYPE, SHIFT, MASK, SAFE_VAL)      \
+       {                                                               \
+               .name = #NAME,                                          \
+               .sign = SIGNED,                                         \
+               .type = TYPE,                                           \
+               .shift = SHIFT,                                         \
+               .mask = MASK,                                           \
+               .safe_val = SAFE_VAL,                                   \
+       }
+
+#define REG_FTR_BITS(type, reg, field, safe_val) \
+       __REG_FTR_BITS(reg##_##field, FTR_UNSIGNED, type, reg##_##field##_SHIFT, \
+                      reg##_##field##_MASK, safe_val)
+
+#define S_REG_FTR_BITS(type, reg, field, safe_val) \
+       __REG_FTR_BITS(reg##_##field, FTR_SIGNED, type, reg##_##field##_SHIFT, \
+                      reg##_##field##_MASK, safe_val)
+
+#define REG_FTR_END                                    \
+       {                                               \
+               .type = FTR_END,                        \
+       }
+
+static const struct reg_ftr_bits ftr_id_aa64dfr0_el1[] = {
+       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DoubleLock, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, WRPs, 0),
+       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, PMUVer, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, ID_AA64DFR0_EL1_DebugVer_IMP),
+       REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_dfr0_el1[] = {
+       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, PerfMon, ID_DFR0_EL1_PerfMon_PMUv3),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, CopDbg, ID_DFR0_EL1_CopDbg_Armv8),
+       REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar0_el1[] = {
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RNDR, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TLB, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TS, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, FHM, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, DP, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM4, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM3, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA3, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RDM, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TME, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, ATOMIC, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, CRC32, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA2, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA1, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, AES, 0),
+       REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar1_el1[] = {
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LS64, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, XS, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, I8MM, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DGH, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, BF16, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SPECRES, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SB, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FRINTTS, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LRCPC, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FCMA, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, JSCVT, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DPB, 0),
+       REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = {
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, BC, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, RPRES, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, WFxT, 0),
+       REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0),
+       REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0),
+       REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = {
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0),
+       REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0),
+       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0),
+       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGENDEL0, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, SNSMEM, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGEND, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ASIDBITS, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, PARANGE, 0),
+       REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = {
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0),
+       REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HPDS, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HAFDBS, 0),
+       REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = {
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, E0PD, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, BBM, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, TTL, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, AT, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, ST, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, VARange, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, IESB, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, LSM, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, UAO, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, CnP, 0),
+       REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = {
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, I8MM, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SM4, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SHA3, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BF16, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BitPerm, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, AES, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SVEver, 0),
+       REG_FTR_END,
+};
+
+#define TEST_REG(id, table)                    \
+       {                                       \
+               .reg = id,                      \
+               .ftr_bits = &((table)[0]),      \
+       }
+
+static struct test_feature_reg test_regs[] = {
+       TEST_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0_el1),
+       TEST_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0_el1),
+       TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1),
+       TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1),
+       TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1),
+       TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1),
+       TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1),
+       TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1),
+       TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1),
+       TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1),
+       TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1),
+};
+
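+/* Read the given ID register in the guest and report its value to the host. */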
+#define GUEST_REG_SYNC(id) GUEST_SYNC_ARGS(0, id, read_sysreg_s(id), 0, 0);
+
+static void guest_code(void)
+{
+       GUEST_REG_SYNC(SYS_ID_AA64DFR0_EL1);
+       GUEST_REG_SYNC(SYS_ID_DFR0_EL1);
+       GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1);
+       GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1);
+       GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1);
+       GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1);
+       GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1);
+       GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1);
+       GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
+       GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
+       GUEST_REG_SYNC(SYS_CTR_EL0);
+
+       GUEST_DONE();
+}
+
+/* Return a safe value for a given ftr_bits and ftr value */
+uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
+{
+       uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+
+       if (ftr_bits->sign == FTR_UNSIGNED) {
+               switch (ftr_bits->type) {
+               case FTR_EXACT:
+                       ftr = ftr_bits->safe_val;
+                       break;
+               case FTR_LOWER_SAFE:
+                       if (ftr > ftr_bits->safe_val)
+                               ftr--;
+                       break;
+               case FTR_HIGHER_SAFE:
+                       if (ftr < ftr_max)
+                               ftr++;
+                       break;
+               case FTR_HIGHER_OR_ZERO_SAFE:
+                       if (ftr == ftr_max)
+                               ftr = 0;
+                       else if (ftr != 0)
+                               ftr++;
+                       break;
+               default:
+                       break;
+               }
+       } else if (ftr != ftr_max) {
+               switch (ftr_bits->type) {
+               case FTR_EXACT:
+                       ftr = ftr_bits->safe_val;
+                       break;
+               case FTR_LOWER_SAFE:
+                       if (ftr > ftr_bits->safe_val)
+                               ftr--;
+                       break;
+               case FTR_HIGHER_SAFE:
+                       if (ftr < ftr_max - 1)
+                               ftr++;
+                       break;
+               case FTR_HIGHER_OR_ZERO_SAFE:
+                       if (ftr != 0 && ftr != ftr_max - 1)
+                               ftr++;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       return ftr;
+}
+
+/* Return an invalid value for a given ftr_bits and ftr value */
+uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
+{
+       uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+
+       if (ftr_bits->sign == FTR_UNSIGNED) {
+               switch (ftr_bits->type) {
+               case FTR_EXACT:
+                       ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
+                       break;
+               case FTR_LOWER_SAFE:
+                       ftr++;
+                       break;
+               case FTR_HIGHER_SAFE:
+                       ftr--;
+                       break;
+               case FTR_HIGHER_OR_ZERO_SAFE:
+                       if (ftr == 0)
+                               ftr = ftr_max;
+                       else
+                               ftr--;
+                       break;
+               default:
+                       break;
+               }
+       } else if (ftr != ftr_max) {
+               switch (ftr_bits->type) {
+               case FTR_EXACT:
+                       ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
+                       break;
+               case FTR_LOWER_SAFE:
+                       ftr++;
+                       break;
+               case FTR_HIGHER_SAFE:
+                       ftr--;
+                       break;
+               case FTR_HIGHER_OR_ZERO_SAFE:
+                       if (ftr == 0)
+                               ftr = ftr_max - 1;
+                       else
+                               ftr--;
+                       break;
+               default:
+                       break;
+               }
+       } else {
+               ftr = 0;
+       }
+
+       return ftr;
+}
+
+static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
+                                    const struct reg_ftr_bits *ftr_bits)
+{
+       uint8_t shift = ftr_bits->shift;
+       uint64_t mask = ftr_bits->mask;
+       uint64_t val, new_val, ftr;
+
+       val = vcpu_get_reg(vcpu, reg);
+       ftr = (val & mask) >> shift;
+
+       ftr = get_safe_value(ftr_bits, ftr);
+
+       ftr <<= shift;
+       val &= ~mask;
+       val |= ftr;
+
+       vcpu_set_reg(vcpu, reg, val);
+       new_val = vcpu_get_reg(vcpu, reg);
+       TEST_ASSERT_EQ(new_val, val);
+
+       return new_val;
+}
+
+static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg,
+                             const struct reg_ftr_bits *ftr_bits)
+{
+       uint8_t shift = ftr_bits->shift;
+       uint64_t mask = ftr_bits->mask;
+       uint64_t val, old_val, ftr;
+       int r;
+
+       val = vcpu_get_reg(vcpu, reg);
+       ftr = (val & mask) >> shift;
+
+       ftr = get_invalid_value(ftr_bits, ftr);
+
+       old_val = val;
+       ftr <<= shift;
+       val &= ~mask;
+       val |= ftr;
+
+       r = __vcpu_set_reg(vcpu, reg, val);
+       TEST_ASSERT(r < 0 && errno == EINVAL,
+                   "Unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
+
+       val = vcpu_get_reg(vcpu, reg);
+       TEST_ASSERT_EQ(val, old_val);
+}
+
+static uint64_t test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+
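+/* Map a sysreg encoding to its index in KVM's feature ID register range. */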
+#define encoding_to_range_idx(encoding)                                                        \
+       KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(encoding), sys_reg_Op1(encoding),      \
+                                    sys_reg_CRn(encoding), sys_reg_CRm(encoding),      \
+                                    sys_reg_Op2(encoding))
+
+
+static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only)
+{
+       uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+       struct reg_mask_range range = {
+               .addr = (__u64)masks,
+       };
+       int ret;
+
+       /* KVM should return error when reserved field is not zero */
+       range.reserved[0] = 1;
+       ret = __vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+       TEST_ASSERT(ret, "KVM doesn't check invalid parameters.");
+
+       /* Get writable masks for feature ID registers */
+       memset(range.reserved, 0, sizeof(range.reserved));
+       vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+
+       for (int i = 0; i < ARRAY_SIZE(test_regs); i++) {
+               const struct reg_ftr_bits *ftr_bits = test_regs[i].ftr_bits;
+               uint32_t reg_id = test_regs[i].reg;
+               uint64_t reg = KVM_ARM64_SYS_REG(reg_id);
+               int idx;
+
+               /* Get the index to masks array for the idreg */
+               idx = encoding_to_range_idx(reg_id);
+
+               for (int j = 0;  ftr_bits[j].type != FTR_END; j++) {
+                       /* Skip aarch32 regs on an aarch64-only system, since they are RAZ/WI. */
+                       if (aarch64_only && sys_reg_CRm(reg_id) < 4) {
+                               ksft_test_result_skip("%s on AARCH64 only system\n",
+                                                     ftr_bits[j].name);
+                               continue;
+                       }
+
+                       /* Make sure the feature field is writable */
+                       TEST_ASSERT_EQ(masks[idx] & ftr_bits[j].mask, ftr_bits[j].mask);
+
+                       test_reg_set_fail(vcpu, reg, &ftr_bits[j]);
+
+                       test_reg_vals[idx] = test_reg_set_success(vcpu, reg,
+                                                                 &ftr_bits[j]);
+
+                       ksft_test_result_pass("%s\n", ftr_bits[j].name);
+               }
+       }
+}
+
+#define MPAM_IDREG_TEST        6
+static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu)
+{
+       uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+       struct reg_mask_range range = {
+               .addr = (__u64)masks,
+       };
+       uint64_t val;
+       int idx, err;
+
+       /*
+        * If ID_AA64PFR0.MPAM is _not_ officially modifiable and is zero,
+        * check that it can still be set to 1 when the hardware supports
+        * MPAM, but that it cannot be set to any other value.
+        */
+
+       /* Get writable masks for feature ID registers */
+       memset(range.reserved, 0, sizeof(range.reserved));
+       vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+
+       /* Writeable? Nothing to test! */
+       idx = encoding_to_range_idx(SYS_ID_AA64PFR0_EL1);
+       if ((masks[idx] & ID_AA64PFR0_EL1_MPAM_MASK) == ID_AA64PFR0_EL1_MPAM_MASK) {
+               ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is officially writable, nothing to test\n");
+               return;
+       }
+
+       /* Get the id register value */
+       val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+
+       /* Try to set MPAM=0. This should always be possible. */
+       val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
+       val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 0);
+       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
+       if (err)
+               ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM=0 was not accepted\n");
+       else
+               ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=0 worked\n");
+
+       /* Try to set MPAM=1 */
+       val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
+       val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 1);
+       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
+       if (err)
+               ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is not writable, nothing to test\n");
+       else
+               ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=1 was writable\n");
+
+       /* Try to set MPAM=2 */
+       val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
+       val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 2);
+       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
+       if (err)
+               ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM not arbitrarily modifiable\n");
+       else
+               ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM value should not be ignored\n");
+
+       /* And again for ID_AA64PFR1_EL1.MPAM_frac */
+       idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1);
+       if ((masks[idx] & ID_AA64PFR1_EL1_MPAM_frac_MASK) == ID_AA64PFR1_EL1_MPAM_frac_MASK) {
+               ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is officially writable, nothing to test\n");
+               return;
+       }
+
+       /* Get the id register value */
+       val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+
+       /* Try to set MPAM_frac=0. This should always be possible. */
+       val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
+       val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 0);
+       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
+       if (err)
+               ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM_frac=0 was not accepted\n");
+       else
+               ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=0 worked\n");
+
+       /* Try to set MPAM_frac=1 */
+       val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
+       val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 1);
+       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
+       if (err)
+               ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is not writable, nothing to test\n");
+       else
+               ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=1 was writable\n");
+
+       /* Try to set MPAM_frac=2 */
+       val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
+       val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 2);
+       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
+       if (err)
+               ksft_test_result_pass("ID_AA64PFR1_EL1.MPAM_frac not arbitrarily modifiable\n");
+       else
+               ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n");
+}
+
+static void test_guest_reg_read(struct kvm_vcpu *vcpu)
+{
+       bool done = false;
+       struct ucall uc;
+
+       while (!done) {
+               vcpu_run(vcpu);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               case UCALL_SYNC:
+                       /* Make sure the written values are seen by guest */
+                       TEST_ASSERT_EQ(test_reg_vals[encoding_to_range_idx(uc.args[2])],
+                                      uc.args[3]);
+                       break;
+               case UCALL_DONE:
+                       done = true;
+                       break;
+               default:
+                       TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+               }
+       }
+}
+
+/* Politely lifted from arch/arm64/include/asm/cache.h */
+/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
+#define CLIDR_CTYPE_SHIFT(level)       (3 * (level - 1))
+#define CLIDR_CTYPE_MASK(level)                (7 << CLIDR_CTYPE_SHIFT(level))
+#define CLIDR_CTYPE(clidr, level)      \
+       (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
+
+static void test_clidr(struct kvm_vcpu *vcpu)
+{
+       uint64_t clidr;
+       int level;
+
+       clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1));
+
+       /* find the first empty level in the cache hierarchy */
+       for (level = 1; level < 7; level++) {
+               if (!CLIDR_CTYPE(clidr, level))
+                       break;
+       }
+
+       /*
+        * If you have a mind-boggling 7 levels of cache, congratulations, you
+        * get to fix this.
+        */
+       TEST_ASSERT(level <= 7, "can't find an empty level in cache hierarchy");
+
+       /* stick in a unified cache level */
+       clidr |= BIT(2) << CLIDR_CTYPE_SHIFT(level);
+
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), clidr);
+       test_reg_vals[encoding_to_range_idx(SYS_CLIDR_EL1)] = clidr;
+}
+
+static void test_ctr(struct kvm_vcpu *vcpu)
+{
+       u64 ctr;
+
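+       /*
+        * Build a more conservative CTR_EL0 value than the host's: clear DIC
+        * and, when possible, shrink IminLine before writing the value back.
+        */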
+       ctr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0));
+       ctr &= ~CTR_EL0_DIC_MASK;
+       if (ctr & CTR_EL0_IminLine_MASK)
+               ctr--;
+
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), ctr);
+       test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr;
+}
+
+static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu)
+{
+       u64 val;
+
+       test_clidr(vcpu);
+       test_ctr(vcpu);
+
+       val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1));
+       val++;
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val);
+
+       test_reg_vals[encoding_to_range_idx(SYS_MPIDR_EL1)] = val;
+       ksft_test_result_pass("%s\n", __func__);
+}
+
+static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encoding)
+{
+       size_t idx = encoding_to_range_idx(encoding);
+       uint64_t observed;
+
+       observed = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding));
+       TEST_ASSERT_EQ(test_reg_vals[idx], observed);
+}
+
+static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu)
+{
+       /*
+        * Calls KVM_ARM_VCPU_INIT behind the scenes, which will do an
+        * architectural reset of the vCPU.
+        */
+       aarch64_vcpu_setup(vcpu, NULL);
+
+       for (int i = 0; i < ARRAY_SIZE(test_regs); i++)
+               test_assert_id_reg_unchanged(vcpu, test_regs[i].reg);
+
+       test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1);
+       test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1);
+       test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0);
+
+       ksft_test_result_pass("%s\n", __func__);
+}
+
+int main(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       bool aarch64_only;
+       uint64_t val, el0;
+       int test_cnt;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       /* Check for AARCH64 only system */
+       val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+       el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
+       aarch64_only = (el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
+
+       ksft_print_header();
+
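+       /*
+        * Each ftr_id_* table ends with a REG_FTR_END sentinel, hence the
+        * subtraction of ARRAY_SIZE(test_regs); the "+ 2" accounts for
+        * test_vcpu_ftr_id_regs() and test_reset_preserves_id_regs(), and
+        * MPAM_IDREG_TEST for the results of test_user_set_mpam_reg().
+        */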
+       test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) +
+                  ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) +
+                  ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
+                  ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) +
+                  ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) +
+                  ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 2 +
+                  MPAM_IDREG_TEST;
+
+       ksft_set_plan(test_cnt);
+
+       test_vm_ftr_id_regs(vcpu, aarch64_only);
+       test_vcpu_ftr_id_regs(vcpu);
+       test_user_set_mpam_reg(vcpu);
+
+       test_guest_reg_read(vcpu);
+
+       test_reset_preserves_id_regs(vcpu);
+
+       kvm_vm_free(vm);
+
+       ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/arm64/smccc_filter.c b/tools/testing/selftests/kvm/arm64/smccc_filter.c
new file mode 100644 (file)
index 0000000..2d189f3
--- /dev/null
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * smccc_filter - Tests for the SMCCC filter UAPI.
+ *
+ * Copyright (c) 2023 Google LLC
+ *
+ * This test includes:
+ *  - Tests that the UAPI constraints are upheld by KVM. For example, userspace
+ *    is prevented from filtering the architecture range of SMCCC calls.
+ *  - Tests that the filter actions (DENIED, FWD_TO_USER) work as intended.
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/psci.h>
+#include <stdint.h>
+
+#include "processor.h"
+#include "test_util.h"
+
+enum smccc_conduit {
+       HVC_INSN,
+       SMC_INSN,
+};
+
+#define for_each_conduit(conduit)                                      \
+       for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++)
+
+static void guest_main(uint32_t func_id, enum smccc_conduit conduit)
+{
+       struct arm_smccc_res res;
+
+       if (conduit == SMC_INSN)
+               smccc_smc(func_id, 0, 0, 0, 0, 0, 0, 0, &res);
+       else
+               smccc_hvc(func_id, 0, 0, 0, 0, 0, 0, 0, &res);
+
+       GUEST_SYNC(res.a0);
+}
+
+static int __set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
+                             enum kvm_smccc_filter_action action)
+{
+       struct kvm_smccc_filter filter = {
+               .base           = start,
+               .nr_functions   = nr_functions,
+               .action         = action,
+       };
+
+       return __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL,
+                                    KVM_ARM_VM_SMCCC_FILTER, &filter);
+}
+
+static void set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
+                            enum kvm_smccc_filter_action action)
+{
+       int ret = __set_smccc_filter(vm, start, nr_functions, action);
+
+       TEST_ASSERT(!ret, "failed to configure SMCCC filter: %d", ret);
+}
+
+static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu)
+{
+       struct kvm_vcpu_init init;
+       struct kvm_vm *vm;
+
+       vm = vm_create(1);
+       vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+
+       /*
+        * Enable in-kernel emulation of PSCI to ensure that calls are denied
+        * due to the SMCCC filter, not because of KVM.
+        */
+       init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
+
+       *vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main);
+       return vm;
+}
+
+static void test_pad_must_be_zero(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm = setup_vm(&vcpu);
+       struct kvm_smccc_filter filter = {
+               .base           = PSCI_0_2_FN_PSCI_VERSION,
+               .nr_functions   = 1,
+               .action         = KVM_SMCCC_FILTER_DENY,
+               .pad            = { -1 },
+       };
+       int r;
+
+       r = __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL,
+                                 KVM_ARM_VM_SMCCC_FILTER, &filter);
+       TEST_ASSERT(r < 0 && errno == EINVAL,
+                   "Setting filter with nonzero padding should return EINVAL");
+
+       kvm_vm_free(vm);
+}
+
+/* Ensure that userspace cannot filter the Arm Architecture SMCCC range */
+static void test_filter_reserved_range(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm = setup_vm(&vcpu);
+       uint32_t smc64_fn;
+       int r;
+
+       r = __set_smccc_filter(vm, ARM_SMCCC_ARCH_WORKAROUND_1,
+                              1, KVM_SMCCC_FILTER_DENY);
+       TEST_ASSERT(r < 0 && errno == EEXIST,
+                   "Attempt to filter reserved range should return EEXIST");
+
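+       /* Also try the SMC64 view of the Arm Architecture Call range. */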
+       smc64_fn = ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_64,
+                                     0, 0);
+
+       r = __set_smccc_filter(vm, smc64_fn, 1, KVM_SMCCC_FILTER_DENY);
+       TEST_ASSERT(r < 0 && errno == EEXIST,
+                   "Attempt to filter reserved range should return EEXIST");
+
+       kvm_vm_free(vm);
+}
+
+static void test_invalid_nr_functions(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm = setup_vm(&vcpu);
+       int r;
+
+       r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 0, KVM_SMCCC_FILTER_DENY);
+       TEST_ASSERT(r < 0 && errno == EINVAL,
+                   "Attempt to filter 0 functions should return EINVAL");
+
+       kvm_vm_free(vm);
+}
+
+static void test_overflow_nr_functions(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm = setup_vm(&vcpu);
+       int r;
+
+       r = __set_smccc_filter(vm, ~0, ~0, KVM_SMCCC_FILTER_DENY);
+       TEST_ASSERT(r < 0 && errno == EINVAL,
+                   "Attempt to overflow filter range should return EINVAL");
+
+       kvm_vm_free(vm);
+}
+
+static void test_reserved_action(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm = setup_vm(&vcpu);
+       int r;
+
+       r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, -1);
+       TEST_ASSERT(r < 0 && errno == EINVAL,
+                   "Attempt to use reserved filter action should return EINVAL");
+
+       kvm_vm_free(vm);
+}
+
+/* Test that overlapping configurations of the SMCCC filter are rejected */
+static void test_filter_overlap(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm = setup_vm(&vcpu);
+       int r;
+
+       set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY);
+
+       r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY);
+       TEST_ASSERT(r < 0 && errno == EEXIST,
+                   "Attempt to filter already configured range should return EEXIST");
+
+       kvm_vm_free(vm);
+}
+
+static void expect_call_denied(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       if (get_ucall(vcpu, &uc) != UCALL_SYNC)
+               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+
+       TEST_ASSERT(uc.args[1] == SMCCC_RET_NOT_SUPPORTED,
+                   "Unexpected SMCCC return code: %lu", uc.args[1]);
+}
+
+/* Denied SMCCC calls have a return code of SMCCC_RET_NOT_SUPPORTED */
+static void test_filter_denied(void)
+{
+       enum smccc_conduit conduit;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       for_each_conduit(conduit) {
+               vm = setup_vm(&vcpu);
+
+               set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_DENY);
+               vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+               vcpu_run(vcpu);
+               expect_call_denied(vcpu);
+
+               kvm_vm_free(vm);
+       }
+}
+
+static void expect_call_fwd_to_user(struct kvm_vcpu *vcpu, uint32_t func_id,
+                                   enum smccc_conduit conduit)
+{
+       struct kvm_run *run = vcpu->run;
+
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERCALL,
+                   "Unexpected exit reason: %u", run->exit_reason);
+       TEST_ASSERT(run->hypercall.nr == func_id,
+                   "Unexpected SMCCC function: %llu", run->hypercall.nr);
+
+       if (conduit == SMC_INSN)
+               TEST_ASSERT(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC,
+                           "KVM_HYPERCALL_EXIT_SMC is not set");
+       else
+               TEST_ASSERT(!(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC),
+                           "KVM_HYPERCALL_EXIT_SMC is set");
+}
+
+/* SMCCC calls forwarded to userspace cause KVM_EXIT_HYPERCALL exits */
+static void test_filter_fwd_to_user(void)
+{
+       enum smccc_conduit conduit;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       for_each_conduit(conduit) {
+               vm = setup_vm(&vcpu);
+
+               set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_FWD_TO_USER);
+               vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+               vcpu_run(vcpu);
+               expect_call_fwd_to_user(vcpu, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+               kvm_vm_free(vm);
+       }
+}
+
+static bool kvm_supports_smccc_filter(void)
+{
+       struct kvm_vm *vm = vm_create_barebones();
+       int r;
+
+       r = __kvm_has_device_attr(vm->fd, KVM_ARM_VM_SMCCC_CTRL, KVM_ARM_VM_SMCCC_FILTER);
+
+       kvm_vm_free(vm);
+       return !r;
+}
+
+int main(void)
+{
+       TEST_REQUIRE(kvm_supports_smccc_filter());
+
+       test_pad_must_be_zero();
+       test_invalid_nr_functions();
+       test_overflow_nr_functions();
+       test_reserved_action();
+       test_filter_reserved_range();
+       test_filter_overlap();
+       test_filter_denied();
+       test_filter_fwd_to_user();
+}
diff --git a/tools/testing/selftests/kvm/arm64/vcpu_width_config.c b/tools/testing/selftests/kvm/arm64/vcpu_width_config.c
new file mode 100644 (file)
index 0000000..80b74c6
--- /dev/null
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vcpu_width_config - Test KVM_ARM_VCPU_INIT() with KVM_ARM_VCPU_EL1_32BIT.
+ *
+ * Copyright (c) 2022 Google LLC.
+ *
+ * This test ensures that non-mixed-width vCPUs (all 64bit vCPUs or all
+ * 32bit vCPUs) can be configured, and that mixed-width vCPUs cannot be
+ * configured.
+ */
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+/*
+ * Add a vCPU, run KVM_ARM_VCPU_INIT with @init0, and then
+ * add another vCPU, and run KVM_ARM_VCPU_INIT with @init1.
+ */
+static int add_init_2vcpus(struct kvm_vcpu_init *init0,
+                          struct kvm_vcpu_init *init1)
+{
+       struct kvm_vcpu *vcpu0, *vcpu1;
+       struct kvm_vm *vm;
+       int ret;
+
+       vm = vm_create_barebones();
+
+       vcpu0 = __vm_vcpu_add(vm, 0);
+       ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0);
+       if (ret)
+               goto free_exit;
+
+       vcpu1 = __vm_vcpu_add(vm, 1);
+       ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1);
+
+free_exit:
+       kvm_vm_free(vm);
+       return ret;
+}
+
+/*
+ * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init0,
+ * and run KVM_ARM_VCPU_INIT for another vCPU with @init1.
+ */
+static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init0,
+                                 struct kvm_vcpu_init *init1)
+{
+       struct kvm_vcpu *vcpu0, *vcpu1;
+       struct kvm_vm *vm;
+       int ret;
+
+       vm = vm_create_barebones();
+
+       vcpu0 = __vm_vcpu_add(vm, 0);
+       vcpu1 = __vm_vcpu_add(vm, 1);
+
+       ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0);
+       if (ret)
+               goto free_exit;
+
+       ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1);
+
+free_exit:
+       kvm_vm_free(vm);
+       return ret;
+}
+
+/*
+ * Tests that two 64bit vCPUs can be configured, that two 32bit vCPUs can be
+ * configured, and that two mixed-width vCPUs cannot be configured.
+ * For each of those three cases, the vCPUs are configured in two different
+ * orders.  One is running KVM_CREATE_VCPU for both vCPUs, and then running
+ * KVM_ARM_VCPU_INIT for them.  The other is running KVM_CREATE_VCPU and
+ * KVM_ARM_VCPU_INIT for one vCPU, and then running those commands for the
+ * other vCPU.
+ */
+int main(void)
+{
+       struct kvm_vcpu_init init0, init1;
+       struct kvm_vm *vm;
+       int ret;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_EL1_32BIT));
+
+       /* Get the preferred target type and copy that to init1 for later use */
+       vm = vm_create_barebones();
+       vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init0);
+       kvm_vm_free(vm);
+       init1 = init0;
+
+       /* Test with 64bit vCPUs */
+       ret = add_init_2vcpus(&init0, &init0);
+       TEST_ASSERT(ret == 0,
+                   "Configuring 64bit EL1 vCPUs failed unexpectedly");
+       ret = add_2vcpus_init_2vcpus(&init0, &init0);
+       TEST_ASSERT(ret == 0,
+                   "Configuring 64bit EL1 vCPUs failed unexpectedly");
+
+       /* Test with 32bit vCPUs */
+       init0.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
+       ret = add_init_2vcpus(&init0, &init0);
+       TEST_ASSERT(ret == 0,
+                   "Configuring 32bit EL1 vCPUs failed unexpectedly");
+       ret = add_2vcpus_init_2vcpus(&init0, &init0);
+       TEST_ASSERT(ret == 0,
+                   "Configuring 32bit EL1 vCPUs failed unexpectedly");
+
+       /* Test with mixed-width vCPUs  */
+       init0.features[0] = 0;
+       init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
+       ret = add_init_2vcpus(&init0, &init1);
+       TEST_ASSERT(ret != 0,
+                   "Configuring mixed-width vCPUs worked unexpectedly");
+       ret = add_2vcpus_init_2vcpus(&init0, &init1);
+       TEST_ASSERT(ret != 0,
+                   "Configuring mixed-width vCPUs worked unexpectedly");
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c
new file mode 100644 (file)
index 0000000..b3b5fb0
--- /dev/null
@@ -0,0 +1,764 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vgic init sequence tests
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <linux/kernel.h>
+#include <sys/syscall.h>
+#include <asm/kvm.h>
+#include <asm/kvm_para.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vgic.h"
+
+#define NR_VCPUS               4
+
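+/* Encode the target vCPU in the upper 32 bits of the attr, the register offset in the lower 32. */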
+#define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset)
+
+#define GICR_TYPER 0x8
+
+#define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2)
+#define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3)
+
+struct vm_gic {
+       struct kvm_vm *vm;
+       int gic_fd;
+       uint32_t gic_dev_type;
+};
+
+static uint64_t max_phys_size;
+
+/*
+ * Helpers to access a redistributor register and verify the ioctl() failed or
+ * succeeded as expected, and provided the correct value on success.
+ */
+static void v3_redist_reg_get_errno(int gicv3_fd, int vcpu, int offset,
+                                   int want, const char *msg)
+{
+       uint32_t ignored_val;
+       int ret = __kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+                                       REG_OFFSET(vcpu, offset), &ignored_val);
+
+       TEST_ASSERT(ret && errno == want, "%s; want errno = %d", msg, want);
+}
+
+static void v3_redist_reg_get(int gicv3_fd, int vcpu, int offset, uint32_t want,
+                             const char *msg)
+{
+       uint32_t val;
+
+       kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+                           REG_OFFSET(vcpu, offset), &val);
+       TEST_ASSERT(val == want, "%s; want '0x%x', got '0x%x'", msg, want, val);
+}
+
+/* dummy guest code */
+static void guest_code(void)
+{
+       GUEST_SYNC(0);
+       GUEST_SYNC(1);
+       GUEST_SYNC(2);
+       GUEST_DONE();
+}
+
+/* Run a vCPU without asserting on failure so callers can check the return value. */
+static int run_vcpu(struct kvm_vcpu *vcpu)
+{
+       return __vcpu_run(vcpu) ? -errno : 0;
+}
+
+static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type,
+                                             uint32_t nr_vcpus,
+                                             struct kvm_vcpu *vcpus[])
+{
+       struct vm_gic v;
+
+       v.gic_dev_type = gic_dev_type;
+       v.vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+       v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
+
+       return v;
+}
+
+static struct vm_gic vm_gic_create_barebones(uint32_t gic_dev_type)
+{
+       struct vm_gic v;
+
+       v.gic_dev_type = gic_dev_type;
+       v.vm = vm_create_barebones();
+       v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
+
+       return v;
+}
+
+static void vm_gic_destroy(struct vm_gic *v)
+{
+       close(v->gic_fd);
+       kvm_vm_free(v->vm);
+}
+
+struct vgic_region_attr {
+       uint64_t attr;
+       uint64_t size;
+       uint64_t alignment;
+};
+
+struct vgic_region_attr gic_v3_dist_region = {
+       .attr = KVM_VGIC_V3_ADDR_TYPE_DIST,
+       .size = 0x10000,
+       .alignment = 0x10000,
+};
+
+struct vgic_region_attr gic_v3_redist_region = {
+       .attr = KVM_VGIC_V3_ADDR_TYPE_REDIST,
+       .size = NR_VCPUS * 0x20000,
+       .alignment = 0x10000,
+};
+
+struct vgic_region_attr gic_v2_dist_region = {
+       .attr = KVM_VGIC_V2_ADDR_TYPE_DIST,
+       .size = 0x1000,
+       .alignment = 0x1000,
+};
+
+struct vgic_region_attr gic_v2_cpu_region = {
+       .attr = KVM_VGIC_V2_ADDR_TYPE_CPU,
+       .size = 0x2000,
+       .alignment = 0x1000,
+};
+
+/*
+ * Helper routine that performs general KVM device tests. The ARM_VGIC
+ * (GICv2 or GICv3) device ends up created with overlapping DIST/REDIST
+ * (or DIST/CPUIF for GICv2) regions. The assumption is that 4 vCPUs will be
+ * used, hence the overlap. In the GICv3 case, a REDIST region is set at 0x0
+ * and a DIST region at 0x70000. The GICv2 case sets a CPUIF at 0x0 and a
+ * DIST region at 0x1000.
+ */
+static void subtest_dist_rdist(struct vm_gic *v)
+{
+       int ret;
+       uint64_t addr;
+       struct vgic_region_attr rdist; /* CPU interface in GICv2 */
+       struct vgic_region_attr dist;
+
+       rdist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_redist_region
+                                               : gic_v2_cpu_region;
+       dist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_dist_region
+                                               : gic_v2_dist_region;
+
+       /* Check existing group/attributes */
+       kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, dist.attr);
+
+       kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, rdist.attr);
+
+       /* check non existing attribute */
+       ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, -1);
+       TEST_ASSERT(ret && errno == ENXIO, "attribute not supported");
+
+       /* misaligned DIST and REDIST address settings */
+       addr = dist.alignment / 0x10;
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   dist.attr, &addr);
+       TEST_ASSERT(ret && errno == EINVAL, "GIC dist base not aligned");
+
+       addr = rdist.alignment / 0x10;
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   rdist.attr, &addr);
+       TEST_ASSERT(ret && errno == EINVAL, "GIC redist/cpu base not aligned");
+
+       /* out of range address */
+       addr = max_phys_size;
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   dist.attr, &addr);
+       TEST_ASSERT(ret && errno == E2BIG, "dist address beyond IPA limit");
+
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   rdist.attr, &addr);
+       TEST_ASSERT(ret && errno == E2BIG, "redist address beyond IPA limit");
+
+       /* Space for half a rdist (a rdist is: 2 * rdist.alignment). */
+       addr = max_phys_size - dist.alignment;
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   rdist.attr, &addr);
+       TEST_ASSERT(ret && errno == E2BIG,
+                       "half of the redist is beyond IPA limit");
+
+       /* set REDIST base address @0x0*/
+       addr = 0x00000;
+       kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           rdist.attr, &addr);
+
+       /* Attempt to create a second legacy redistributor region */
+       addr = 0xE0000;
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   rdist.attr, &addr);
+       TEST_ASSERT(ret && errno == EEXIST, "GIC redist base set again");
+
+       ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                    KVM_VGIC_V3_ADDR_TYPE_REDIST);
+       if (!ret) {
+               /* Attempt to mix legacy and new redistributor regions */
+               addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 0, 0);
+               ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+               TEST_ASSERT(ret && errno == EINVAL,
+                           "attempt to mix GICv3 REDIST and REDIST_REGION");
+       }
+
+       /*
+        * Set overlapping DIST / REDIST, cannot be detected here. Will be detected
+        * on first vcpu run instead.
+        */
+       addr = rdist.size - rdist.alignment;
+       kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           dist.attr, &addr);
+}
+
+/* Test the new REDIST region API */
+static void subtest_v3_redist_regions(struct vm_gic *v)
+{
+       uint64_t addr, expected_addr;
+       int ret;
+
+       ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST);
+       TEST_ASSERT(!ret, "Multiple redist regions advertised");
+
+       addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 2, 0);
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with flags != 0");
+
+       addr = REDIST_REGION_ATTR_ADDR(0, 0x100000, 0, 0);
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with count == 0");
+
+       addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1);
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == EINVAL,
+                   "attempt to register the first rdist region with index != 0");
+
+       addr = REDIST_REGION_ATTR_ADDR(2, 0x201000, 0, 1);
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == EINVAL, "rdist region with misaligned address");
+
+       addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
+       kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+       addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1);
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == EINVAL, "register an rdist region with already used index");
+
+       addr = REDIST_REGION_ATTR_ADDR(1, 0x210000, 0, 2);
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == EINVAL,
+                   "register an rdist region overlapping with another one");
+
+       addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 2);
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == EINVAL, "register redist region with index not +1");
+
+       addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1);
+       kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+       addr = REDIST_REGION_ATTR_ADDR(1, max_phys_size, 0, 2);
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == E2BIG,
+                   "register redist region with base address beyond IPA range");
+
+       /* The last redist is above the pa range. */
+       addr = REDIST_REGION_ATTR_ADDR(2, max_phys_size - 0x30000, 0, 2);
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == E2BIG,
+                   "register redist region with top address beyond IPA range");
+
+       addr = 0x260000;
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
+       TEST_ASSERT(ret && errno == EINVAL,
+                   "Mix KVM_VGIC_V3_ADDR_TYPE_REDIST and REDIST_REGION");
+
+       /*
+        * Now there are 2 redist regions:
+        * region 0 @ 0x200000 2 redists
+        * region 1 @ 0x240000 1 redist
+        * Attempt to read their characteristics
+        */
+
+       addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 0);
+       expected_addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
+       ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #0");
+
+       addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 1);
+       expected_addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1);
+       ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #1");
+
+       addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 2);
+       ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == ENOENT, "read characteristics of non existing region");
+
+       addr = 0x260000;
+       kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_DIST, &addr);
+
+       addr = REDIST_REGION_ATTR_ADDR(1, 0x260000, 0, 2);
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == EINVAL, "register redist region colliding with dist");
+}
+
+/*
+ * The VGIC KVM device is created and initialized before the secondary vCPUs
+ * get created.
+ */
+static void test_vgic_then_vcpus(uint32_t gic_dev_type)
+{
+       struct kvm_vcpu *vcpus[NR_VCPUS];
+       struct vm_gic v;
+       int ret, i;
+
+       v = vm_gic_create_with_vcpus(gic_dev_type, 1, vcpus);
+
+       subtest_dist_rdist(&v);
+
+       /* Add the rest of the VCPUs */
+       for (i = 1; i < NR_VCPUS; ++i)
+               vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
+
+       ret = run_vcpu(vcpus[3]);
+       TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run");
+
+       vm_gic_destroy(&v);
+}
+
+/* All the VCPUs are created before the VGIC KVM device gets initialized */
+static void test_vcpus_then_vgic(uint32_t gic_dev_type)
+{
+       struct kvm_vcpu *vcpus[NR_VCPUS];
+       struct vm_gic v;
+       int ret;
+
+       v = vm_gic_create_with_vcpus(gic_dev_type, NR_VCPUS, vcpus);
+
+       subtest_dist_rdist(&v);
+
+       ret = run_vcpu(vcpus[3]);
+       TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run");
+
+       vm_gic_destroy(&v);
+}
+
+#define KVM_VGIC_V2_ATTR(offset, cpu) \
+       (FIELD_PREP(KVM_DEV_ARM_VGIC_OFFSET_MASK, offset) | \
+        FIELD_PREP(KVM_DEV_ARM_VGIC_CPUID_MASK, cpu))
+
+#define GIC_CPU_CTRL   0x00
+
+static void test_v2_uaccess_cpuif_no_vcpus(void)
+{
+       struct vm_gic v;
+       u64 val = 0;
+       int ret;
+
+       v = vm_gic_create_barebones(KVM_DEV_TYPE_ARM_VGIC_V2);
+       subtest_dist_rdist(&v);
+
+       ret = __kvm_has_device_attr(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
+                                   KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0));
+       TEST_ASSERT(ret && errno == EINVAL,
+                   "accessed non-existent CPU interface, want errno: %i",
+                   EINVAL);
+       ret = __kvm_device_attr_get(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
+                                   KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val);
+       TEST_ASSERT(ret && errno == EINVAL,
+                   "accessed non-existent CPU interface, want errno: %i",
+                   EINVAL);
+       ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
+                                   KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val);
+       TEST_ASSERT(ret && errno == EINVAL,
+                   "accessed non-existent CPU interface, want errno: %i",
+                   EINVAL);
+
+       vm_gic_destroy(&v);
+}
+
+static void test_v3_new_redist_regions(void)
+{
+       struct kvm_vcpu *vcpus[NR_VCPUS];
+       void *dummy = NULL;
+       struct vm_gic v;
+       uint64_t addr;
+       int ret;
+
+       v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+       subtest_v3_redist_regions(&v);
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+       ret = run_vcpu(vcpus[3]);
+       TEST_ASSERT(ret == -ENXIO, "running without sufficient number of rdists");
+       vm_gic_destroy(&v);
+
+       /* step2 */
+
+       v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+       subtest_v3_redist_regions(&v);
+
+       addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2);
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+       ret = run_vcpu(vcpus[3]);
+       TEST_ASSERT(ret == -EBUSY, "running without vgic explicit init");
+
+       vm_gic_destroy(&v);
+
+       /* step 3 */
+
+       v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+       subtest_v3_redist_regions(&v);
+
+       ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, dummy);
+       TEST_ASSERT(ret && errno == EFAULT,
+                   "register a third region (covering the 4 vcpus) with a bad userspace address");
+
+       addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2);
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+       ret = run_vcpu(vcpus[3]);
+       TEST_ASSERT(!ret, "vcpu run");
+
+       vm_gic_destroy(&v);
+}
+
+static void test_v3_typer_accesses(void)
+{
+       struct vm_gic v;
+       uint64_t addr;
+       int ret, i;
+
+       v.vm = vm_create(NR_VCPUS);
+       (void)vm_vcpu_add(v.vm, 0, guest_code);
+
+       v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+
+       (void)vm_vcpu_add(v.vm, 3, guest_code);
+
+       v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EINVAL,
+                               "attempting to read GICR_TYPER of non created vcpu");
+
+       (void)vm_vcpu_add(v.vm, 1, guest_code);
+
+       v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EBUSY,
+                               "read GICR_TYPER before GIC initialized");
+
+       (void)vm_vcpu_add(v.vm, 2, guest_code);
+
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+       for (i = 0; i < NR_VCPUS ; i++) {
+               v3_redist_reg_get(v.gic_fd, i, GICR_TYPER, i * 0x100,
+                                 "read GICR_TYPER before rdist region setting");
+       }
+
+       addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+       /* The first 2 rdists should be put there (vcpus 0 and 3) */
+       v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x0, "read typer of rdist #0");
+       v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #1");
+
+       addr = REDIST_REGION_ATTR_ADDR(10, 0x100000, 0, 1);
+       ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == EINVAL, "collision with previous rdist region");
+
+       v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100,
+                         "no redist region attached to vcpu #1 yet, last cannot be returned");
+       v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200,
+                         "no redist region attached to vcpu #2, last cannot be returned");
+
+       addr = REDIST_REGION_ATTR_ADDR(10, 0x20000, 0, 1);
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+       v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1");
+       v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210,
+                         "read typer of rdist #1, last properly returned");
+
+       vm_gic_destroy(&v);
+}
+
+static struct vm_gic vm_gic_v3_create_with_vcpuids(int nr_vcpus,
+                                                  uint32_t vcpuids[])
+{
+       struct vm_gic v;
+       int i;
+
+       v.vm = vm_create(nr_vcpus);
+       for (i = 0; i < nr_vcpus; i++)
+               vm_vcpu_add(v.vm, vcpuids[i], guest_code);
+
+       v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+
+       return v;
+}
+
+/*
+ * Test GICR_TYPER last bit with new redist regions
+ * rdist regions #1 and #2 are contiguous
+ * rdist region #0 @0x100000 2 rdist capacity
+ *     rdists: 0, 3 (Last)
+ * rdist region #1 @0x240000 2 rdist capacity
+ *     rdists: 5, 4 (Last)
+ * rdist region #2 @0x200000 2 rdist capacity
+ *     rdists: 1, 2
+ */
+static void test_v3_last_bit_redist_regions(void)
+{
+       uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
+       struct vm_gic v;
+       uint64_t addr;
+
+       v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids);
+
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+       addr = REDIST_REGION_ATTR_ADDR(2, 0x100000, 0, 0);
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+       addr = REDIST_REGION_ATTR_ADDR(2, 0x240000, 0, 1);
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+       addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 2);
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
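+       /* GICR_TYPER: Processor_Number lives in bits [23:8], bit 4 is the "Last" bit. */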
+       v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0");
+       v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1");
+       v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200, "read typer of rdist #2");
+       v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #3");
+       v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #5");
+       v3_redist_reg_get(v.gic_fd, 4, GICR_TYPER, 0x410, "read typer of rdist #4");
+
+       vm_gic_destroy(&v);
+}
+
+/* Test last bit with legacy region */
+static void test_v3_last_bit_single_rdist(void)
+{
+       uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
+       struct vm_gic v;
+       uint64_t addr;
+
+       v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids);
+
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+       addr = 0x10000;
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
+
+       v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0");
+       v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x300, "read typer of rdist #3");
+       v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #5");
+       v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1");
+       v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210, "read typer of rdist #2");
+
+       vm_gic_destroy(&v);
+}
+
+/* Uses the legacy REDIST region API. */
+static void test_v3_redist_ipa_range_check_at_vcpu_run(void)
+{
+       struct kvm_vcpu *vcpus[NR_VCPUS];
+       struct vm_gic v;
+       int ret, i;
+       uint64_t addr;
+
+       v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, 1, vcpus);
+
+       /* Set space for 3 redists, we have 1 vcpu, so this succeeds. */
+       addr = max_phys_size - (3 * 2 * 0x10000);
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
+
+       addr = 0x00000;
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_DIST, &addr);
+
+       /* Add the rest of the VCPUs */
+       for (i = 1; i < NR_VCPUS; ++i)
+               vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
+
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+       /* Attempt to run a vcpu without enough redist space. */
+       ret = run_vcpu(vcpus[2]);
+       TEST_ASSERT(ret && errno == EINVAL,
+               "redist base+size above PA range detected on 1st vcpu run");
+
+       vm_gic_destroy(&v);
+}
+
+static void test_v3_its_region(void)
+{
+       struct kvm_vcpu *vcpus[NR_VCPUS];
+       struct vm_gic v;
+       uint64_t addr;
+       int its_fd, ret;
+
+       v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+       its_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_ITS);
+
+       addr = 0x401000;
+       ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_ITS_ADDR_TYPE, &addr);
+       TEST_ASSERT(ret && errno == EINVAL,
+               "ITS region with misaligned address");
+
+       addr = max_phys_size;
+       ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_ITS_ADDR_TYPE, &addr);
+       TEST_ASSERT(ret && errno == E2BIG,
+               "register ITS region with base address beyond IPA range");
+
+       addr = max_phys_size - 0x10000;
+       ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_ITS_ADDR_TYPE, &addr);
+       TEST_ASSERT(ret && errno == E2BIG,
+               "Half of ITS region is beyond IPA range");
+
+       /* This one succeeds setting the ITS base */
+       addr = 0x400000;
+       kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_ITS_ADDR_TYPE, &addr);
+
+       addr = 0x300000;
+       ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_ITS_ADDR_TYPE, &addr);
+       TEST_ASSERT(ret && errno == EEXIST, "ITS base set again");
+
+       close(its_fd);
+       vm_gic_destroy(&v);
+}
+
+/*
+ * Returns 0 if it's possible to create a GIC device of the given type (V2 or V3).
+ */
+int test_kvm_device(uint32_t gic_dev_type)
+{
+       struct kvm_vcpu *vcpus[NR_VCPUS];
+       struct vm_gic v;
+       uint32_t other;
+       int ret;
+
+       v.vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus);
+
+       /* try to create a non existing KVM device */
+       ret = __kvm_test_create_device(v.vm, 0);
+       TEST_ASSERT(ret && errno == ENODEV, "unsupported device");
+
+       /* trial mode */
+       ret = __kvm_test_create_device(v.vm, gic_dev_type);
+       if (ret)
+               return ret;
+       v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
+
+       ret = __kvm_create_device(v.vm, gic_dev_type);
+       TEST_ASSERT(ret < 0 && errno == EEXIST, "create GIC device twice");
+
+       /* try to create the other gic_dev_type */
+       other = VGIC_DEV_IS_V2(gic_dev_type) ? KVM_DEV_TYPE_ARM_VGIC_V3
+                                            : KVM_DEV_TYPE_ARM_VGIC_V2;
+
+       if (!__kvm_test_create_device(v.vm, other)) {
+               ret = __kvm_create_device(v.vm, other);
+               TEST_ASSERT(ret < 0 && (errno == EINVAL || errno == EEXIST),
+                               "create GIC device while other version exists");
+       }
+
+       vm_gic_destroy(&v);
+
+       return 0;
+}
+
+void run_tests(uint32_t gic_dev_type)
+{
+       test_vcpus_then_vgic(gic_dev_type);
+       test_vgic_then_vcpus(gic_dev_type);
+
+       if (VGIC_DEV_IS_V2(gic_dev_type))
+               test_v2_uaccess_cpuif_no_vcpus();
+
+       if (VGIC_DEV_IS_V3(gic_dev_type)) {
+               test_v3_new_redist_regions();
+               test_v3_typer_accesses();
+               test_v3_last_bit_redist_regions();
+               test_v3_last_bit_single_rdist();
+               test_v3_redist_ipa_range_check_at_vcpu_run();
+               test_v3_its_region();
+       }
+}
+
+int main(int ac, char **av)
+{
+       int ret;
+       int pa_bits;
+       int cnt_impl = 0;
+
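+       /* Derive the IPA space size used by the E2BIG address range checks. */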
+       pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
+       max_phys_size = 1ULL << pa_bits;
+
+       ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V3);
+       if (!ret) {
+               pr_info("Running GIC_v3 tests.\n");
+               run_tests(KVM_DEV_TYPE_ARM_VGIC_V3);
+               cnt_impl++;
+       }
+
+       ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V2);
+       if (!ret) {
+               pr_info("Running GIC_v2 tests.\n");
+               run_tests(KVM_DEV_TYPE_ARM_VGIC_V2);
+               cnt_impl++;
+       }
+
+       if (!cnt_impl) {
+               print_skip("No GICv2 nor GICv3 support");
+               exit(KSFT_SKIP);
+       }
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c
new file mode 100644 (file)
index 0000000..f4ac28d
--- /dev/null
@@ -0,0 +1,847 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vgic_irq.c - Test userspace injection of IRQs
+ *
+ * This test validates the injection of IRQs from userspace using various
+ * methods (e.g., KVM_IRQ_LINE) and modes (e.g., EOI). The guest "asks" the
+ * host to inject a specific intid via a GUEST_SYNC call, and then checks that
+ * it received it.
+ */
+#include <asm/kvm.h>
+#include <asm/kvm_para.h>
+#include <sys/eventfd.h>
+#include <linux/sizes.h>
+
+#include "processor.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "gic.h"
+#include "gic_v3.h"
+#include "vgic.h"
+
+/*
+ * Stores the user specified args; it's passed to the guest and to every test
+ * function.
+ */
+struct test_args {
+       uint32_t nr_irqs; /* number of KVM supported IRQs. */
+       bool eoi_split; /* 1 is eoir+dir, 0 is eoir only */
+       bool level_sensitive; /* 1 is level, 0 is edge */
+       int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */
+       bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */
+};
+
+/*
+ * KVM implements 32 priority levels:
+ * 0x00 (highest priority) - 0xF8 (lowest priority), in steps of 8
+ *
+ * Note that these macros will still be correct in the case that KVM implements
+ * more priority levels. Also note that 32 is the minimum for GICv3 and GICv2.
+ */
+#define KVM_NUM_PRIOS          32
+#define KVM_PRIO_SHIFT         3 /* steps of 8 = 1 << 3 */
+#define KVM_PRIO_STEPS         (1 << KVM_PRIO_SHIFT) /* 8 */
+#define LOWEST_PRIO            (KVM_NUM_PRIOS - 1)
+#define CPU_PRIO_MASK          (LOWEST_PRIO << KVM_PRIO_SHIFT) /* 0xf8 */
+#define IRQ_DEFAULT_PRIO       (LOWEST_PRIO - 1)
+#define IRQ_DEFAULT_PRIO_REG   (IRQ_DEFAULT_PRIO << KVM_PRIO_SHIFT) /* 0xf0 */
+
+/*
+ * The kvm_inject_* utilities are used by the guest to ask the host to inject
+ * interrupts (e.g., using the KVM_IRQ_LINE ioctl).
+ */
+
+typedef enum {
+       KVM_INJECT_EDGE_IRQ_LINE = 1,
+       KVM_SET_IRQ_LINE,
+       KVM_SET_IRQ_LINE_HIGH,
+       KVM_SET_LEVEL_INFO_HIGH,
+       KVM_INJECT_IRQFD,
+       KVM_WRITE_ISPENDR,
+       KVM_WRITE_ISACTIVER,
+} kvm_inject_cmd;
+
+struct kvm_inject_args {
+       kvm_inject_cmd cmd;
+       uint32_t first_intid;
+       uint32_t num;
+       int level;
+       bool expect_failure;
+};
+
+/* Used on the guest side to perform the hypercall. */
+static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
+               uint32_t num, int level, bool expect_failure);
+
+/* Used on the host side to get the hypercall info. */
+static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
+               struct kvm_inject_args *args);
+
+#define _KVM_INJECT_MULTI(cmd, intid, num, expect_failure)                     \
+       kvm_inject_call(cmd, intid, num, -1 /* not used */, expect_failure)
+
+#define KVM_INJECT_MULTI(cmd, intid, num)                                      \
+       _KVM_INJECT_MULTI(cmd, intid, num, false)
+
+#define _KVM_INJECT(cmd, intid, expect_failure)                                        \
+       _KVM_INJECT_MULTI(cmd, intid, 1, expect_failure)
+
+#define KVM_INJECT(cmd, intid)                                                 \
+       _KVM_INJECT_MULTI(cmd, intid, 1, false)
+
+#define KVM_ACTIVATE(cmd, intid)                                               \
+       kvm_inject_call(cmd, intid, 1, 1, false);
+
+struct kvm_inject_desc {
+       kvm_inject_cmd cmd;
+       /* can inject SGIs, PPIs, and/or SPIs. */
+       bool sgi, ppi, spi;
+};
+
+static struct kvm_inject_desc inject_edge_fns[] = {
+       /*                                      sgi    ppi    spi */
+       { KVM_INJECT_EDGE_IRQ_LINE,             false, false, true },
+       { KVM_INJECT_IRQFD,                     false, false, true },
+       { KVM_WRITE_ISPENDR,                    true,  false, true },
+       { 0, },
+};
+
+static struct kvm_inject_desc inject_level_fns[] = {
+       /*                                      sgi    ppi    spi */
+       { KVM_SET_IRQ_LINE_HIGH,                false, true,  true },
+       { KVM_SET_LEVEL_INFO_HIGH,              false, true,  true },
+       { KVM_INJECT_IRQFD,                     false, false, true },
+       { KVM_WRITE_ISPENDR,                    false, true,  true },
+       { 0, },
+};
+
+static struct kvm_inject_desc set_active_fns[] = {
+       /*                                      sgi    ppi    spi */
+       { KVM_WRITE_ISACTIVER,                  true,  true,  true },
+       { 0, },
+};
+
+#define for_each_inject_fn(t, f)                                               \
+       for ((f) = (t); (f)->cmd; (f)++)
+
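+/* Same as for_each_inject_fn(), but skips KVM_INJECT_IRQFD if KVM lacks irqfd support. */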
+#define for_each_supported_inject_fn(args, t, f)                               \
+       for_each_inject_fn(t, f)                                                \
+               if ((args)->kvm_supports_irqfd || (f)->cmd != KVM_INJECT_IRQFD)
+
+#define for_each_supported_activate_fn(args, t, f)                             \
+       for_each_supported_inject_fn((args), (t), (f))
+
+/* Shared between the guest main thread and the IRQ handlers. */
+volatile uint64_t irq_handled;
+volatile uint32_t irqnr_received[MAX_SPI + 1];
+
+static void reset_stats(void)
+{
+       int i;
+
+       irq_handled = 0;
+       for (i = 0; i <= MAX_SPI; i++)
+               irqnr_received[i] = 0;
+}
+
+static uint64_t gic_read_ap1r0(void)
+{
+       uint64_t reg = read_sysreg_s(SYS_ICC_AP1R0_EL1);
+
+       dsb(sy);
+       return reg;
+}
+
+static void gic_write_ap1r0(uint64_t val)
+{
+       write_sysreg_s(val, SYS_ICC_AP1R0_EL1);
+       isb();
+}
+
+static void guest_set_irq_line(uint32_t intid, uint32_t level);
+
+static void guest_irq_generic_handler(bool eoi_split, bool level_sensitive)
+{
+       uint32_t intid = gic_get_and_ack_irq();
+
+       if (intid == IAR_SPURIOUS)
+               return;
+
+       GUEST_ASSERT(gic_irq_get_active(intid));
+
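+       /*
+        * An edge IRQ should no longer be pending once acked; a level IRQ
+        * stays asserted until the host lowers the line.
+        */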
+       if (!level_sensitive)
+               GUEST_ASSERT(!gic_irq_get_pending(intid));
+
+       if (level_sensitive)
+               guest_set_irq_line(intid, 0);
+
+       GUEST_ASSERT(intid < MAX_SPI);
+       irqnr_received[intid] += 1;
+       irq_handled += 1;
+
+       gic_set_eoi(intid);
+       GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+       if (eoi_split)
+               gic_set_dir(intid);
+
+       GUEST_ASSERT(!gic_irq_get_active(intid));
+       GUEST_ASSERT(!gic_irq_get_pending(intid));
+}
+
+static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
+               uint32_t num, int level, bool expect_failure)
+{
+       struct kvm_inject_args args = {
+               .cmd = cmd,
+               .first_intid = first_intid,
+               .num = num,
+               .level = level,
+               .expect_failure = expect_failure,
+       };
+       GUEST_SYNC(&args);
+}
+
+#define GUEST_ASSERT_IAR_EMPTY()                                               \
+do {                                                                           \
+       uint32_t _intid;                                                        \
+       _intid = gic_get_and_ack_irq();                                         \
+       GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS);                    \
+} while (0)
+
+#define CAT_HELPER(a, b) a ## b
+#define CAT(a, b) CAT_HELPER(a, b)
+#define PREFIX guest_irq_handler_
+#define GUEST_IRQ_HANDLER_NAME(split, lev) CAT(PREFIX, CAT(split, lev))
+#define GENERATE_GUEST_IRQ_HANDLER(split, lev)                                 \
+static void CAT(PREFIX, CAT(split, lev))(struct ex_regs *regs)                 \
+{                                                                              \
+       guest_irq_generic_handler(split, lev);                                  \
+}
+
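+/* Instantiate a handler for each (eoi_split, level_sensitive) combination. */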
+GENERATE_GUEST_IRQ_HANDLER(0, 0);
+GENERATE_GUEST_IRQ_HANDLER(0, 1);
+GENERATE_GUEST_IRQ_HANDLER(1, 0);
+GENERATE_GUEST_IRQ_HANDLER(1, 1);
+
+static void (*guest_irq_handlers[2][2])(struct ex_regs *) = {
+       {GUEST_IRQ_HANDLER_NAME(0, 0), GUEST_IRQ_HANDLER_NAME(0, 1),},
+       {GUEST_IRQ_HANDLER_NAME(1, 0), GUEST_IRQ_HANDLER_NAME(1, 1),},
+};
+
+static void reset_priorities(struct test_args *args)
+{
+       int i;
+
+       for (i = 0; i < args->nr_irqs; i++)
+               gic_set_priority(i, IRQ_DEFAULT_PRIO_REG);
+}
+
+static void guest_set_irq_line(uint32_t intid, uint32_t level)
+{
+       kvm_inject_call(KVM_SET_IRQ_LINE, intid, 1, level, false);
+}
+
+static void test_inject_fail(struct test_args *args,
+               uint32_t intid, kvm_inject_cmd cmd)
+{
+       reset_stats();
+
+       _KVM_INJECT(cmd, intid, true);
+       /* no IRQ to handle on entry */
+
+       GUEST_ASSERT_EQ(irq_handled, 0);
+       GUEST_ASSERT_IAR_EMPTY();
+}
+
+static void guest_inject(struct test_args *args,
+               uint32_t first_intid, uint32_t num,
+               kvm_inject_cmd cmd)
+{
+       uint32_t i;
+
+       reset_stats();
+
+       /* Cycle over all priorities to make things more interesting. */
+       for (i = first_intid; i < num + first_intid; i++)
+               gic_set_priority(i, (i % (KVM_NUM_PRIOS - 1)) << 3);
+
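+       /* Mask IRQs; each injected interrupt is then taken via the wfi loop below. */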
+       asm volatile("msr daifset, #2" : : : "memory");
+       KVM_INJECT_MULTI(cmd, first_intid, num);
+
+       while (irq_handled < num) {
+               wfi();
+               local_irq_enable();
+               isb(); /* handle IRQ */
+               local_irq_disable();
+       }
+       local_irq_enable();
+
+       GUEST_ASSERT_EQ(irq_handled, num);
+       for (i = first_intid; i < num + first_intid; i++)
+               GUEST_ASSERT_EQ(irqnr_received[i], 1);
+       GUEST_ASSERT_IAR_EMPTY();
+
+       reset_priorities(args);
+}
+
+/*
+ * Restore the active state of multiple concurrent IRQs (num IRQs starting at
+ * first_intid).  This does what a live-migration would do on the
+ * destination side assuming there are some active IRQs that were not
+ * deactivated yet.
+ */
+static void guest_restore_active(struct test_args *args,
+               uint32_t first_intid, uint32_t num,
+               kvm_inject_cmd cmd)
+{
+       uint32_t prio, intid, ap1r;
+       int i;
+
+       /*
+        * Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
+        * in descending order, so intid+1 can preempt intid.
+        */
+       for (i = 0, prio = (num - 1) * 8; i < num; i++, prio -= 8) {
+               GUEST_ASSERT(prio >= 0);
+               intid = i + first_intid;
+               gic_set_priority(intid, prio);
+       }
+
+       /*
+        * In a real migration, KVM would restore all GIC state before running
+        * guest code.
+        */
+       for (i = 0; i < num; i++) {
+               intid = i + first_intid;
+               KVM_ACTIVATE(cmd, intid);
+               ap1r = gic_read_ap1r0();
+               ap1r |= 1U << i;
+               gic_write_ap1r0(ap1r);
+       }
+
+       /* This is where the "migration" would occur. */
+
+       /* finish handling the IRQs starting with the highest priority one. */
+       for (i = 0; i < num; i++) {
+               intid = num - i - 1 + first_intid;
+               gic_set_eoi(intid);
+               if (args->eoi_split)
+                       gic_set_dir(intid);
+       }
+
+       for (i = 0; i < num; i++)
+               GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+       GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+       GUEST_ASSERT_IAR_EMPTY();
+}
+
+/*
+ * Polls the IAR until it's not a spurious interrupt.
+ *
+ * This function should only be used in test_inject_preemption (with IRQs
+ * masked).
+ */
+static uint32_t wait_for_and_activate_irq(void)
+{
+       uint32_t intid;
+
+       do {
+               asm volatile("wfi" : : : "memory");
+               intid = gic_get_and_ack_irq();
+       } while (intid == IAR_SPURIOUS);
+
+       return intid;
+}
+
+/*
+ * Inject multiple concurrent IRQs (num IRQs starting at first_intid) and
+ * handle them without handling the actual exceptions.  This is done by masking
+ * interrupts for the whole test.
+ */
+static void test_inject_preemption(struct test_args *args,
+               uint32_t first_intid, int num,
+               kvm_inject_cmd cmd)
+{
+       uint32_t intid, prio, step = KVM_PRIO_STEPS;
+       int i;
+
+       /*
+        * Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
+        * in descending order, so intid+1 can preempt intid.
+        */
+       for (i = 0, prio = (num - 1) * step; i < num; i++, prio -= step) {
+               GUEST_ASSERT(prio >= 0);
+               intid = i + first_intid;
+               gic_set_priority(intid, prio);
+       }
+
+       local_irq_disable();
+
+       for (i = 0; i < num; i++) {
+               uint32_t tmp;
+               intid = i + first_intid;
+               KVM_INJECT(cmd, intid);
+               /* Each successive IRQ will preempt the previous one. */
+               tmp = wait_for_and_activate_irq();
+               GUEST_ASSERT_EQ(tmp, intid);
+               if (args->level_sensitive)
+                       guest_set_irq_line(intid, 0);
+       }
+
+       /* finish handling the IRQs starting with the highest priority one. */
+       for (i = 0; i < num; i++) {
+               intid = num - i - 1 + first_intid;
+               gic_set_eoi(intid);
+               if (args->eoi_split)
+                       gic_set_dir(intid);
+       }
+
+       local_irq_enable();
+
+       for (i = 0; i < num; i++)
+               GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+       GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+       GUEST_ASSERT_IAR_EMPTY();
+
+       reset_priorities(args);
+}
+
+static void test_injection(struct test_args *args, struct kvm_inject_desc *f)
+{
+       uint32_t nr_irqs = args->nr_irqs;
+
+       if (f->sgi) {
+               guest_inject(args, MIN_SGI, 1, f->cmd);
+               guest_inject(args, 0, 16, f->cmd);
+       }
+
+       if (f->ppi)
+               guest_inject(args, MIN_PPI, 1, f->cmd);
+
+       if (f->spi) {
+               guest_inject(args, MIN_SPI, 1, f->cmd);
+               guest_inject(args, nr_irqs - 1, 1, f->cmd);
+               guest_inject(args, MIN_SPI, nr_irqs - MIN_SPI, f->cmd);
+       }
+}
+
+static void test_injection_failure(struct test_args *args,
+               struct kvm_inject_desc *f)
+{
+       uint32_t bad_intid[] = { args->nr_irqs, 1020, 1024, 1120, 5120, ~0U, };
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(bad_intid); i++)
+               test_inject_fail(args, bad_intid[i], f->cmd);
+}
+
+static void test_preemption(struct test_args *args, struct kvm_inject_desc *f)
+{
+       /*
+        * Test up to 4 levels of preemption. The reason is that KVM doesn't
+        * currently implement the ability to have more than the number-of-LRs
+        * number of concurrently active IRQs. The number of LRs implemented is
+        * IMPLEMENTATION DEFINED, however, it seems that most implement 4.
+        */
+       if (f->sgi)
+               test_inject_preemption(args, MIN_SGI, 4, f->cmd);
+
+       if (f->ppi)
+               test_inject_preemption(args, MIN_PPI, 4, f->cmd);
+
+       if (f->spi)
+               test_inject_preemption(args, MIN_SPI, 4, f->cmd);
+}
+
+static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f)
+{
+       /* Test up to 4 active IRQs. Same reason as in test_preemption. */
+       if (f->sgi)
+               guest_restore_active(args, MIN_SGI, 4, f->cmd);
+
+       if (f->ppi)
+               guest_restore_active(args, MIN_PPI, 4, f->cmd);
+
+       if (f->spi)
+               guest_restore_active(args, MIN_SPI, 4, f->cmd);
+}
+
+static void guest_code(struct test_args *args)
+{
+       uint32_t i, nr_irqs = args->nr_irqs;
+       bool level_sensitive = args->level_sensitive;
+       struct kvm_inject_desc *f, *inject_fns;
+
+       gic_init(GIC_V3, 1);
+
+       for (i = 0; i < nr_irqs; i++)
+               gic_irq_enable(i);
+
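+       /* Configure all SPIs as edge- or level-triggered, per the test args. */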
+       for (i = MIN_SPI; i < nr_irqs; i++)
+               gic_irq_set_config(i, !level_sensitive);
+
+       gic_set_eoi_split(args->eoi_split);
+
+       reset_priorities(args);
+       gic_set_priority_mask(CPU_PRIO_MASK);
+
+       inject_fns  = level_sensitive ? inject_level_fns
+                                     : inject_edge_fns;
+
+       local_irq_enable();
+
+       /* Start the tests. */
+       for_each_supported_inject_fn(args, inject_fns, f) {
+               test_injection(args, f);
+               test_preemption(args, f);
+               test_injection_failure(args, f);
+       }
+
+       /*
+        * Restore the active state of IRQs. This would happen when live
+        * migrating IRQs in the middle of being handled.
+        */
+       for_each_supported_activate_fn(args, set_active_fns, f)
+               test_restore_active(args, f);
+
+       GUEST_DONE();
+}
+
+static void kvm_irq_line_check(struct kvm_vm *vm, uint32_t intid, int level,
+                       struct test_args *test_args, bool expect_failure)
+{
+       int ret;
+
+       if (!expect_failure) {
+               kvm_arm_irq_line(vm, intid, level);
+       } else {
+               /* The interface doesn't allow larger intids. */
+               if (intid > KVM_ARM_IRQ_NUM_MASK)
+                       return;
+
+               ret = _kvm_arm_irq_line(vm, intid, level);
+               TEST_ASSERT(ret != 0 && errno == EINVAL,
+                               "Bad intid %i did not cause KVM_IRQ_LINE "
+                               "error: rc: %i errno: %i", intid, ret, errno);
+       }
+}
+
+void kvm_irq_set_level_info_check(int gic_fd, uint32_t intid, int level,
+                       bool expect_failure)
+{
+       if (!expect_failure) {
+               kvm_irq_set_level_info(gic_fd, intid, level);
+       } else {
+               int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
+               /*
+                * The kernel silently fails for invalid SPIs and SGIs (which
+                * are not level-sensitive). It only checks that the intid
+                * doesn't exceed 1U << 10 (the max reserved SPI); callers are
+                * expected to mask the intid with 0x3ff (1023).
+                */
+               if (intid > VGIC_MAX_RESERVED)
+                       TEST_ASSERT(ret != 0 && errno == EINVAL,
+                               "Bad intid %i did not cause VGIC_GRP_LEVEL_INFO "
+                               "error: rc: %i errno: %i", intid, ret, errno);
+               else
+                       TEST_ASSERT(!ret, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO "
+                               "for intid %i failed, rc: %i errno: %i",
+                               intid, ret, errno);
+       }
+}
+
+static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm,
+               uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
+               bool expect_failure)
+{
+       struct kvm_irq_routing *routing;
+       int ret;
+       uint64_t i;
+
+       assert(num <= kvm_max_routes && kvm_max_routes <= KVM_MAX_IRQ_ROUTES);
+
+       routing = kvm_gsi_routing_create();
+       for (i = intid; i < (uint64_t)intid + num; i++)
+               kvm_gsi_routing_irqchip_add(routing, i - MIN_SPI, i - MIN_SPI);
+
+       if (!expect_failure) {
+               kvm_gsi_routing_write(vm, routing);
+       } else {
+               ret = _kvm_gsi_routing_write(vm, routing);
+               /* The kernel only checks e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS */
+               if (((uint64_t)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS)
+                       TEST_ASSERT(ret != 0 && errno == EINVAL,
+                               "Bad intid %u did not cause KVM_SET_GSI_ROUTING "
+                               "error: rc: %i errno: %i", intid, ret, errno);
+               else
+                       TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING "
+                               "for intid %i failed, rc: %i errno: %i",
+                               intid, ret, errno);
+       }
+}
+
+static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid,
+                                       struct kvm_vcpu *vcpu,
+                                       bool expect_failure)
+{
+       /*
+        * Ignore this when expecting failure, as invalid intids will lead to
+        * either trying to inject SGIs when the test is configured to be
+        * level_sensitive (or the reverse), or trying to inject large intids,
+        * which would write beyond the ISPENDR register space (and we don't
+        * want to do that either).
+        */
+       if (!expect_failure)
+               kvm_irq_write_ispendr(gic_fd, intid, vcpu);
+}
+
+static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
+               uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
+               bool expect_failure)
+{
+       int fd[MAX_SPI];
+       uint64_t val;
+       int ret, f;
+       uint64_t i;
+
+       /*
+        * There is no way to try injecting an SGI or PPI as the interface
+        * starts counting from the first SPI (above the private ones), so just
+        * return.
+        */
+       if (INTID_IS_SGI(intid) || INTID_IS_PPI(intid))
+               return;
+
+       kvm_set_gsi_routing_irqchip_check(vm, intid, num,
+                       kvm_max_routes, expect_failure);
+
+       /*
+        * If expect_failure, inject anyway; the injections will silently
+        * fail.  In any case, the guest will check that no actual interrupt
+        * was injected for those cases.
+        */
+
+       for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+               fd[f] = eventfd(0, 0);
+               TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f]));
+       }
+
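+       /* Bind each eventfd to the GSI that corresponds to its intid. */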
+       for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+               struct kvm_irqfd irqfd = {
+                       .fd  = fd[f],
+                       .gsi = i - MIN_SPI,
+               };
+               assert(i <= (uint64_t)UINT_MAX);
+               vm_ioctl(vm, KVM_IRQFD, &irqfd);
+       }
+
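+       /* Signal each eventfd; KVM then injects the bound GSI via irqfd. */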
+       for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+               val = 1;
+               ret = write(fd[f], &val, sizeof(uint64_t));
+               TEST_ASSERT(ret == sizeof(uint64_t),
+                           __KVM_SYSCALL_ERROR("write()", ret));
+       }
+
+       for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
+               close(fd[f]);
+}
+
+/*
+ * Iterate over [first, first + num); the 64-bit 'tmp' copy of the counter
+ * keeps the loop bound from wrapping, which handles the valid case
+ * intid=0xffffffff num=1.
+ */
+#define for_each_intid(first, num, tmp, i)                                     \
+       for ((tmp) = (i) = (first);                                             \
+               (tmp) < (uint64_t)(first) + (uint64_t)(num);                    \
+               (tmp)++, (i)++)
+
+static void run_guest_cmd(struct kvm_vcpu *vcpu, int gic_fd,
+                         struct kvm_inject_args *inject_args,
+                         struct test_args *test_args)
+{
+       kvm_inject_cmd cmd = inject_args->cmd;
+       uint32_t intid = inject_args->first_intid;
+       uint32_t num = inject_args->num;
+       int level = inject_args->level;
+       bool expect_failure = inject_args->expect_failure;
+       struct kvm_vm *vm = vcpu->vm;
+       uint64_t tmp;
+       uint32_t i;
+
+       /* Guard against intid + num overflow, but allow the valid case intid=0xffffffff num=1. */
+       assert(intid < UINT_MAX - num || num == 1);
+
+       switch (cmd) {
+       case KVM_INJECT_EDGE_IRQ_LINE:
+               for_each_intid(intid, num, tmp, i)
+                       kvm_irq_line_check(vm, i, 1, test_args,
+                                       expect_failure);
+               for_each_intid(intid, num, tmp, i)
+                       kvm_irq_line_check(vm, i, 0, test_args,
+                                       expect_failure);
+               break;
+       case KVM_SET_IRQ_LINE:
+               for_each_intid(intid, num, tmp, i)
+                       kvm_irq_line_check(vm, i, level, test_args,
+                                       expect_failure);
+               break;
+       case KVM_SET_IRQ_LINE_HIGH:
+               for_each_intid(intid, num, tmp, i)
+                       kvm_irq_line_check(vm, i, 1, test_args,
+                                       expect_failure);
+               break;
+       case KVM_SET_LEVEL_INFO_HIGH:
+               for_each_intid(intid, num, tmp, i)
+                       kvm_irq_set_level_info_check(gic_fd, i, 1,
+                                       expect_failure);
+               break;
+       case KVM_INJECT_IRQFD:
+               kvm_routing_and_irqfd_check(vm, intid, num,
+                                       test_args->kvm_max_routes,
+                                       expect_failure);
+               break;
+       case KVM_WRITE_ISPENDR:
+               for (i = intid; i < intid + num; i++)
+                       kvm_irq_write_ispendr_check(gic_fd, i, vcpu,
+                                                   expect_failure);
+               break;
+       case KVM_WRITE_ISACTIVER:
+               for (i = intid; i < intid + num; i++)
+                       kvm_irq_write_isactiver(gic_fd, i, vcpu);
+               break;
+       default:
+               break;
+       }
+}
+
+static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
+               struct kvm_inject_args *args)
+{
+       struct kvm_inject_args *kvm_args_hva;
+       vm_vaddr_t kvm_args_gva;
+
+       kvm_args_gva = uc->args[1];
+       kvm_args_hva = (struct kvm_inject_args *)addr_gva2hva(vm, kvm_args_gva);
+       memcpy(args, kvm_args_hva, sizeof(struct kvm_inject_args));
+}
+
+static void print_args(struct test_args *args)
+{
+       printf("nr-irqs=%d level-sensitive=%d eoi-split=%d\n",
+                       args->nr_irqs, args->level_sensitive,
+                       args->eoi_split);
+}
+
+static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
+{
+       struct ucall uc;
+       int gic_fd;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct kvm_inject_args inject_args;
+       vm_vaddr_t args_gva;
+
+       struct test_args args = {
+               .nr_irqs = nr_irqs,
+               .level_sensitive = level_sensitive,
+               .eoi_split = eoi_split,
+               .kvm_max_routes = kvm_check_cap(KVM_CAP_IRQ_ROUTING),
+               .kvm_supports_irqfd = kvm_check_cap(KVM_CAP_IRQFD),
+       };
+
+       print_args(&args);
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(vcpu);
+
+       /* Set up the guest args page and pass its address to the guest. */
+       args_gva = vm_vaddr_alloc_page(vm);
+       memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
+       vcpu_args_set(vcpu, 1, args_gva);
+
+       gic_fd = vgic_v3_setup(vm, 1, nr_irqs);
+       __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping");
+
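+       /* Install the guest IRQ handler matching the eoi_split/level_sensitive combo. */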
+       vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT,
+               guest_irq_handlers[args.eoi_split][args.level_sensitive]);
+
+       while (1) {
+               vcpu_run(vcpu);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       kvm_inject_get_call(vm, &uc, &inject_args);
+                       run_guest_cmd(vcpu, gic_fd, &inject_args, &args);
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+
+done:
+       close(gic_fd);
+       kvm_vm_free(vm);
+}
+
+static void help(const char *name)
+{
+       printf(
+       "\n"
+       "usage: %s [-n num_irqs] [-e eoi_split] [-l level_sensitive]\n", name);
+       printf(" -n: specify number of IRQs to set up the vgic with. "
+               "It has to be a multiple of 32 and between 64 and 1024.\n");
+       printf(" -e: if 1, EOI is split: writing EOI drops the priority and a "
+               "separate write to DIR deactivates the interrupt.\n");
+       printf(" -l: specify whether the IRQs are level-sensitive (1) or not (0).");
+       puts("");
+       exit(1);
+}
+
+int main(int argc, char **argv)
+{
+       uint32_t nr_irqs = 64;
+       bool default_args = true;
+       bool level_sensitive = false;
+       int opt;
+       bool eoi_split = false;
+
+       while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) {
+               switch (opt) {
+               case 'n':
+                       nr_irqs = atoi_non_negative("Number of IRQs", optarg);
+                       if (nr_irqs > 1024 || nr_irqs % 32)
+                               help(argv[0]);
+                       break;
+               case 'e':
+                       eoi_split = (bool)atoi_paranoid(optarg);
+                       default_args = false;
+                       break;
+               case 'l':
+                       level_sensitive = (bool)atoi_paranoid(optarg);
+                       default_args = false;
+                       break;
+               case 'h':
+               default:
+                       help(argv[0]);
+                       break;
+               }
+       }
+
+       /*
+        * If the user only specified nr_irqs (i.e. neither -e nor -l), run
+        * all eoi_split/level_sensitive combinations.
+        */
+       if (default_args) {
+               test_vgic(nr_irqs, false /* level */, false /* eoi_split */);
+               test_vgic(nr_irqs, false /* level */, true /* eoi_split */);
+               test_vgic(nr_irqs, true /* level */, false /* eoi_split */);
+               test_vgic(nr_irqs, true /* level */, true /* eoi_split */);
+       } else {
+               test_vgic(nr_irqs, level_sensitive, eoi_split);
+       }
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
new file mode 100644 (file)
index 0000000..fc4fe52
--- /dev/null
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vgic_lpi_stress - Stress test for KVM's ITS emulation
+ *
+ * Copyright (c) 2024 Google LLC
+ */
+
+#include <linux/sizes.h>
+#include <pthread.h>
+#include <stdatomic.h>
+#include <sys/sysinfo.h>
+
+#include "kvm_util.h"
+#include "gic.h"
+#include "gic_v3.h"
+#include "gic_v3_its.h"
+#include "processor.h"
+#include "ucall.h"
+#include "vgic.h"
+
+#define TEST_MEMSLOT_INDEX     1
+
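+/* LPIs occupy the INTID space starting at 8192, per the GICv3 architecture. */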
+#define GIC_LPI_OFFSET 8192
+
+static size_t nr_iterations = 1000;
+static vm_paddr_t gpa_base;
+
+static struct kvm_vm *vm;
+static struct kvm_vcpu **vcpus;
+static int gic_fd, its_fd;
+
+static struct test_data {
+       bool            request_vcpus_stop;
+       u32             nr_cpus;
+       u32             nr_devices;
+       u32             nr_event_ids;
+
+       vm_paddr_t      device_table;
+       vm_paddr_t      collection_table;
+       vm_paddr_t      cmdq_base;
+       void            *cmdq_base_va;
+       vm_paddr_t      itt_tables;
+
+       vm_paddr_t      lpi_prop_table;
+       vm_paddr_t      lpi_pend_tables;
+} test_data =  {
+       .nr_cpus        = 1,
+       .nr_devices     = 1,
+       .nr_event_ids   = 16,
+};
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+       u32 intid = gic_get_and_ack_irq();
+
+       if (intid == IAR_SPURIOUS)
+               return;
+
+       GUEST_ASSERT(intid >= GIC_LPI_OFFSET);
+       gic_set_eoi(intid);
+}
+
+static void guest_setup_its_mappings(void)
+{
+       u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET;
+       u32 nr_events = test_data.nr_event_ids;
+       u32 nr_devices = test_data.nr_devices;
+       u32 nr_cpus = test_data.nr_cpus;
+
+       for (coll_id = 0; coll_id < nr_cpus; coll_id++)
+               its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true);
+
+       /* Round-robin the LPIs to all of the vCPUs in the VM */
+       coll_id = 0;
+       for (device_id = 0; device_id < nr_devices; device_id++) {
+               vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K);
+
+               its_send_mapd_cmd(test_data.cmdq_base_va, device_id,
+                                 itt_base, SZ_64K, true);
+
+               for (event_id = 0; event_id < nr_events; event_id++) {
+                       its_send_mapti_cmd(test_data.cmdq_base_va, device_id,
+                                          event_id, coll_id, intid++);
+
+                       coll_id = (coll_id + 1) % test_data.nr_cpus;
+               }
+       }
+}
+
+static void guest_invalidate_all_rdists(void)
+{
+       int i;
+
+       for (i = 0; i < test_data.nr_cpus; i++)
+               its_send_invall_cmd(test_data.cmdq_base_va, i);
+}
+
+static void guest_setup_gic(void)
+{
+       static atomic_int nr_cpus_ready = 0;
+       u32 cpuid = guest_get_vcpuid();
+
+       gic_init(GIC_V3, test_data.nr_cpus);
+       gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K,
+                             test_data.lpi_pend_tables + (cpuid * SZ_64K));
+
+       atomic_fetch_add(&nr_cpus_ready, 1);
+
+       if (cpuid > 0)
+               return;
+
+       while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus)
+               cpu_relax();
+
+       its_init(test_data.collection_table, SZ_64K,
+                test_data.device_table, SZ_64K,
+                test_data.cmdq_base, SZ_64K);
+
+       guest_setup_its_mappings();
+       guest_invalidate_all_rdists();
+}
+
+static void guest_code(size_t nr_lpis)
+{
+       guest_setup_gic();
+
+       GUEST_SYNC(0);
+
+       /*
+        * Don't use WFI here to avoid blocking the vCPU thread indefinitely and
+        * never getting the stop signal.
+        */
+       while (!READ_ONCE(test_data.request_vcpus_stop))
+               cpu_relax();
+
+       GUEST_DONE();
+}
+
+static void setup_memslot(void)
+{
+       size_t pages;
+       size_t sz;
+
+       /*
+        * For the ITS:
+        *  - A single level device table
+        *  - A single level collection table
+        *  - The command queue
+        *  - An ITT for each device
+        */
+       sz = (3 + test_data.nr_devices) * SZ_64K;
+
+       /*
+        * For the redistributors:
+        *  - A shared LPI configuration table
+        *  - An LPI pending table for each vCPU
+        */
+       sz += (1 + test_data.nr_cpus) * SZ_64K;
+
+       pages = sz / vm->page_size;
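+       /* Place the backing memslot at the very top of the guest physical address space. */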
+       gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz;
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base,
+                                   TEST_MEMSLOT_INDEX, pages, 0);
+}
+
+#define LPI_PROP_DEFAULT_PRIO  0xa0
+
+static void configure_lpis(void)
+{
+       size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids;
+       u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table);
+       size_t i;
+
+       for (i = 0; i < nr_lpis; i++) {
+               tbl[i] = LPI_PROP_DEFAULT_PRIO |
+                        LPI_PROP_GROUP1 |
+                        LPI_PROP_ENABLED;
+       }
+}
+
+static void setup_test_data(void)
+{
+       size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K);
+       u32 nr_devices = test_data.nr_devices;
+       u32 nr_cpus = test_data.nr_cpus;
+       vm_paddr_t cmdq_base;
+
+       test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k,
+                                                   gpa_base,
+                                                   TEST_MEMSLOT_INDEX);
+
+       test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k,
+                                                       gpa_base,
+                                                       TEST_MEMSLOT_INDEX);
+
+       cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base,
+                                      TEST_MEMSLOT_INDEX);
+       virt_map(vm, cmdq_base, cmdq_base, pages_per_64k);
+       test_data.cmdq_base = cmdq_base;
+       test_data.cmdq_base_va = (void *)cmdq_base;
+
+       test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices,
+                                                 gpa_base, TEST_MEMSLOT_INDEX);
+
+       test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k,
+                                                     gpa_base, TEST_MEMSLOT_INDEX);
+       configure_lpis();
+
+       test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus,
+                                                      gpa_base, TEST_MEMSLOT_INDEX);
+
+       sync_global_to_guest(vm, test_data);
+}
+
+static void setup_gic(void)
+{
+       gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64);
+       __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3");
+
+       its_fd = vgic_its_setup(vm);
+}
+
+static void signal_lpi(u32 device_id, u32 event_id)
+{
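+       /* The ITS doorbell is the GITS_TRANSLATER register within the ITS frame. */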
+       vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER;
+
+       struct kvm_msi msi = {
+               .address_lo     = db_addr,
+               .address_hi     = db_addr >> 32,
+               .data           = event_id,
+               .devid          = device_id,
+               .flags          = KVM_MSI_VALID_DEVID,
+       };
+
+       /*
+        * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM,
+        * which for arm64 implies having a valid translation in the ITS.
+        */
+       TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1,
+                   "KVM_SIGNAL_MSI ioctl failed");
+}
+
+static pthread_barrier_t test_setup_barrier;
+
+static void *lpi_worker_thread(void *data)
+{
+       u32 device_id = (size_t)data;
+       u32 event_id;
+       size_t i;
+
+       pthread_barrier_wait(&test_setup_barrier);
+
+       for (i = 0; i < nr_iterations; i++)
+               for (event_id = 0; event_id < test_data.nr_event_ids; event_id++)
+                       signal_lpi(device_id, event_id);
+
+       return NULL;
+}
+
+static void *vcpu_worker_thread(void *data)
+{
+       struct kvm_vcpu *vcpu = data;
+       struct ucall uc;
+
+       while (true) {
+               vcpu_run(vcpu);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       pthread_barrier_wait(&test_setup_barrier);
+                       continue;
+               case UCALL_DONE:
+                       return NULL;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               default:
+                       TEST_FAIL("Unknown ucall: %lu", uc.cmd);
+               }
+       }
+
+       return NULL;
+}
+
+static void report_stats(struct timespec delta)
+{
+       double nr_lpis;
+       double time;
+
+       nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations;
+
+       time = delta.tv_sec;
+       time += ((double)delta.tv_nsec) / NSEC_PER_SEC;
+
+       pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time);
+}
+
+static void run_test(void)
+{
+       u32 nr_devices = test_data.nr_devices;
+       u32 nr_vcpus = test_data.nr_cpus;
+       pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t));
+       pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t));
+       struct timespec start, delta;
+       size_t i;
+
+       TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays");
+
+       pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1);
+
+       for (i = 0; i < nr_vcpus; i++)
+               pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]);
+
+       for (i = 0; i < nr_devices; i++)
+               pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i);
+
+       pthread_barrier_wait(&test_setup_barrier);
+
+       clock_gettime(CLOCK_MONOTONIC, &start);
+
+       for (i = 0; i < nr_devices; i++)
+               pthread_join(lpi_threads[i], NULL);
+
+       delta = timespec_elapsed(start);
+       write_guest_global(vm, test_data.request_vcpus_stop, true);
+
+       for (i = 0; i < nr_vcpus; i++)
+               pthread_join(vcpu_threads[i], NULL);
+
+       report_stats(delta);
+}
+
+static void setup_vm(void)
+{
+       int i;
+
+       vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu));
+       TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
+
+       vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);
+
+       vm_init_descriptor_tables(vm);
+       for (i = 0; i < test_data.nr_cpus; i++)
+               vcpu_init_descriptor_tables(vcpus[i]);
+
+       vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
+
+       setup_memslot();
+
+       setup_gic();
+
+       setup_test_data();
+}
+
+static void destroy_vm(void)
+{
+       close(its_fd);
+       close(gic_fd);
+       kvm_vm_free(vm);
+       free(vcpus);
+}
+
+static void pr_usage(const char *name)
+{
+       pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] [-h]\n", name);
+       pr_info("  -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus);
+       pr_info("  -d:\tnumber of devices (default: %u)\n", test_data.nr_devices);
+       pr_info("  -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids);
+       pr_info("  -i:\tnumber of iterations (default: %lu)\n", nr_iterations);
+}
+
+int main(int argc, char **argv)
+{
+       u32 nr_threads;
+       int c;
+
+       while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
+               switch (c) {
+               case 'v':
+                       test_data.nr_cpus = atoi(optarg);
+                       break;
+               case 'd':
+                       test_data.nr_devices = atoi(optarg);
+                       break;
+               case 'e':
+                       test_data.nr_event_ids = atoi(optarg);
+                       break;
+               case 'i':
+                       nr_iterations = strtoul(optarg, NULL, 0);
+                       break;
+               case 'h':
+               default:
+                       pr_usage(argv[0]);
+                       return 1;
+               }
+       }
+
+       nr_threads = test_data.nr_cpus + test_data.nr_devices;
+       if (nr_threads > get_nprocs())
+               pr_info("WARNING: running %u threads on %d CPUs; performance may be degraded.\n",
+                        nr_threads, get_nprocs());
+
+       setup_vm();
+
+       run_test();
+
+       destroy_vm();
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
new file mode 100644 (file)
index 0000000..f16b3b2
--- /dev/null
@@ -0,0 +1,648 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vpmu_counter_access - Test vPMU event counter access
+ *
+ * Copyright (c) 2023 Google LLC.
+ *
+ * This test checks if the guest can see the same number of PMU event
+ * counters (PMCR_EL0.N) that userspace sets, if the guest can access
+ * those counters, and if the guest is prevented from accessing any
+ * other counters.
+ * It also checks if userspace accesses to the PMU registers honor the
+ * PMCR.N value that's set for the guest.
+ * This test runs only when KVM_CAP_ARM_PMU_V3 is supported on the host.
+ */
+#include <kvm_util.h>
+#include <processor.h>
+#include <test_util.h>
+#include <vgic.h>
+#include <perf/arm_pmuv3.h>
+#include <linux/bitfield.h>
+
+/* The max number of the PMU event counters (excluding the cycle counter) */
+#define ARMV8_PMU_MAX_GENERAL_COUNTERS (ARMV8_PMU_MAX_COUNTERS - 1)
+
+/* The cycle counter bit position that's common among the PMU registers */
+#define ARMV8_PMU_CYCLE_IDX            31
+
+struct vpmu_vm {
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+       int gic_fd;
+};
+
+static struct vpmu_vm vpmu_vm;
+
+struct pmreg_sets {
+       uint64_t set_reg_id;
+       uint64_t clr_reg_id;
+};
+
+#define PMREG_SET(set, clr) {.set_reg_id = set, .clr_reg_id = clr}
+
+static uint64_t get_pmcr_n(uint64_t pmcr)
+{
+       return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr);
+}
+
+static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n)
+{
+       u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N);
+}
+
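+/*
+ * Bitmask covering event counters [n-1:0] plus the cycle counter (bit 31);
+ * e.g. n = 6 yields 0x8000003f.
+ */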
+static uint64_t get_counters_mask(uint64_t n)
+{
+       uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX);
+
+       if (n)
+               mask |= GENMASK(n - 1, 0);
+       return mask;
+}
+
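+/*
+ * Indirect accessors: PMSELR_EL0.SEL selects which event counter the
+ * PMXEV{CNTR,TYPER}_EL0 registers operate on; the ISB ensures the new
+ * selection takes effect before the indirect register is accessed.
+ */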
+/* Read PMEVCNTR<n>_EL0 through PMXEVCNTR_EL0 */
+static inline unsigned long read_sel_evcntr(int sel)
+{
+       write_sysreg(sel, pmselr_el0);
+       isb();
+       return read_sysreg(pmxevcntr_el0);
+}
+
+/* Write PMEVCNTR<n>_EL0 through PMXEVCNTR_EL0 */
+static inline void write_sel_evcntr(int sel, unsigned long val)
+{
+       write_sysreg(sel, pmselr_el0);
+       isb();
+       write_sysreg(val, pmxevcntr_el0);
+       isb();
+}
+
+/* Read PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */
+static inline unsigned long read_sel_evtyper(int sel)
+{
+       write_sysreg(sel, pmselr_el0);
+       isb();
+       return read_sysreg(pmxevtyper_el0);
+}
+
+/* Write PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */
+static inline void write_sel_evtyper(int sel, unsigned long val)
+{
+       write_sysreg(sel, pmselr_el0);
+       isb();
+       write_sysreg(val, pmxevtyper_el0);
+       isb();
+}
+
+static void pmu_disable_reset(void)
+{
+       uint64_t pmcr = read_sysreg(pmcr_el0);
+
+       /* Reset all counters, disabling them */
+       pmcr &= ~ARMV8_PMU_PMCR_E;
+       write_sysreg(pmcr | ARMV8_PMU_PMCR_P, pmcr_el0);
+       isb();
+}
+
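+/*
+ * Direct accessors: PMEVN_SWITCH() expands to a switch statement on 'n' so
+ * that the matching PMEV{CNTR,TYPER}<n>_EL0 register can be named directly
+ * in the mrs/msr instruction.
+ */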
+#define RETURN_READ_PMEVCNTRN(n) \
+       return read_sysreg(pmevcntr##n##_el0)
+static unsigned long read_pmevcntrn(int n)
+{
+       PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN);
+       return 0;
+}
+
+#define WRITE_PMEVCNTRN(n) \
+       write_sysreg(val, pmevcntr##n##_el0)
+static void write_pmevcntrn(int n, unsigned long val)
+{
+       PMEVN_SWITCH(n, WRITE_PMEVCNTRN);
+       isb();
+}
+
+#define READ_PMEVTYPERN(n) \
+       return read_sysreg(pmevtyper##n##_el0)
+static unsigned long read_pmevtypern(int n)
+{
+       PMEVN_SWITCH(n, READ_PMEVTYPERN);
+       return 0;
+}
+
+#define WRITE_PMEVTYPERN(n) \
+       write_sysreg(val, pmevtyper##n##_el0)
+static void write_pmevtypern(int n, unsigned long val)
+{
+       PMEVN_SWITCH(n, WRITE_PMEVTYPERN);
+       isb();
+}
+
+/*
+ * The pmc_accessor structure has pointers to PMEV{CNTR,TYPER}<n>_EL0
+ * accessors that test cases will use. Each accessor either reads/writes
+ * PMEV{CNTR,TYPER}<n>_EL0 directly (i.e. {read,write}_pmev{cnt,type}rn()),
+ * or does so indirectly through PMXEV{CNTR,TYPER}_EL0
+ * (i.e. {read,write}_sel_ev{cnt,type}r()).
+ *
+ * This is used to test that combinations of those accessors provide
+ * the consistent behavior.
+ */
+struct pmc_accessor {
+       /* A function to be used to read PMEVCNTR<n>_EL0 */
+       unsigned long   (*read_cntr)(int idx);
+       /* A function to be used to write PMEVCNTR<n>_EL0 */
+       void            (*write_cntr)(int idx, unsigned long val);
+       /* A function to be used to read PMEVTYPER<n>_EL0 */
+       unsigned long   (*read_typer)(int idx);
+       /* A function to be used to write PMEVTYPER<n>_EL0 */
+       void            (*write_typer)(int idx, unsigned long val);
+};
+
+struct pmc_accessor pmc_accessors[] = {
+       /* test with all direct accesses */
+       { read_pmevcntrn, write_pmevcntrn, read_pmevtypern, write_pmevtypern },
+       /* test with all indirect accesses */
+       { read_sel_evcntr, write_sel_evcntr, read_sel_evtyper, write_sel_evtyper },
+       /* read with direct accesses, and write with indirect accesses */
+       { read_pmevcntrn, write_sel_evcntr, read_pmevtypern, write_sel_evtyper },
+       /* read with indirect accesses, and write with direct accesses */
+       { read_sel_evcntr, write_pmevcntrn, read_sel_evtyper, write_pmevtypern },
+};
+
+/*
+ * Convert a pointer of pmc_accessor to an index in pmc_accessors[],
+ * assuming that the pointer is one of the entries in pmc_accessors[].
+ */
+#define PMC_ACC_TO_IDX(acc)    (acc - &pmc_accessors[0])
+
+#define GUEST_ASSERT_BITMAP_REG(regname, mask, set_expected)                    \
+{                                                                               \
+       uint64_t _tval = read_sysreg(regname);                                   \
+                                                                                \
+       if (set_expected)                                                        \
+               __GUEST_ASSERT((_tval & mask),                                   \
+                               "tval: 0x%lx; mask: 0x%lx; set_expected: %u",    \
+                               _tval, mask, set_expected);                      \
+       else                                                                     \
+               __GUEST_ASSERT(!(_tval & mask),                                  \
+                               "tval: 0x%lx; mask: 0x%lx; set_expected: %u",    \
+                               _tval, mask, set_expected);                      \
+}
+
+/*
+ * Check if @mask bits in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers
+ * are set or cleared as specified in @set_expected.
+ */
+static void check_bitmap_pmu_regs(uint64_t mask, bool set_expected)
+{
+       GUEST_ASSERT_BITMAP_REG(pmcntenset_el0, mask, set_expected);
+       GUEST_ASSERT_BITMAP_REG(pmcntenclr_el0, mask, set_expected);
+       GUEST_ASSERT_BITMAP_REG(pmintenset_el1, mask, set_expected);
+       GUEST_ASSERT_BITMAP_REG(pmintenclr_el1, mask, set_expected);
+       GUEST_ASSERT_BITMAP_REG(pmovsset_el0, mask, set_expected);
+       GUEST_ASSERT_BITMAP_REG(pmovsclr_el0, mask, set_expected);
+}
+
+/*
+ * Check if the bit in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers corresponding
+ * to the specified counter (@pmc_idx) can be read/written as expected.
+ * When @set_op is true, it tries to set the bit for the counter in
+ * those registers by writing the SET registers (the bit won't be set
+ * if the counter is not implemented though).
+ * Otherwise, it tries to clear the bits in the registers by writing
+ * the CLR registers.
+ * Then, it checks if the values indicated in the registers are as expected.
+ */
+static void test_bitmap_pmu_regs(int pmc_idx, bool set_op)
+{
+       uint64_t pmcr_n, test_bit = BIT(pmc_idx);
+       bool set_expected = false;
+
+       if (set_op) {
+               write_sysreg(test_bit, pmcntenset_el0);
+               write_sysreg(test_bit, pmintenset_el1);
+               write_sysreg(test_bit, pmovsset_el0);
+
+               /* The bit will be set only if the counter is implemented */
+               pmcr_n = get_pmcr_n(read_sysreg(pmcr_el0));
+               set_expected = (pmc_idx < pmcr_n) ? true : false;
+       } else {
+               write_sysreg(test_bit, pmcntenclr_el0);
+               write_sysreg(test_bit, pmintenclr_el1);
+               write_sysreg(test_bit, pmovsclr_el0);
+       }
+       check_bitmap_pmu_regs(test_bit, set_expected);
+}
+
+/*
+ * Tests for reading/writing registers for the (implemented) event counter
+ * specified by @pmc_idx.
+ */
+static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
+{
+       uint64_t write_data, read_data;
+
+       /* Disable all PMCs and reset all PMCs to zero. */
+       pmu_disable_reset();
+
+       /*
+        * Tests for reading/writing {PMCNTEN,PMINTEN,PMOVS}{SET,CLR}_EL1.
+        */
+
+       /* Make sure that the bit in those registers is initially cleared */
+       test_bitmap_pmu_regs(pmc_idx, false);
+       /* Test if setting the bit in those registers works */
+       test_bitmap_pmu_regs(pmc_idx, true);
+       /* Test if clearing the bit in those registers works */
+       test_bitmap_pmu_regs(pmc_idx, false);
+
+       /*
+        * Tests for reading/writing the event type register.
+        */
+
+       /*
+        * Set the event type register to an arbitrary value just to test
+        * reading/writing the register.
+        * The Arm ARM says that for events 0x0000 to 0x003F, the value read
+        * back from the PMEVTYPER<n>_EL0.evtCount field is the value that was
+        * written, even when the specified event is not supported.
+        */
+       write_data = (ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMUV3_PERFCTR_INST_RETIRED);
+       acc->write_typer(pmc_idx, write_data);
+       read_data = acc->read_typer(pmc_idx);
+       __GUEST_ASSERT(read_data == write_data,
+                      "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
+                      pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
+
+       /*
+        * Tests for reading/writing the event count register.
+        */
+
+       read_data = acc->read_cntr(pmc_idx);
+
+       /* The count value must be 0, as it is disabled and reset */
+       __GUEST_ASSERT(read_data == 0,
+                      "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx",
+                      pmc_idx, PMC_ACC_TO_IDX(acc), read_data);
+
+       write_data = read_data + pmc_idx + 0x12345;
+       acc->write_cntr(pmc_idx, write_data);
+       read_data = acc->read_cntr(pmc_idx);
+       __GUEST_ASSERT(read_data == write_data,
+                      "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
+                      pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
+}
+
+#define INVALID_EC     (-1ul)
+uint64_t expected_ec = INVALID_EC;
+
+static void guest_sync_handler(struct ex_regs *regs)
+{
+       uint64_t esr, ec;
+
+       esr = read_sysreg(esr_el1);
+       ec = ESR_ELx_EC(esr);
+
+       __GUEST_ASSERT(expected_ec == ec,
+                       "PC: 0x%lx; ESR: 0x%lx; EC: 0x%lx; EC expected: 0x%lx",
+                       regs->pc, esr, ec, expected_ec);
+
+       /* skip the trapping instruction */
+       regs->pc += 4;
+
+       /* Use INVALID_EC to indicate an exception occurred */
+       expected_ec = INVALID_EC;
+}
+
+/*
+ * Run the given operation that should trigger an exception with the
+ * given exception class. The exception handler (guest_sync_handler)
+ * will reset expected_ec to INVALID_EC and skip the instruction that
+ * trapped.
+ */
+#define TEST_EXCEPTION(ec, ops)                                \
+({                                                     \
+       GUEST_ASSERT(ec != INVALID_EC);                 \
+       WRITE_ONCE(expected_ec, ec);                    \
+       dsb(ish);                                       \
+       ops;                                            \
+       GUEST_ASSERT(expected_ec == INVALID_EC);        \
+})
+
+/*
+ * Tests for reading/writing registers for the unimplemented event counter
+ * specified by @pmc_idx (>= PMCR_EL0.N).
+ */
+static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
+{
+       /*
+        * Reading/writing the event count/type registers should cause
+        * an UNDEFINED exception.
+        */
+       TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_cntr(pmc_idx));
+       TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0));
+       TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_typer(pmc_idx));
+       TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_typer(pmc_idx, 0));
+       /*
+        * The bit corresponding to the (unimplemented) counter in
+        * {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers should be RAZ.
+        */
+       test_bitmap_pmu_regs(pmc_idx, 1);
+       test_bitmap_pmu_regs(pmc_idx, 0);
+}
+
+/*
+ * The guest is configured with PMUv3 with @expected_pmcr_n number of
+ * event counters.
+ * Check if @expected_pmcr_n is consistent with PMCR_EL0.N, and
+ * if reading/writing PMU registers for implemented or unimplemented
+ * counters works as expected.
+ */
+static void guest_code(uint64_t expected_pmcr_n)
+{
+       uint64_t pmcr, pmcr_n, unimp_mask;
+       int i, pmc;
+
+       __GUEST_ASSERT(expected_pmcr_n <= ARMV8_PMU_MAX_GENERAL_COUNTERS,
+                       "Expected PMCR.N: 0x%lx; ARMv8 general counters: 0x%x",
+                       expected_pmcr_n, ARMV8_PMU_MAX_GENERAL_COUNTERS);
+
+       pmcr = read_sysreg(pmcr_el0);
+       pmcr_n = get_pmcr_n(pmcr);
+
+       /* Make sure that PMCR_EL0.N indicates the value userspace set */
+       __GUEST_ASSERT(pmcr_n == expected_pmcr_n,
+                       "Expected PMCR.N: 0x%lx, PMCR.N: 0x%lx",
+                       expected_pmcr_n, pmcr_n);
+
+       /*
+        * Make sure that (RAZ) bits corresponding to unimplemented event
+        * counters in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers are reset
+        * to zero.
+        * (NOTE: bits for implemented event counters are reset to UNKNOWN)
+        */
+       unimp_mask = GENMASK_ULL(ARMV8_PMU_MAX_GENERAL_COUNTERS - 1, pmcr_n);
+       check_bitmap_pmu_regs(unimp_mask, false);
+
+       /*
+        * Tests for reading/writing PMU registers for implemented counters.
+        * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions.
+        */
+       for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) {
+               for (pmc = 0; pmc < pmcr_n; pmc++)
+                       test_access_pmc_regs(&pmc_accessors[i], pmc);
+       }
+
+       /*
+        * Tests for reading/writing PMU registers for unimplemented counters.
+        * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions.
+        */
+       for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) {
+               for (pmc = pmcr_n; pmc < ARMV8_PMU_MAX_GENERAL_COUNTERS; pmc++)
+                       test_access_invalid_pmc_regs(&pmc_accessors[i], pmc);
+       }
+
+       GUEST_DONE();
+}
+
+/* Create a VM that has one vCPU with PMUv3 configured. */
+static void create_vpmu_vm(void *guest_code)
+{
+       struct kvm_vcpu_init init;
+       uint8_t pmuver, ec;
+       uint64_t dfr0, irq = 23;
+       struct kvm_device_attr irq_attr = {
+               .group = KVM_ARM_VCPU_PMU_V3_CTRL,
+               .attr = KVM_ARM_VCPU_PMU_V3_IRQ,
+               .addr = (uint64_t)&irq,
+       };
+       struct kvm_device_attr init_attr = {
+               .group = KVM_ARM_VCPU_PMU_V3_CTRL,
+               .attr = KVM_ARM_VCPU_PMU_V3_INIT,
+       };
+
+       /* The test creates the vpmu_vm multiple times. Ensure a clean state */
+       memset(&vpmu_vm, 0, sizeof(vpmu_vm));
+
+       vpmu_vm.vm = vm_create(1);
+       vm_init_descriptor_tables(vpmu_vm.vm);
+       for (ec = 0; ec < ESR_ELx_EC_MAX + 1; ec++) {
+               vm_install_sync_handler(vpmu_vm.vm, VECTOR_SYNC_CURRENT, ec,
+                                       guest_sync_handler);
+       }
+
+       /* Create vCPU with PMUv3 */
+       vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
+       init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
+       vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code);
+       vcpu_init_descriptor_tables(vpmu_vm.vcpu);
+       vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64);
+       __TEST_REQUIRE(vpmu_vm.gic_fd >= 0,
+                      "Failed to create vgic-v3, skipping");
+
+       /* Make sure that PMUv3 support is indicated in the ID register */
+       dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
+       pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0);
+       TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF &&
+                   pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP,
+                   "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver);
+
+       /* Initialize vPMU */
+       vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr);
+       vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr);
+}
+
+static void destroy_vpmu_vm(void)
+{
+       close(vpmu_vm.gic_fd);
+       kvm_vm_free(vpmu_vm.vm);
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n)
+{
+       struct ucall uc;
+
+       vcpu_args_set(vcpu, 1, pmcr_n);
+       vcpu_run(vcpu);
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       case UCALL_DONE:
+               break;
+       default:
+               TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               break;
+       }
+}
+
+static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail)
+{
+       struct kvm_vcpu *vcpu;
+       uint64_t pmcr, pmcr_orig;
+
+       create_vpmu_vm(guest_code);
+       vcpu = vpmu_vm.vcpu;
+
+       pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
+       pmcr = pmcr_orig;
+
+       /*
+        * Setting a larger value of PMCR.N should not modify the field, and
+        * should still return success.
+        */
+       set_pmcr_n(&pmcr, pmcr_n);
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr);
+       pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
+
+       if (expect_fail)
+               TEST_ASSERT(pmcr_orig == pmcr,
+                           "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx",
+                           pmcr, pmcr_n);
+       else
+               TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr),
+                           "Failed to update PMCR.N to %lu (received: %lu)",
+                           pmcr_n, get_pmcr_n(pmcr));
+}
+
+/*
+ * Create a guest with one vCPU, set the PMCR_EL0.N for the vCPU to @pmcr_n,
+ * and run the test.
+ */
+static void run_access_test(uint64_t pmcr_n)
+{
+       uint64_t sp;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vcpu_init init;
+
+       pr_debug("Test with pmcr_n %lu\n", pmcr_n);
+
+       test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
+       vcpu = vpmu_vm.vcpu;
+
+       /* Save the initial sp so it can be restored before re-running the guest */
+       sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1));
+
+       run_vcpu(vcpu, pmcr_n);
+
+       /*
+        * Reset and re-initialize the vCPU, and run the guest code again to
+        * check if PMCR_EL0.N is preserved.
+        */
+       vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
+       init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
+       aarch64_vcpu_setup(vcpu, &init);
+       vcpu_init_descriptor_tables(vcpu);
+       vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp);
+       vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+
+       run_vcpu(vcpu, pmcr_n);
+
+       destroy_vpmu_vm();
+}
+
+static struct pmreg_sets validity_check_reg_sets[] = {
+       PMREG_SET(SYS_PMCNTENSET_EL0, SYS_PMCNTENCLR_EL0),
+       PMREG_SET(SYS_PMINTENSET_EL1, SYS_PMINTENCLR_EL1),
+       PMREG_SET(SYS_PMOVSSET_EL0, SYS_PMOVSCLR_EL0),
+};
+
+/*
+ * Create a VM, and check if KVM handles the userspace accesses of
+ * the PMU register sets in @validity_check_reg_sets[] correctly.
+ */
+static void run_pmregs_validity_test(uint64_t pmcr_n)
+{
+       int i;
+       struct kvm_vcpu *vcpu;
+       uint64_t set_reg_id, clr_reg_id, reg_val;
+       uint64_t valid_counters_mask, max_counters_mask;
+
+       test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
+       vcpu = vpmu_vm.vcpu;
+
+       valid_counters_mask = get_counters_mask(pmcr_n);
+       max_counters_mask = get_counters_mask(ARMV8_PMU_MAX_COUNTERS);
+
+       for (i = 0; i < ARRAY_SIZE(validity_check_reg_sets); i++) {
+               set_reg_id = validity_check_reg_sets[i].set_reg_id;
+               clr_reg_id = validity_check_reg_sets[i].clr_reg_id;
+
+               /*
+                * Test if the 'set' and 'clr' variants of the registers
+                * are initialized based on the number of valid counters.
+                */
+               reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id));
+               TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+                           "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
+                           KVM_ARM64_SYS_REG(set_reg_id), reg_val);
+
+               reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id));
+               TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+                           "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
+                           KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
+
+               /*
+                * Using the 'set' variant, force-set the register to the
+                * max number of possible counters and test if KVM discards
+                * the bits for unimplemented counters as it should.
+                */
+               vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), max_counters_mask);
+
+               reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id));
+               TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+                           "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
+                           KVM_ARM64_SYS_REG(set_reg_id), reg_val);
+
+               reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id));
+               TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+                           "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
+                           KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
+       }
+
+       destroy_vpmu_vm();
+}
+
+/*
+ * Create a guest with one vCPU, and attempt to set the PMCR_EL0.N for
+ * the vCPU to @pmcr_n, which is larger than the host value.
+ * The attempt should fail as @pmcr_n is too big to set for the vCPU.
+ */
+static void run_error_test(uint64_t pmcr_n)
+{
+       pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n);
+
+       test_create_vpmu_vm_with_pmcr_n(pmcr_n, true);
+       destroy_vpmu_vm();
+}
+
+/*
+ * Return the default number of implemented PMU event counters excluding
+ * the cycle counter (i.e. PMCR_EL0.N value) for the guest.
+ */
+static uint64_t get_pmcr_n_limit(void)
+{
+       uint64_t pmcr;
+
+       create_vpmu_vm(guest_code);
+       pmcr = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
+       destroy_vpmu_vm();
+       return get_pmcr_n(pmcr);
+}
+
+int main(void)
+{
+       uint64_t i, pmcr_n;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3));
+
+       pmcr_n = get_pmcr_n_limit();
+       for (i = 0; i <= pmcr_n; i++) {
+               run_access_test(i);
+               run_pmregs_validity_test(i);
+       }
+
+       for (i = pmcr_n + 1; i < ARMV8_PMU_MAX_COUNTERS; i++)
+               run_error_test(i);
+
+       return 0;
+}
index 9f24303acb8cb2f9ff53d3fd2699ffb47e87a83e..e79817bd0e29e31fd530480cfba9acbae023fb1c 100644 (file)
@@ -21,7 +21,7 @@
 #include "ucall_common.h"
 
 #ifdef __aarch64__
-#include "aarch64/vgic.h"
+#include "arm64/vgic.h"
 
 static int gic_fd;
 
diff --git a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h
deleted file mode 100644 (file)
index bf461de..0000000
+++ /dev/null
@@ -1,158 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * ARM Generic Timer specific interface
- */
-
-#ifndef SELFTEST_KVM_ARCH_TIMER_H
-#define SELFTEST_KVM_ARCH_TIMER_H
-
-#include "processor.h"
-
-enum arch_timer {
-       VIRTUAL,
-       PHYSICAL,
-};
-
-#define CTL_ENABLE     (1 << 0)
-#define CTL_IMASK      (1 << 1)
-#define CTL_ISTATUS    (1 << 2)
-
-#define msec_to_cycles(msec)   \
-       (timer_get_cntfrq() * (uint64_t)(msec) / 1000)
-
-#define usec_to_cycles(usec)   \
-       (timer_get_cntfrq() * (uint64_t)(usec) / 1000000)
-
-#define cycles_to_usec(cycles) \
-       ((uint64_t)(cycles) * 1000000 / timer_get_cntfrq())
-
-static inline uint32_t timer_get_cntfrq(void)
-{
-       return read_sysreg(cntfrq_el0);
-}
-
-static inline uint64_t timer_get_cntct(enum arch_timer timer)
-{
-       isb();
-
-       switch (timer) {
-       case VIRTUAL:
-               return read_sysreg(cntvct_el0);
-       case PHYSICAL:
-               return read_sysreg(cntpct_el0);
-       default:
-               GUEST_FAIL("Unexpected timer type = %u", timer);
-       }
-
-       /* We should not reach here */
-       return 0;
-}
-
-static inline void timer_set_cval(enum arch_timer timer, uint64_t cval)
-{
-       switch (timer) {
-       case VIRTUAL:
-               write_sysreg(cval, cntv_cval_el0);
-               break;
-       case PHYSICAL:
-               write_sysreg(cval, cntp_cval_el0);
-               break;
-       default:
-               GUEST_FAIL("Unexpected timer type = %u", timer);
-       }
-
-       isb();
-}
-
-static inline uint64_t timer_get_cval(enum arch_timer timer)
-{
-       switch (timer) {
-       case VIRTUAL:
-               return read_sysreg(cntv_cval_el0);
-       case PHYSICAL:
-               return read_sysreg(cntp_cval_el0);
-       default:
-               GUEST_FAIL("Unexpected timer type = %u", timer);
-       }
-
-       /* We should not reach here */
-       return 0;
-}
-
-static inline void timer_set_tval(enum arch_timer timer, int32_t tval)
-{
-       switch (timer) {
-       case VIRTUAL:
-               write_sysreg(tval, cntv_tval_el0);
-               break;
-       case PHYSICAL:
-               write_sysreg(tval, cntp_tval_el0);
-               break;
-       default:
-               GUEST_FAIL("Unexpected timer type = %u", timer);
-       }
-
-       isb();
-}
-
-static inline int32_t timer_get_tval(enum arch_timer timer)
-{
-       isb();
-       switch (timer) {
-       case VIRTUAL:
-               return read_sysreg(cntv_tval_el0);
-       case PHYSICAL:
-               return read_sysreg(cntp_tval_el0);
-       default:
-               GUEST_FAIL("Could not get timer %d\n", timer);
-       }
-
-       /* We should not reach here */
-       return 0;
-}
-
-static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl)
-{
-       switch (timer) {
-       case VIRTUAL:
-               write_sysreg(ctl, cntv_ctl_el0);
-               break;
-       case PHYSICAL:
-               write_sysreg(ctl, cntp_ctl_el0);
-               break;
-       default:
-               GUEST_FAIL("Unexpected timer type = %u", timer);
-       }
-
-       isb();
-}
-
-static inline uint32_t timer_get_ctl(enum arch_timer timer)
-{
-       switch (timer) {
-       case VIRTUAL:
-               return read_sysreg(cntv_ctl_el0);
-       case PHYSICAL:
-               return read_sysreg(cntp_ctl_el0);
-       default:
-               GUEST_FAIL("Unexpected timer type = %u", timer);
-       }
-
-       /* We should not reach here */
-       return 0;
-}
-
-static inline void timer_set_next_cval_ms(enum arch_timer timer, uint32_t msec)
-{
-       uint64_t now_ct = timer_get_cntct(timer);
-       uint64_t next_ct = now_ct + msec_to_cycles(msec);
-
-       timer_set_cval(timer, next_ct);
-}
-
-static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec)
-{
-       timer_set_tval(timer, msec_to_cycles(msec));
-}
-
-#endif /* SELFTEST_KVM_ARCH_TIMER_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/delay.h b/tools/testing/selftests/kvm/include/aarch64/delay.h
deleted file mode 100644 (file)
index 329e4f5..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * ARM simple delay routines
- */
-
-#ifndef SELFTEST_KVM_ARM_DELAY_H
-#define SELFTEST_KVM_ARM_DELAY_H
-
-#include "arch_timer.h"
-
-static inline void __delay(uint64_t cycles)
-{
-       enum arch_timer timer = VIRTUAL;
-       uint64_t start = timer_get_cntct(timer);
-
-       while ((timer_get_cntct(timer) - start) < cycles)
-               cpu_relax();
-}
-
-static inline void udelay(unsigned long usec)
-{
-       __delay(usec_to_cycles(usec));
-}
-
-#endif /* SELFTEST_KVM_ARM_DELAY_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/aarch64/gic.h
deleted file mode 100644 (file)
index baeb3c8..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * ARM Generic Interrupt Controller (GIC) specific defines
- */
-
-#ifndef SELFTEST_KVM_GIC_H
-#define SELFTEST_KVM_GIC_H
-
-#include <asm/kvm.h>
-
-enum gic_type {
-       GIC_V3,
-       GIC_TYPE_MAX,
-};
-
-/*
- * Note that the redistributor frames are at the end, as the range scales
- * with the number of vCPUs in the VM.
- */
-#define GITS_BASE_GPA          0x8000000ULL
-#define GICD_BASE_GPA          (GITS_BASE_GPA + KVM_VGIC_V3_ITS_SIZE)
-#define GICR_BASE_GPA          (GICD_BASE_GPA + KVM_VGIC_V3_DIST_SIZE)
-
-/* The GIC is identity-mapped into the guest at the time of setup. */
-#define GITS_BASE_GVA          ((volatile void *)GITS_BASE_GPA)
-#define GICD_BASE_GVA          ((volatile void *)GICD_BASE_GPA)
-#define GICR_BASE_GVA          ((volatile void *)GICR_BASE_GPA)
-
-#define MIN_SGI                        0
-#define MIN_PPI                        16
-#define MIN_SPI                        32
-#define MAX_SPI                        1019
-#define IAR_SPURIOUS           1023
-
-#define INTID_IS_SGI(intid)    (0       <= (intid) && (intid) < MIN_PPI)
-#define INTID_IS_PPI(intid)    (MIN_PPI <= (intid) && (intid) < MIN_SPI)
-#define INTID_IS_SPI(intid)    (MIN_SPI <= (intid) && (intid) <= MAX_SPI)
-
-void gic_init(enum gic_type type, unsigned int nr_cpus);
-void gic_irq_enable(unsigned int intid);
-void gic_irq_disable(unsigned int intid);
-unsigned int gic_get_and_ack_irq(void);
-void gic_set_eoi(unsigned int intid);
-void gic_set_dir(unsigned int intid);
-
-/*
- * Sets the EOI mode. When split is false, EOI just drops the priority. When
- * split is true, EOI drops the priority and deactivates the interrupt.
- */
-void gic_set_eoi_split(bool split);
-void gic_set_priority_mask(uint64_t mask);
-void gic_set_priority(uint32_t intid, uint32_t prio);
-void gic_irq_set_active(unsigned int intid);
-void gic_irq_clear_active(unsigned int intid);
-bool gic_irq_get_active(unsigned int intid);
-void gic_irq_set_pending(unsigned int intid);
-void gic_irq_clear_pending(unsigned int intid);
-bool gic_irq_get_pending(unsigned int intid);
-void gic_irq_set_config(unsigned int intid, bool is_edge);
-
-void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
-                          vm_paddr_t pend_table);
-
-#endif /* SELFTEST_KVM_GIC_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h
deleted file mode 100644
index a76615f..0000000
+++ /dev/null
@@ -1,604 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-#ifndef __SELFTESTS_GIC_V3_H
-#define __SELFTESTS_GIC_V3_H
-
-/*
- * Distributor registers. We assume we're running non-secure, with ARE
- * being set. Secure-only and non-ARE registers are not described.
- */
-#define GICD_CTLR                      0x0000
-#define GICD_TYPER                     0x0004
-#define GICD_IIDR                      0x0008
-#define GICD_TYPER2                    0x000C
-#define GICD_STATUSR                   0x0010
-#define GICD_SETSPI_NSR                        0x0040
-#define GICD_CLRSPI_NSR                        0x0048
-#define GICD_SETSPI_SR                 0x0050
-#define GICD_CLRSPI_SR                 0x0058
-#define GICD_IGROUPR                   0x0080
-#define GICD_ISENABLER                 0x0100
-#define GICD_ICENABLER                 0x0180
-#define GICD_ISPENDR                   0x0200
-#define GICD_ICPENDR                   0x0280
-#define GICD_ISACTIVER                 0x0300
-#define GICD_ICACTIVER                 0x0380
-#define GICD_IPRIORITYR                        0x0400
-#define GICD_ICFGR                     0x0C00
-#define GICD_IGRPMODR                  0x0D00
-#define GICD_NSACR                     0x0E00
-#define GICD_IGROUPRnE                 0x1000
-#define GICD_ISENABLERnE               0x1200
-#define GICD_ICENABLERnE               0x1400
-#define GICD_ISPENDRnE                 0x1600
-#define GICD_ICPENDRnE                 0x1800
-#define GICD_ISACTIVERnE               0x1A00
-#define GICD_ICACTIVERnE               0x1C00
-#define GICD_IPRIORITYRnE              0x2000
-#define GICD_ICFGRnE                   0x3000
-#define GICD_IROUTER                   0x6000
-#define GICD_IROUTERnE                 0x8000
-#define GICD_IDREGS                    0xFFD0
-#define GICD_PIDR2                     0xFFE8
-
-#define ESPI_BASE_INTID                        4096
-
-/*
- * Those registers are actually from GICv2, but the spec demands that they
- * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3).
- */
-#define GICD_ITARGETSR                 0x0800
-#define GICD_SGIR                      0x0F00
-#define GICD_CPENDSGIR                 0x0F10
-#define GICD_SPENDSGIR                 0x0F20
-
-#define GICD_CTLR_RWP                  (1U << 31)
-#define GICD_CTLR_nASSGIreq            (1U << 8)
-#define GICD_CTLR_DS                   (1U << 6)
-#define GICD_CTLR_ARE_NS               (1U << 4)
-#define GICD_CTLR_ENABLE_G1A           (1U << 1)
-#define GICD_CTLR_ENABLE_G1            (1U << 0)
-
-#define GICD_IIDR_IMPLEMENTER_SHIFT    0
-#define GICD_IIDR_IMPLEMENTER_MASK     (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
-#define GICD_IIDR_REVISION_SHIFT       12
-#define GICD_IIDR_REVISION_MASK                (0xf << GICD_IIDR_REVISION_SHIFT)
-#define GICD_IIDR_VARIANT_SHIFT                16
-#define GICD_IIDR_VARIANT_MASK         (0xf << GICD_IIDR_VARIANT_SHIFT)
-#define GICD_IIDR_PRODUCT_ID_SHIFT     24
-#define GICD_IIDR_PRODUCT_ID_MASK      (0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
-
-
-/*
- * In systems with a single security state (what we emulate in KVM)
- * the meaning of the interrupt group enable bits is slightly different
- */
-#define GICD_CTLR_ENABLE_SS_G1         (1U << 1)
-#define GICD_CTLR_ENABLE_SS_G0         (1U << 0)
-
-#define GICD_TYPER_RSS                 (1U << 26)
-#define GICD_TYPER_LPIS                        (1U << 17)
-#define GICD_TYPER_MBIS                        (1U << 16)
-#define GICD_TYPER_ESPI                        (1U << 8)
-
-#define GICD_TYPER_ID_BITS(typer)      ((((typer) >> 19) & 0x1f) + 1)
-#define GICD_TYPER_NUM_LPIS(typer)     ((((typer) >> 11) & 0x1f) + 1)
-#define GICD_TYPER_SPIS(typer)         ((((typer) & 0x1f) + 1) * 32)
-#define GICD_TYPER_ESPIS(typer)                                                \
-       (((typer) & GICD_TYPER_ESPI) ? GICD_TYPER_SPIS((typer) >> 27) : 0)
-
-#define GICD_TYPER2_nASSGIcap          (1U << 8)
-#define GICD_TYPER2_VIL                        (1U << 7)
-#define GICD_TYPER2_VID                        GENMASK(4, 0)
-
-#define GICD_IROUTER_SPI_MODE_ONE      (0U << 31)
-#define GICD_IROUTER_SPI_MODE_ANY      (1U << 31)
-
-#define GIC_PIDR2_ARCH_MASK            0xf0
-#define GIC_PIDR2_ARCH_GICv3           0x30
-#define GIC_PIDR2_ARCH_GICv4           0x40
-
-#define GIC_V3_DIST_SIZE               0x10000
-
-#define GIC_PAGE_SIZE_4K               0ULL
-#define GIC_PAGE_SIZE_16K              1ULL
-#define GIC_PAGE_SIZE_64K              2ULL
-#define GIC_PAGE_SIZE_MASK             3ULL
-
-/*
- * Re-Distributor registers, offsets from RD_base
- */
-#define GICR_CTLR                      GICD_CTLR
-#define GICR_IIDR                      0x0004
-#define GICR_TYPER                     0x0008
-#define GICR_STATUSR                   GICD_STATUSR
-#define GICR_WAKER                     0x0014
-#define GICR_SETLPIR                   0x0040
-#define GICR_CLRLPIR                   0x0048
-#define GICR_PROPBASER                 0x0070
-#define GICR_PENDBASER                 0x0078
-#define GICR_INVLPIR                   0x00A0
-#define GICR_INVALLR                   0x00B0
-#define GICR_SYNCR                     0x00C0
-#define GICR_IDREGS                    GICD_IDREGS
-#define GICR_PIDR2                     GICD_PIDR2
-
-#define GICR_CTLR_ENABLE_LPIS          (1UL << 0)
-#define GICR_CTLR_CES                  (1UL << 1)
-#define GICR_CTLR_IR                   (1UL << 2)
-#define GICR_CTLR_RWP                  (1UL << 3)
-
-#define GICR_TYPER_CPU_NUMBER(r)       (((r) >> 8) & 0xffff)
-
-#define EPPI_BASE_INTID                        1056
-
-#define GICR_TYPER_NR_PPIS(r)                                          \
-       ({                                                              \
-               unsigned int __ppinum = ((r) >> 27) & 0x1f;             \
-               unsigned int __nr_ppis = 16;                            \
-               if (__ppinum == 1 || __ppinum == 2)                     \
-                       __nr_ppis +=  __ppinum * 32;                    \
-                                                                       \
-               __nr_ppis;                                              \
-        })
-
-#define GICR_WAKER_ProcessorSleep      (1U << 1)
-#define GICR_WAKER_ChildrenAsleep      (1U << 2)
-
-#define GIC_BASER_CACHE_nCnB           0ULL
-#define GIC_BASER_CACHE_SameAsInner    0ULL
-#define GIC_BASER_CACHE_nC             1ULL
-#define GIC_BASER_CACHE_RaWt           2ULL
-#define GIC_BASER_CACHE_RaWb           3ULL
-#define GIC_BASER_CACHE_WaWt           4ULL
-#define GIC_BASER_CACHE_WaWb           5ULL
-#define GIC_BASER_CACHE_RaWaWt         6ULL
-#define GIC_BASER_CACHE_RaWaWb         7ULL
-#define GIC_BASER_CACHE_MASK           7ULL
-#define GIC_BASER_NonShareable         0ULL
-#define GIC_BASER_InnerShareable       1ULL
-#define GIC_BASER_OuterShareable       2ULL
-#define GIC_BASER_SHAREABILITY_MASK    3ULL
-
-#define GIC_BASER_CACHEABILITY(reg, inner_outer, type)                 \
-       (GIC_BASER_CACHE_##type << reg##_##inner_outer##_CACHEABILITY_SHIFT)
-
-#define GIC_BASER_SHAREABILITY(reg, type)                              \
-       (GIC_BASER_##type << reg##_SHAREABILITY_SHIFT)
-
-/* encode a size field of width @w containing @n - 1 units */
-#define GIC_ENCODE_SZ(n, w) (((unsigned long)(n) - 1) & GENMASK_ULL(((w) - 1), 0))
-
-#define GICR_PROPBASER_SHAREABILITY_SHIFT              (10)
-#define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT                (7)
-#define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT                (56)
-#define GICR_PROPBASER_SHAREABILITY_MASK                               \
-       GIC_BASER_SHAREABILITY(GICR_PROPBASER, SHAREABILITY_MASK)
-#define GICR_PROPBASER_INNER_CACHEABILITY_MASK                         \
-       GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, MASK)
-#define GICR_PROPBASER_OUTER_CACHEABILITY_MASK                         \
-       GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, MASK)
-#define GICR_PROPBASER_CACHEABILITY_MASK GICR_PROPBASER_INNER_CACHEABILITY_MASK
-
-#define GICR_PROPBASER_InnerShareable                                  \
-       GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable)
-
-#define GICR_PROPBASER_nCnB    GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nCnB)
-#define GICR_PROPBASER_nC      GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nC)
-#define GICR_PROPBASER_RaWt    GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt)
-#define GICR_PROPBASER_RaWb    GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb)
-#define GICR_PROPBASER_WaWt    GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWt)
-#define GICR_PROPBASER_WaWb    GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWb)
-#define GICR_PROPBASER_RaWaWt  GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWt)
-#define GICR_PROPBASER_RaWaWb  GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb)
-
-#define GICR_PROPBASER_IDBITS_MASK                     (0x1f)
-#define GICR_PROPBASER_ADDRESS(x)      ((x) & GENMASK_ULL(51, 12))
-#define GICR_PENDBASER_ADDRESS(x)      ((x) & GENMASK_ULL(51, 16))
-
-#define GICR_PENDBASER_SHAREABILITY_SHIFT              (10)
-#define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT                (7)
-#define GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT                (56)
-#define GICR_PENDBASER_SHAREABILITY_MASK                               \
-       GIC_BASER_SHAREABILITY(GICR_PENDBASER, SHAREABILITY_MASK)
-#define GICR_PENDBASER_INNER_CACHEABILITY_MASK                         \
-       GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, MASK)
-#define GICR_PENDBASER_OUTER_CACHEABILITY_MASK                         \
-       GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, MASK)
-#define GICR_PENDBASER_CACHEABILITY_MASK GICR_PENDBASER_INNER_CACHEABILITY_MASK
-
-#define GICR_PENDBASER_InnerShareable                                  \
-       GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable)
-
-#define GICR_PENDBASER_nCnB    GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nCnB)
-#define GICR_PENDBASER_nC      GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nC)
-#define GICR_PENDBASER_RaWt    GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt)
-#define GICR_PENDBASER_RaWb    GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb)
-#define GICR_PENDBASER_WaWt    GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWt)
-#define GICR_PENDBASER_WaWb    GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWb)
-#define GICR_PENDBASER_RaWaWt  GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWt)
-#define GICR_PENDBASER_RaWaWb  GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWb)
-
-#define GICR_PENDBASER_PTZ                             BIT_ULL(62)
-
-/*
- * Re-Distributor registers, offsets from SGI_base
- */
-#define GICR_IGROUPR0                  GICD_IGROUPR
-#define GICR_ISENABLER0                        GICD_ISENABLER
-#define GICR_ICENABLER0                        GICD_ICENABLER
-#define GICR_ISPENDR0                  GICD_ISPENDR
-#define GICR_ICPENDR0                  GICD_ICPENDR
-#define GICR_ISACTIVER0                        GICD_ISACTIVER
-#define GICR_ICACTIVER0                        GICD_ICACTIVER
-#define GICR_IPRIORITYR0               GICD_IPRIORITYR
-#define GICR_ICFGR0                    GICD_ICFGR
-#define GICR_IGRPMODR0                 GICD_IGRPMODR
-#define GICR_NSACR                     GICD_NSACR
-
-#define GICR_TYPER_PLPIS               (1U << 0)
-#define GICR_TYPER_VLPIS               (1U << 1)
-#define GICR_TYPER_DIRTY               (1U << 2)
-#define GICR_TYPER_DirectLPIS          (1U << 3)
-#define GICR_TYPER_LAST                        (1U << 4)
-#define GICR_TYPER_RVPEID              (1U << 7)
-#define GICR_TYPER_COMMON_LPI_AFF      GENMASK_ULL(25, 24)
-#define GICR_TYPER_AFFINITY            GENMASK_ULL(63, 32)
-
-#define GICR_INVLPIR_INTID             GENMASK_ULL(31, 0)
-#define GICR_INVLPIR_VPEID             GENMASK_ULL(47, 32)
-#define GICR_INVLPIR_V                 GENMASK_ULL(63, 63)
-
-#define GICR_INVALLR_VPEID             GICR_INVLPIR_VPEID
-#define GICR_INVALLR_V                 GICR_INVLPIR_V
-
-#define GIC_V3_REDIST_SIZE             0x20000
-
-#define LPI_PROP_GROUP1                        (1 << 1)
-#define LPI_PROP_ENABLED               (1 << 0)
-
-/*
- * Re-Distributor registers, offsets from VLPI_base
- */
-#define GICR_VPROPBASER                        0x0070
-
-#define GICR_VPROPBASER_IDBITS_MASK    0x1f
-
-#define GICR_VPROPBASER_SHAREABILITY_SHIFT             (10)
-#define GICR_VPROPBASER_INNER_CACHEABILITY_SHIFT       (7)
-#define GICR_VPROPBASER_OUTER_CACHEABILITY_SHIFT       (56)
-
-#define GICR_VPROPBASER_SHAREABILITY_MASK                              \
-       GIC_BASER_SHAREABILITY(GICR_VPROPBASER, SHAREABILITY_MASK)
-#define GICR_VPROPBASER_INNER_CACHEABILITY_MASK                                \
-       GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, MASK)
-#define GICR_VPROPBASER_OUTER_CACHEABILITY_MASK                                \
-       GIC_BASER_CACHEABILITY(GICR_VPROPBASER, OUTER, MASK)
-#define GICR_VPROPBASER_CACHEABILITY_MASK                              \
-       GICR_VPROPBASER_INNER_CACHEABILITY_MASK
-
-#define GICR_VPROPBASER_InnerShareable                                 \
-       GIC_BASER_SHAREABILITY(GICR_VPROPBASER, InnerShareable)
-
-#define GICR_VPROPBASER_nCnB   GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nCnB)
-#define GICR_VPROPBASER_nC     GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nC)
-#define GICR_VPROPBASER_RaWt   GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt)
-#define GICR_VPROPBASER_RaWb   GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWb)
-#define GICR_VPROPBASER_WaWt   GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWt)
-#define GICR_VPROPBASER_WaWb   GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWb)
-#define GICR_VPROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWt)
-#define GICR_VPROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWb)
-
-/*
- * GICv4.1 VPROPBASER reinvention. A subtle mix between the old
- * VPROPBASER and ITS_BASER. Just not quite any of the two.
- */
-#define GICR_VPROPBASER_4_1_VALID      (1ULL << 63)
-#define GICR_VPROPBASER_4_1_ENTRY_SIZE GENMASK_ULL(61, 59)
-#define GICR_VPROPBASER_4_1_INDIRECT   (1ULL << 55)
-#define GICR_VPROPBASER_4_1_PAGE_SIZE  GENMASK_ULL(54, 53)
-#define GICR_VPROPBASER_4_1_Z          (1ULL << 52)
-#define GICR_VPROPBASER_4_1_ADDR       GENMASK_ULL(51, 12)
-#define GICR_VPROPBASER_4_1_SIZE       GENMASK_ULL(6, 0)
-
-#define GICR_VPENDBASER                        0x0078
-
-#define GICR_VPENDBASER_SHAREABILITY_SHIFT             (10)
-#define GICR_VPENDBASER_INNER_CACHEABILITY_SHIFT       (7)
-#define GICR_VPENDBASER_OUTER_CACHEABILITY_SHIFT       (56)
-#define GICR_VPENDBASER_SHAREABILITY_MASK                              \
-       GIC_BASER_SHAREABILITY(GICR_VPENDBASER, SHAREABILITY_MASK)
-#define GICR_VPENDBASER_INNER_CACHEABILITY_MASK                                \
-       GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, MASK)
-#define GICR_VPENDBASER_OUTER_CACHEABILITY_MASK                                \
-       GIC_BASER_CACHEABILITY(GICR_VPENDBASER, OUTER, MASK)
-#define GICR_VPENDBASER_CACHEABILITY_MASK                              \
-       GICR_VPENDBASER_INNER_CACHEABILITY_MASK
-
-#define GICR_VPENDBASER_NonShareable                                   \
-       GIC_BASER_SHAREABILITY(GICR_VPENDBASER, NonShareable)
-
-#define GICR_VPENDBASER_InnerShareable                                 \
-       GIC_BASER_SHAREABILITY(GICR_VPENDBASER, InnerShareable)
-
-#define GICR_VPENDBASER_nCnB   GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB)
-#define GICR_VPENDBASER_nC     GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC)
-#define GICR_VPENDBASER_RaWt   GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt)
-#define GICR_VPENDBASER_RaWb   GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWb)
-#define GICR_VPENDBASER_WaWt   GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWt)
-#define GICR_VPENDBASER_WaWb   GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWb)
-#define GICR_VPENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWt)
-#define GICR_VPENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWb)
-
-#define GICR_VPENDBASER_Dirty          (1ULL << 60)
-#define GICR_VPENDBASER_PendingLast    (1ULL << 61)
-#define GICR_VPENDBASER_IDAI           (1ULL << 62)
-#define GICR_VPENDBASER_Valid          (1ULL << 63)
-
-/*
- * GICv4.1 VPENDBASER, used for VPE residency. On top of these fields,
- * also use the above Valid, PendingLast and Dirty.
- */
-#define GICR_VPENDBASER_4_1_DB         (1ULL << 62)
-#define GICR_VPENDBASER_4_1_VGRP0EN    (1ULL << 59)
-#define GICR_VPENDBASER_4_1_VGRP1EN    (1ULL << 58)
-#define GICR_VPENDBASER_4_1_VPEID      GENMASK_ULL(15, 0)
-
-#define GICR_VSGIR                     0x0080
-
-#define GICR_VSGIR_VPEID               GENMASK(15, 0)
-
-#define GICR_VSGIPENDR                 0x0088
-
-#define GICR_VSGIPENDR_BUSY            (1U << 31)
-#define GICR_VSGIPENDR_PENDING         GENMASK(15, 0)
-
-/*
- * ITS registers, offsets from ITS_base
- */
-#define GITS_CTLR                      0x0000
-#define GITS_IIDR                      0x0004
-#define GITS_TYPER                     0x0008
-#define GITS_MPIDR                     0x0018
-#define GITS_CBASER                    0x0080
-#define GITS_CWRITER                   0x0088
-#define GITS_CREADR                    0x0090
-#define GITS_BASER                     0x0100
-#define GITS_IDREGS_BASE               0xffd0
-#define GITS_PIDR0                     0xffe0
-#define GITS_PIDR1                     0xffe4
-#define GITS_PIDR2                     GICR_PIDR2
-#define GITS_PIDR4                     0xffd0
-#define GITS_CIDR0                     0xfff0
-#define GITS_CIDR1                     0xfff4
-#define GITS_CIDR2                     0xfff8
-#define GITS_CIDR3                     0xfffc
-
-#define GITS_TRANSLATER                        0x10040
-
-#define GITS_SGIR                      0x20020
-
-#define GITS_SGIR_VPEID                        GENMASK_ULL(47, 32)
-#define GITS_SGIR_VINTID               GENMASK_ULL(3, 0)
-
-#define GITS_CTLR_ENABLE               (1U << 0)
-#define GITS_CTLR_ImDe                 (1U << 1)
-#define        GITS_CTLR_ITS_NUMBER_SHIFT      4
-#define        GITS_CTLR_ITS_NUMBER            (0xFU << GITS_CTLR_ITS_NUMBER_SHIFT)
-#define GITS_CTLR_QUIESCENT            (1U << 31)
-
-#define GITS_TYPER_PLPIS               (1UL << 0)
-#define GITS_TYPER_VLPIS               (1UL << 1)
-#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT        4
-#define GITS_TYPER_ITT_ENTRY_SIZE      GENMASK_ULL(7, 4)
-#define GITS_TYPER_IDBITS_SHIFT                8
-#define GITS_TYPER_DEVBITS_SHIFT       13
-#define GITS_TYPER_DEVBITS             GENMASK_ULL(17, 13)
-#define GITS_TYPER_PTA                 (1UL << 19)
-#define GITS_TYPER_HCC_SHIFT           24
-#define GITS_TYPER_HCC(r)              (((r) >> GITS_TYPER_HCC_SHIFT) & 0xff)
-#define GITS_TYPER_VMOVP               (1ULL << 37)
-#define GITS_TYPER_VMAPP               (1ULL << 40)
-#define GITS_TYPER_SVPET               GENMASK_ULL(42, 41)
-
-#define GITS_IIDR_REV_SHIFT            12
-#define GITS_IIDR_REV_MASK             (0xf << GITS_IIDR_REV_SHIFT)
-#define GITS_IIDR_REV(r)               (((r) >> GITS_IIDR_REV_SHIFT) & 0xf)
-#define GITS_IIDR_PRODUCTID_SHIFT      24
-
-#define GITS_CBASER_VALID                      (1ULL << 63)
-#define GITS_CBASER_SHAREABILITY_SHIFT         (10)
-#define GITS_CBASER_INNER_CACHEABILITY_SHIFT   (59)
-#define GITS_CBASER_OUTER_CACHEABILITY_SHIFT   (53)
-#define GITS_CBASER_SHAREABILITY_MASK                                  \
-       GIC_BASER_SHAREABILITY(GITS_CBASER, SHAREABILITY_MASK)
-#define GITS_CBASER_INNER_CACHEABILITY_MASK                            \
-       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, MASK)
-#define GITS_CBASER_OUTER_CACHEABILITY_MASK                            \
-       GIC_BASER_CACHEABILITY(GITS_CBASER, OUTER, MASK)
-#define GITS_CBASER_CACHEABILITY_MASK GITS_CBASER_INNER_CACHEABILITY_MASK
-
-#define GITS_CBASER_InnerShareable                                     \
-       GIC_BASER_SHAREABILITY(GITS_CBASER, InnerShareable)
-
-#define GITS_CBASER_nCnB       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nCnB)
-#define GITS_CBASER_nC         GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nC)
-#define GITS_CBASER_RaWt       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt)
-#define GITS_CBASER_RaWb       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWb)
-#define GITS_CBASER_WaWt       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWt)
-#define GITS_CBASER_WaWb       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWb)
-#define GITS_CBASER_RaWaWt     GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt)
-#define GITS_CBASER_RaWaWb     GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb)
-
-#define GITS_CBASER_ADDRESS(cbaser)    ((cbaser) & GENMASK_ULL(51, 12))
-
-#define GITS_BASER_NR_REGS             8
-
-#define GITS_BASER_VALID                       (1ULL << 63)
-#define GITS_BASER_INDIRECT                    (1ULL << 62)
-
-#define GITS_BASER_INNER_CACHEABILITY_SHIFT    (59)
-#define GITS_BASER_OUTER_CACHEABILITY_SHIFT    (53)
-#define GITS_BASER_INNER_CACHEABILITY_MASK                             \
-       GIC_BASER_CACHEABILITY(GITS_BASER, INNER, MASK)
-#define GITS_BASER_CACHEABILITY_MASK           GITS_BASER_INNER_CACHEABILITY_MASK
-#define GITS_BASER_OUTER_CACHEABILITY_MASK                             \
-       GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, MASK)
-#define GITS_BASER_SHAREABILITY_MASK                                   \
-       GIC_BASER_SHAREABILITY(GITS_BASER, SHAREABILITY_MASK)
-
-#define GITS_BASER_nCnB                GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nCnB)
-#define GITS_BASER_nC          GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nC)
-#define GITS_BASER_RaWt                GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt)
-#define GITS_BASER_RaWb                GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb)
-#define GITS_BASER_WaWt                GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWt)
-#define GITS_BASER_WaWb                GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWb)
-#define GITS_BASER_RaWaWt      GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWt)
-#define GITS_BASER_RaWaWb      GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWb)
-
-#define GITS_BASER_TYPE_SHIFT                  (56)
-#define GITS_BASER_TYPE(r)             (((r) >> GITS_BASER_TYPE_SHIFT) & 7)
-#define GITS_BASER_ENTRY_SIZE_SHIFT            (48)
-#define GITS_BASER_ENTRY_SIZE(r)       ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1)
-#define GITS_BASER_ENTRY_SIZE_MASK     GENMASK_ULL(52, 48)
-#define GITS_BASER_PHYS_52_to_48(phys)                                 \
-       (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12)
-#define GITS_BASER_ADDR_48_to_52(baser)                                        \
-       (((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48)
-
-#define GITS_BASER_SHAREABILITY_SHIFT  (10)
-#define GITS_BASER_InnerShareable                                      \
-       GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)
-#define GITS_BASER_PAGE_SIZE_SHIFT     (8)
-#define __GITS_BASER_PSZ(sz)           (GIC_PAGE_SIZE_ ## sz << GITS_BASER_PAGE_SIZE_SHIFT)
-#define GITS_BASER_PAGE_SIZE_4K                __GITS_BASER_PSZ(4K)
-#define GITS_BASER_PAGE_SIZE_16K       __GITS_BASER_PSZ(16K)
-#define GITS_BASER_PAGE_SIZE_64K       __GITS_BASER_PSZ(64K)
-#define GITS_BASER_PAGE_SIZE_MASK      __GITS_BASER_PSZ(MASK)
-#define GITS_BASER_PAGES_MAX           256
-#define GITS_BASER_PAGES_SHIFT         (0)
-#define GITS_BASER_NR_PAGES(r)         (((r) & 0xff) + 1)
-
-#define GITS_BASER_TYPE_NONE           0
-#define GITS_BASER_TYPE_DEVICE         1
-#define GITS_BASER_TYPE_VCPU           2
-#define GITS_BASER_TYPE_RESERVED3      3
-#define GITS_BASER_TYPE_COLLECTION     4
-#define GITS_BASER_TYPE_RESERVED5      5
-#define GITS_BASER_TYPE_RESERVED6      6
-#define GITS_BASER_TYPE_RESERVED7      7
-
-#define GITS_LVL1_ENTRY_SIZE           (8UL)
-
-/*
- * ITS commands
- */
-#define GITS_CMD_MAPD                  0x08
-#define GITS_CMD_MAPC                  0x09
-#define GITS_CMD_MAPTI                 0x0a
-#define GITS_CMD_MAPI                  0x0b
-#define GITS_CMD_MOVI                  0x01
-#define GITS_CMD_DISCARD               0x0f
-#define GITS_CMD_INV                   0x0c
-#define GITS_CMD_MOVALL                        0x0e
-#define GITS_CMD_INVALL                        0x0d
-#define GITS_CMD_INT                   0x03
-#define GITS_CMD_CLEAR                 0x04
-#define GITS_CMD_SYNC                  0x05
-
-/*
- * GICv4 ITS specific commands
- */
-#define GITS_CMD_GICv4(x)              ((x) | 0x20)
-#define GITS_CMD_VINVALL               GITS_CMD_GICv4(GITS_CMD_INVALL)
-#define GITS_CMD_VMAPP                 GITS_CMD_GICv4(GITS_CMD_MAPC)
-#define GITS_CMD_VMAPTI                        GITS_CMD_GICv4(GITS_CMD_MAPTI)
-#define GITS_CMD_VMOVI                 GITS_CMD_GICv4(GITS_CMD_MOVI)
-#define GITS_CMD_VSYNC                 GITS_CMD_GICv4(GITS_CMD_SYNC)
-/* VMOVP, VSGI and INVDB are the odd ones, as they dont have a physical counterpart */
-#define GITS_CMD_VMOVP                 GITS_CMD_GICv4(2)
-#define GITS_CMD_VSGI                  GITS_CMD_GICv4(3)
-#define GITS_CMD_INVDB                 GITS_CMD_GICv4(0xe)
-
-/*
- * ITS error numbers
- */
-#define E_ITS_MOVI_UNMAPPED_INTERRUPT          0x010107
-#define E_ITS_MOVI_UNMAPPED_COLLECTION         0x010109
-#define E_ITS_INT_UNMAPPED_INTERRUPT           0x010307
-#define E_ITS_CLEAR_UNMAPPED_INTERRUPT         0x010507
-#define E_ITS_MAPD_DEVICE_OOR                  0x010801
-#define E_ITS_MAPD_ITTSIZE_OOR                 0x010802
-#define E_ITS_MAPC_PROCNUM_OOR                 0x010902
-#define E_ITS_MAPC_COLLECTION_OOR              0x010903
-#define E_ITS_MAPTI_UNMAPPED_DEVICE            0x010a04
-#define E_ITS_MAPTI_ID_OOR                     0x010a05
-#define E_ITS_MAPTI_PHYSICALID_OOR             0x010a06
-#define E_ITS_INV_UNMAPPED_INTERRUPT           0x010c07
-#define E_ITS_INVALL_UNMAPPED_COLLECTION       0x010d09
-#define E_ITS_MOVALL_PROCNUM_OOR               0x010e01
-#define E_ITS_DISCARD_UNMAPPED_INTERRUPT       0x010f07
-
-/*
- * CPU interface registers
- */
-#define ICC_CTLR_EL1_EOImode_SHIFT     (1)
-#define ICC_CTLR_EL1_EOImode_drop_dir  (0U << ICC_CTLR_EL1_EOImode_SHIFT)
-#define ICC_CTLR_EL1_EOImode_drop      (1U << ICC_CTLR_EL1_EOImode_SHIFT)
-#define ICC_CTLR_EL1_EOImode_MASK      (1 << ICC_CTLR_EL1_EOImode_SHIFT)
-#define ICC_CTLR_EL1_CBPR_SHIFT                0
-#define ICC_CTLR_EL1_CBPR_MASK         (1 << ICC_CTLR_EL1_CBPR_SHIFT)
-#define ICC_CTLR_EL1_PMHE_SHIFT                6
-#define ICC_CTLR_EL1_PMHE_MASK         (1 << ICC_CTLR_EL1_PMHE_SHIFT)
-#define ICC_CTLR_EL1_PRI_BITS_SHIFT    8
-#define ICC_CTLR_EL1_PRI_BITS_MASK     (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT)
-#define ICC_CTLR_EL1_ID_BITS_SHIFT     11
-#define ICC_CTLR_EL1_ID_BITS_MASK      (0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT)
-#define ICC_CTLR_EL1_SEIS_SHIFT                14
-#define ICC_CTLR_EL1_SEIS_MASK         (0x1 << ICC_CTLR_EL1_SEIS_SHIFT)
-#define ICC_CTLR_EL1_A3V_SHIFT         15
-#define ICC_CTLR_EL1_A3V_MASK          (0x1 << ICC_CTLR_EL1_A3V_SHIFT)
-#define ICC_CTLR_EL1_RSS               (0x1 << 18)
-#define ICC_CTLR_EL1_ExtRange          (0x1 << 19)
-#define ICC_PMR_EL1_SHIFT              0
-#define ICC_PMR_EL1_MASK               (0xff << ICC_PMR_EL1_SHIFT)
-#define ICC_BPR0_EL1_SHIFT             0
-#define ICC_BPR0_EL1_MASK              (0x7 << ICC_BPR0_EL1_SHIFT)
-#define ICC_BPR1_EL1_SHIFT             0
-#define ICC_BPR1_EL1_MASK              (0x7 << ICC_BPR1_EL1_SHIFT)
-#define ICC_IGRPEN0_EL1_SHIFT          0
-#define ICC_IGRPEN0_EL1_MASK           (1 << ICC_IGRPEN0_EL1_SHIFT)
-#define ICC_IGRPEN1_EL1_SHIFT          0
-#define ICC_IGRPEN1_EL1_MASK           (1 << ICC_IGRPEN1_EL1_SHIFT)
-#define ICC_SRE_EL1_DIB                        (1U << 2)
-#define ICC_SRE_EL1_DFB                        (1U << 1)
-#define ICC_SRE_EL1_SRE                        (1U << 0)
-
-/* These are for GICv2 emulation only */
-#define GICH_LR_VIRTUALID              (0x3ffUL << 0)
-#define GICH_LR_PHYSID_CPUID_SHIFT     (10)
-#define GICH_LR_PHYSID_CPUID           (7UL << GICH_LR_PHYSID_CPUID_SHIFT)
-
-#define ICC_IAR1_EL1_SPURIOUS          0x3ff
-
-#define ICC_SRE_EL2_SRE                        (1 << 0)
-#define ICC_SRE_EL2_ENABLE             (1 << 3)
-
-#define ICC_SGI1R_TARGET_LIST_SHIFT    0
-#define ICC_SGI1R_TARGET_LIST_MASK     (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT)
-#define ICC_SGI1R_AFFINITY_1_SHIFT     16
-#define ICC_SGI1R_AFFINITY_1_MASK      (0xff << ICC_SGI1R_AFFINITY_1_SHIFT)
-#define ICC_SGI1R_SGI_ID_SHIFT         24
-#define ICC_SGI1R_SGI_ID_MASK          (0xfULL << ICC_SGI1R_SGI_ID_SHIFT)
-#define ICC_SGI1R_AFFINITY_2_SHIFT     32
-#define ICC_SGI1R_AFFINITY_2_MASK      (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT)
-#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40
-#define ICC_SGI1R_RS_SHIFT             44
-#define ICC_SGI1R_RS_MASK              (0xfULL << ICC_SGI1R_RS_SHIFT)
-#define ICC_SGI1R_AFFINITY_3_SHIFT     48
-#define ICC_SGI1R_AFFINITY_3_MASK      (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT)
-
-#endif
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h b/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h
deleted file mode 100644
index 3722ed9..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef __SELFTESTS_GIC_V3_ITS_H__
-#define __SELFTESTS_GIC_V3_ITS_H__
-
-#include <linux/sizes.h>
-
-void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz,
-             vm_paddr_t device_tbl, size_t device_tbl_sz,
-             vm_paddr_t cmdq, size_t cmdq_size);
-
-void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base,
-                      size_t itt_size, bool valid);
-void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid);
-void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
-                       u32 collection_id, u32 intid);
-void its_send_invall_cmd(void *cmdq_base, u32 collection_id);
-
-#endif // __SELFTESTS_GIC_V3_ITS_H__
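
These helpers drive KVM's emulated ITS from guest code. As a rough usage sketch only (the header is merely moving in this patch): the collection/device/ITT tables and the command queue are assumed to have been allocated elsewhere by the test, with their guest physical addresses (coll_tbl_gpa, device_tbl_gpa, itt_gpa, cmdq_gpa) and the command queue's guest virtual mapping (cmdq_va) already known; all of those identifiers are placeholders, not names from this header.

        /* Guest code: wire one LPI from a device/event pair to vCPU 0. */
        its_init(coll_tbl_gpa, coll_tbl_sz, device_tbl_gpa, device_tbl_sz,
                 cmdq_gpa, cmdq_sz);

        its_send_mapd_cmd(cmdq_va, device_id, itt_gpa, itt_sz, true);
        its_send_mapc_cmd(cmdq_va, /*vcpu_id=*/0, collection_id, true);
        its_send_mapti_cmd(cmdq_va, device_id, event_id, collection_id, lpi_intid);
        its_send_invall_cmd(cmdq_va, collection_id);
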
diff --git a/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h
deleted file mode 100644
index e43a57d..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_UTIL_ARCH_H
-#define SELFTEST_KVM_UTIL_ARCH_H
-
-struct kvm_vm_arch {};
-
-#endif  // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
deleted file mode 100644
index 1e8d0d5..0000000
+++ /dev/null
@@ -1,238 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * AArch64 processor specific defines
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-#ifndef SELFTEST_KVM_PROCESSOR_H
-#define SELFTEST_KVM_PROCESSOR_H
-
-#include "kvm_util.h"
-#include "ucall_common.h"
-
-#include <linux/stringify.h>
-#include <linux/types.h>
-#include <asm/brk-imm.h>
-#include <asm/esr.h>
-#include <asm/sysreg.h>
-
-
-#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
-                          KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
-
-/*
- * KVM_ARM64_SYS_REG(sys_reg_id): Helper macro to convert
- * SYS_* register definitions in asm/sysreg.h to use in KVM
- * calls such as vcpu_get_reg() and vcpu_set_reg().
- */
-#define KVM_ARM64_SYS_REG(sys_reg_id)                  \
-       ARM64_SYS_REG(sys_reg_Op0(sys_reg_id),          \
-                       sys_reg_Op1(sys_reg_id),        \
-                       sys_reg_CRn(sys_reg_id),        \
-                       sys_reg_CRm(sys_reg_id),        \
-                       sys_reg_Op2(sys_reg_id))
-
-/*
- * Default MAIR
- *                  index   attribute
- * DEVICE_nGnRnE      0     0000:0000
- * DEVICE_nGnRE       1     0000:0100
- * DEVICE_GRE         2     0000:1100
- * NORMAL_NC          3     0100:0100
- * NORMAL             4     1111:1111
- * NORMAL_WT          5     1011:1011
- */
-
-/* Linux doesn't use these memory types, so let's define them. */
-#define MAIR_ATTR_DEVICE_GRE   UL(0x0c)
-#define MAIR_ATTR_NORMAL_WT    UL(0xbb)
-
-#define MT_DEVICE_nGnRnE       0
-#define MT_DEVICE_nGnRE                1
-#define MT_DEVICE_GRE          2
-#define MT_NORMAL_NC           3
-#define MT_NORMAL              4
-#define MT_NORMAL_WT           5
-
-#define DEFAULT_MAIR_EL1                                                       \
-       (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) |              \
-        MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) |                \
-        MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) |                    \
-        MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) |                      \
-        MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) |                            \
-        MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
-
-void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init);
-struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
-                                 struct kvm_vcpu_init *init, void *guest_code);
-
-struct ex_regs {
-       u64 regs[31];
-       u64 sp;
-       u64 pc;
-       u64 pstate;
-};
-
-#define VECTOR_NUM     16
-
-enum {
-       VECTOR_SYNC_CURRENT_SP0,
-       VECTOR_IRQ_CURRENT_SP0,
-       VECTOR_FIQ_CURRENT_SP0,
-       VECTOR_ERROR_CURRENT_SP0,
-
-       VECTOR_SYNC_CURRENT,
-       VECTOR_IRQ_CURRENT,
-       VECTOR_FIQ_CURRENT,
-       VECTOR_ERROR_CURRENT,
-
-       VECTOR_SYNC_LOWER_64,
-       VECTOR_IRQ_LOWER_64,
-       VECTOR_FIQ_LOWER_64,
-       VECTOR_ERROR_LOWER_64,
-
-       VECTOR_SYNC_LOWER_32,
-       VECTOR_IRQ_LOWER_32,
-       VECTOR_FIQ_LOWER_32,
-       VECTOR_ERROR_LOWER_32,
-};
-
-#define VECTOR_IS_SYNC(v) ((v) == VECTOR_SYNC_CURRENT_SP0 || \
-                          (v) == VECTOR_SYNC_CURRENT     || \
-                          (v) == VECTOR_SYNC_LOWER_64    || \
-                          (v) == VECTOR_SYNC_LOWER_32)
-
-/* Access flag */
-#define PTE_AF                 (1ULL << 10)
-
-/* Access flag update enable/disable */
-#define TCR_EL1_HA             (1ULL << 39)
-
-void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
-                                       uint32_t *ipa16k, uint32_t *ipa64k);
-
-void vm_init_descriptor_tables(struct kvm_vm *vm);
-void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu);
-
-typedef void(*handler_fn)(struct ex_regs *);
-void vm_install_exception_handler(struct kvm_vm *vm,
-               int vector, handler_fn handler);
-void vm_install_sync_handler(struct kvm_vm *vm,
-               int vector, int ec, handler_fn handler);
-
-uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva);
-
-static inline void cpu_relax(void)
-{
-       asm volatile("yield" ::: "memory");
-}
-
-#define isb()          asm volatile("isb" : : : "memory")
-#define dsb(opt)       asm volatile("dsb " #opt : : : "memory")
-#define dmb(opt)       asm volatile("dmb " #opt : : : "memory")
-
-#define dma_wmb()      dmb(oshst)
-#define __iowmb()      dma_wmb()
-
-#define dma_rmb()      dmb(oshld)
-
-#define __iormb(v)                                                     \
-({                                                                     \
-       unsigned long tmp;                                              \
-                                                                       \
-       dma_rmb();                                                      \
-                                                                       \
-       /*                                                              \
-        * Courtesy of arch/arm64/include/asm/io.h:                     \
-        * Create a dummy control dependency from the IO read to any    \
-        * later instructions. This ensures that a subsequent call      \
-        * to udelay() will be ordered due to the ISB in __delay().     \
-        */                                                             \
-       asm volatile("eor       %0, %1, %1\n"                           \
-                    "cbnz      %0, ."                                  \
-                    : "=r" (tmp) : "r" ((unsigned long)(v))            \
-                    : "memory");                                       \
-})
-
-static __always_inline void __raw_writel(u32 val, volatile void *addr)
-{
-       asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
-}
-
-static __always_inline u32 __raw_readl(const volatile void *addr)
-{
-       u32 val;
-       asm volatile("ldr %w0, [%1]" : "=r" (val) : "r" (addr));
-       return val;
-}
-
-static __always_inline void __raw_writeq(u64 val, volatile void *addr)
-{
-       asm volatile("str %0, [%1]" : : "rZ" (val), "r" (addr));
-}
-
-static __always_inline u64 __raw_readq(const volatile void *addr)
-{
-       u64 val;
-       asm volatile("ldr %0, [%1]" : "=r" (val) : "r" (addr));
-       return val;
-}
-
-#define writel_relaxed(v,c)    ((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
-#define readl_relaxed(c)       ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; })
-#define writeq_relaxed(v,c)    ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c)))
-#define readq_relaxed(c)       ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; })
-
-#define writel(v,c)            ({ __iowmb(); writel_relaxed((v),(c));})
-#define readl(c)               ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
-#define writeq(v,c)            ({ __iowmb(); writeq_relaxed((v),(c));})
-#define readq(c)               ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; })
-
-
-static inline void local_irq_enable(void)
-{
-       asm volatile("msr daifclr, #3" : : : "memory");
-}
-
-static inline void local_irq_disable(void)
-{
-       asm volatile("msr daifset, #3" : : : "memory");
-}
-
-/**
- * struct arm_smccc_res - Result from SMC/HVC call
- * @a0-a3 result values from registers 0 to 3
- */
-struct arm_smccc_res {
-       unsigned long a0;
-       unsigned long a1;
-       unsigned long a2;
-       unsigned long a3;
-};
-
-/**
- * smccc_hvc - Invoke a SMCCC function using the hvc conduit
- * @function_id: the SMCCC function to be called
- * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7
- * @res: pointer to write the return values from registers x0-x3
- *
- */
-void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
-              uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
-              uint64_t arg6, struct arm_smccc_res *res);
-
-/**
- * smccc_smc - Invoke a SMCCC function using the smc conduit
- * @function_id: the SMCCC function to be called
- * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7
- * @res: pointer to write the return values from registers x0-x3
- *
- */
-void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
-              uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
-              uint64_t arg6, struct arm_smccc_res *res);
-
-/* Execute a Wait For Interrupt instruction. */
-void wfi(void);
-
-#endif /* SELFTEST_KVM_PROCESSOR_H */
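
Beyond the raw MMIO accessors, the exception-table and SMCCC helpers declared above are what most arm64 selftests build on. A minimal sketch, with the VM/vCPU creation and the guest_irq_handler body assumed to exist elsewhere; GUEST_ASSERT() comes from ucall_common.h (included above), and the PSCI_* constants come from <linux/psci.h>, not from this header.

        /* Host code: route guest exceptions through per-test handlers. */
        vm_init_descriptor_tables(vm);
        vcpu_init_descriptor_tables(vcpu);
        vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);

        /* Guest code: query the PSCI version over the HVC conduit. */
        struct arm_smccc_res res;

        smccc_hvc(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0, 0, 0, 0, 0, &res);
        GUEST_ASSERT(res.a0 != (unsigned long)PSCI_RET_NOT_SUPPORTED);
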
diff --git a/tools/testing/selftests/kvm/include/aarch64/spinlock.h b/tools/testing/selftests/kvm/include/aarch64/spinlock.h
deleted file mode 100644
index cf09841..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef SELFTEST_KVM_ARM64_SPINLOCK_H
-#define SELFTEST_KVM_ARM64_SPINLOCK_H
-
-struct spinlock {
-       int v;
-};
-
-extern void spin_lock(struct spinlock *lock);
-extern void spin_unlock(struct spinlock *lock);
-
-#endif /* SELFTEST_KVM_ARM64_SPINLOCK_H */
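
Usage is the obvious one: a statically allocated lock serializing guest code that runs on multiple vCPUs, e.g.

        static struct spinlock shared_lock;

        /* Guest code, potentially running on several vCPUs at once. */
        spin_lock(&shared_lock);
        /* ... touch state shared between vCPUs ... */
        spin_unlock(&shared_lock);
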
diff --git a/tools/testing/selftests/kvm/include/aarch64/ucall.h b/tools/testing/selftests/kvm/include/aarch64/ucall.h
deleted file mode 100644
index 4ec801f..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_UCALL_H
-#define SELFTEST_KVM_UCALL_H
-
-#include "kvm_util.h"
-
-#define UCALL_EXIT_REASON       KVM_EXIT_MMIO
-
-/*
- * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
- * VM), it must not be accessed from host code.
- */
-extern vm_vaddr_t *ucall_exit_mmio_addr;
-
-static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
-       WRITE_ONCE(*ucall_exit_mmio_addr, uc);
-}
-
-#endif
diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/aarch64/vgic.h
deleted file mode 100644
index c481d0c..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * ARM Generic Interrupt Controller (GIC) host specific defines
- */
-
-#ifndef SELFTEST_KVM_VGIC_H
-#define SELFTEST_KVM_VGIC_H
-
-#include <linux/kvm.h>
-
-#include "kvm_util.h"
-
-#define REDIST_REGION_ATTR_ADDR(count, base, flags, index) \
-       (((uint64_t)(count) << 52) | \
-       ((uint64_t)((base) >> 16) << 16) | \
-       ((uint64_t)(flags) << 12) | \
-       index)
-
-int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs);
-
-#define VGIC_MAX_RESERVED      1023
-
-void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
-int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
-
-void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
-int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
-
-/* The vcpu arg only applies to private interrupts. */
-void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
-void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
-
-#define KVM_IRQCHIP_NUM_PINS   (1020 - 32)
-
-int vgic_its_setup(struct kvm_vm *vm);
-
-#endif // SELFTEST_KVM_VGIC_H
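
These are the host-side counterparts to the guest GIC library: they create the in-kernel vGIC device and inject interrupts from the VMM thread. A minimal sketch, assuming the VM and its vCPUs already exist (vm and nr_vcpus are placeholders) and using TEST_ASSERT() from the common selftest harness rather than anything declared here.

        /* Host code: create a vGICv3 with 64 IRQs, then pulse SPI 42. */
        int gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);

        TEST_ASSERT(gic_fd >= 0, "Failed to create a vGICv3 device");

        kvm_arm_irq_line(vm, 42, 1);    /* assert the line */
        kvm_arm_irq_line(vm, 42, 0);    /* deassert the line */
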
diff --git a/tools/testing/selftests/kvm/include/arm64/arch_timer.h b/tools/testing/selftests/kvm/include/arm64/arch_timer.h
new file mode 100644
index 0000000..bf461de
--- /dev/null
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Timer specific interface
+ */
+
+#ifndef SELFTEST_KVM_ARCH_TIMER_H
+#define SELFTEST_KVM_ARCH_TIMER_H
+
+#include "processor.h"
+
+enum arch_timer {
+       VIRTUAL,
+       PHYSICAL,
+};
+
+#define CTL_ENABLE     (1 << 0)
+#define CTL_IMASK      (1 << 1)
+#define CTL_ISTATUS    (1 << 2)
+
+#define msec_to_cycles(msec)   \
+       (timer_get_cntfrq() * (uint64_t)(msec) / 1000)
+
+#define usec_to_cycles(usec)   \
+       (timer_get_cntfrq() * (uint64_t)(usec) / 1000000)
+
+#define cycles_to_usec(cycles) \
+       ((uint64_t)(cycles) * 1000000 / timer_get_cntfrq())
+
+static inline uint32_t timer_get_cntfrq(void)
+{
+       return read_sysreg(cntfrq_el0);
+}
+
+static inline uint64_t timer_get_cntct(enum arch_timer timer)
+{
+       isb();
+
+       switch (timer) {
+       case VIRTUAL:
+               return read_sysreg(cntvct_el0);
+       case PHYSICAL:
+               return read_sysreg(cntpct_el0);
+       default:
+               GUEST_FAIL("Unexpected timer type = %u", timer);
+       }
+
+       /* We should not reach here */
+       return 0;
+}
+
+static inline void timer_set_cval(enum arch_timer timer, uint64_t cval)
+{
+       switch (timer) {
+       case VIRTUAL:
+               write_sysreg(cval, cntv_cval_el0);
+               break;
+       case PHYSICAL:
+               write_sysreg(cval, cntp_cval_el0);
+               break;
+       default:
+               GUEST_FAIL("Unexpected timer type = %u", timer);
+       }
+
+       isb();
+}
+
+static inline uint64_t timer_get_cval(enum arch_timer timer)
+{
+       switch (timer) {
+       case VIRTUAL:
+               return read_sysreg(cntv_cval_el0);
+       case PHYSICAL:
+               return read_sysreg(cntp_cval_el0);
+       default:
+               GUEST_FAIL("Unexpected timer type = %u", timer);
+       }
+
+       /* We should not reach here */
+       return 0;
+}
+
+static inline void timer_set_tval(enum arch_timer timer, int32_t tval)
+{
+       switch (timer) {
+       case VIRTUAL:
+               write_sysreg(tval, cntv_tval_el0);
+               break;
+       case PHYSICAL:
+               write_sysreg(tval, cntp_tval_el0);
+               break;
+       default:
+               GUEST_FAIL("Unexpected timer type = %u", timer);
+       }
+
+       isb();
+}
+
+static inline int32_t timer_get_tval(enum arch_timer timer)
+{
+       isb();
+       switch (timer) {
+       case VIRTUAL:
+               return read_sysreg(cntv_tval_el0);
+       case PHYSICAL:
+               return read_sysreg(cntp_tval_el0);
+       default:
+               GUEST_FAIL("Could not get timer %d\n", timer);
+       }
+
+       /* We should not reach here */
+       return 0;
+}
+
+static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl)
+{
+       switch (timer) {
+       case VIRTUAL:
+               write_sysreg(ctl, cntv_ctl_el0);
+               break;
+       case PHYSICAL:
+               write_sysreg(ctl, cntp_ctl_el0);
+               break;
+       default:
+               GUEST_FAIL("Unexpected timer type = %u", timer);
+       }
+
+       isb();
+}
+
+static inline uint32_t timer_get_ctl(enum arch_timer timer)
+{
+       switch (timer) {
+       case VIRTUAL:
+               return read_sysreg(cntv_ctl_el0);
+       case PHYSICAL:
+               return read_sysreg(cntp_ctl_el0);
+       default:
+               GUEST_FAIL("Unexpected timer type = %u", timer);
+       }
+
+       /* We should not reach here */
+       return 0;
+}
+
+static inline void timer_set_next_cval_ms(enum arch_timer timer, uint32_t msec)
+{
+       uint64_t now_ct = timer_get_cntct(timer);
+       uint64_t next_ct = now_ct + msec_to_cycles(msec);
+
+       timer_set_cval(timer, next_ct);
+}
+
+static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec)
+{
+       timer_set_tval(timer, msec_to_cycles(msec));
+}
+
+#endif /* SELFTEST_KVM_ARCH_TIMER_H */
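
Taken together, the helpers above give guest code everything needed to arm a timer and wait for it to fire. A minimal guest-side sketch (interrupt delivery and handler wiring are set up elsewhere; this variant simply polls ISTATUS):

        static void guest_wait_for_vtimer(void)
        {
                /* Fire the virtual timer roughly 10ms from now. */
                timer_set_next_cval_ms(VIRTUAL, 10);

                /* Enable the timer and leave its interrupt unmasked. */
                timer_set_ctl(VIRTUAL, CTL_ENABLE);

                /* Poll until the timer condition is met. */
                while (!(timer_get_ctl(VIRTUAL) & CTL_ISTATUS))
                        cpu_relax();
        }
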
diff --git a/tools/testing/selftests/kvm/include/arm64/delay.h b/tools/testing/selftests/kvm/include/arm64/delay.h
new file mode 100644
index 0000000..329e4f5
--- /dev/null
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM simple delay routines
+ */
+
+#ifndef SELFTEST_KVM_ARM_DELAY_H
+#define SELFTEST_KVM_ARM_DELAY_H
+
+#include "arch_timer.h"
+
+static inline void __delay(uint64_t cycles)
+{
+       enum arch_timer timer = VIRTUAL;
+       uint64_t start = timer_get_cntct(timer);
+
+       while ((timer_get_cntct(timer) - start) < cycles)
+               cpu_relax();
+}
+
+static inline void udelay(unsigned long usec)
+{
+       __delay(usec_to_cycles(usec));
+}
+
+#endif /* SELFTEST_KVM_ARM_DELAY_H */
diff --git a/tools/testing/selftests/kvm/include/arm64/gic.h b/tools/testing/selftests/kvm/include/arm64/gic.h
new file mode 100644
index 0000000..baeb3c8
--- /dev/null
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) specific defines
+ */
+
+#ifndef SELFTEST_KVM_GIC_H
+#define SELFTEST_KVM_GIC_H
+
+#include <asm/kvm.h>
+
+enum gic_type {
+       GIC_V3,
+       GIC_TYPE_MAX,
+};
+
+/*
+ * Note that the redistributor frames are at the end, as the range scales
+ * with the number of vCPUs in the VM.
+ */
+#define GITS_BASE_GPA          0x8000000ULL
+#define GICD_BASE_GPA          (GITS_BASE_GPA + KVM_VGIC_V3_ITS_SIZE)
+#define GICR_BASE_GPA          (GICD_BASE_GPA + KVM_VGIC_V3_DIST_SIZE)
+
+/* The GIC is identity-mapped into the guest at the time of setup. */
+#define GITS_BASE_GVA          ((volatile void *)GITS_BASE_GPA)
+#define GICD_BASE_GVA          ((volatile void *)GICD_BASE_GPA)
+#define GICR_BASE_GVA          ((volatile void *)GICR_BASE_GPA)
+
+#define MIN_SGI                        0
+#define MIN_PPI                        16
+#define MIN_SPI                        32
+#define MAX_SPI                        1019
+#define IAR_SPURIOUS           1023
+
+#define INTID_IS_SGI(intid)    (0       <= (intid) && (intid) < MIN_PPI)
+#define INTID_IS_PPI(intid)    (MIN_PPI <= (intid) && (intid) < MIN_SPI)
+#define INTID_IS_SPI(intid)    (MIN_SPI <= (intid) && (intid) <= MAX_SPI)
+
+void gic_init(enum gic_type type, unsigned int nr_cpus);
+void gic_irq_enable(unsigned int intid);
+void gic_irq_disable(unsigned int intid);
+unsigned int gic_get_and_ack_irq(void);
+void gic_set_eoi(unsigned int intid);
+void gic_set_dir(unsigned int intid);
+
+/*
+ * Sets the EOI mode. When split is false, EOI just drops the priority. When
+ * split is true, EOI drops the priority and deactivates the interrupt.
+ */
+void gic_set_eoi_split(bool split);
+void gic_set_priority_mask(uint64_t mask);
+void gic_set_priority(uint32_t intid, uint32_t prio);
+void gic_irq_set_active(unsigned int intid);
+void gic_irq_clear_active(unsigned int intid);
+bool gic_irq_get_active(unsigned int intid);
+void gic_irq_set_pending(unsigned int intid);
+void gic_irq_clear_pending(unsigned int intid);
+bool gic_irq_get_pending(unsigned int intid);
+void gic_irq_set_config(unsigned int intid, bool is_edge);
+
+void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
+                          vm_paddr_t pend_table);
+
+#endif /* SELFTEST_KVM_GIC_H */
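
For context, the usual guest-side flow built on these declarations is: initialize the vGIC for the number of vCPUs in the VM, enable the interrupt of interest, and acknowledge/EOI it from the IRQ handler. A sketch follows; struct ex_regs and local_irq_enable() come from processor.h, and registering the handler (vm_install_exception_handler()) plus creating the vGIC device on the host side are assumed to happen elsewhere.

        static void guest_irq_handler(struct ex_regs *regs)
        {
                unsigned int intid = gic_get_and_ack_irq();

                if (intid == IAR_SPURIOUS)
                        return;

                /* ... per-test handling of @intid ... */

                gic_set_eoi(intid);
        }

        static void guest_setup_gic(unsigned int nr_vcpus, unsigned int intid)
        {
                gic_init(GIC_V3, nr_vcpus);
                gic_irq_enable(intid);
                local_irq_enable();
        }
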
diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v3.h b/tools/testing/selftests/kvm/include/arm64/gic_v3.h
new file mode 100644
index 0000000..a76615f
--- /dev/null
@@ -0,0 +1,604 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+#ifndef __SELFTESTS_GIC_V3_H
+#define __SELFTESTS_GIC_V3_H
+
+/*
+ * Distributor registers. We assume we're running non-secure, with ARE
+ * being set. Secure-only and non-ARE registers are not described.
+ */
+#define GICD_CTLR                      0x0000
+#define GICD_TYPER                     0x0004
+#define GICD_IIDR                      0x0008
+#define GICD_TYPER2                    0x000C
+#define GICD_STATUSR                   0x0010
+#define GICD_SETSPI_NSR                        0x0040
+#define GICD_CLRSPI_NSR                        0x0048
+#define GICD_SETSPI_SR                 0x0050
+#define GICD_CLRSPI_SR                 0x0058
+#define GICD_IGROUPR                   0x0080
+#define GICD_ISENABLER                 0x0100
+#define GICD_ICENABLER                 0x0180
+#define GICD_ISPENDR                   0x0200
+#define GICD_ICPENDR                   0x0280
+#define GICD_ISACTIVER                 0x0300
+#define GICD_ICACTIVER                 0x0380
+#define GICD_IPRIORITYR                        0x0400
+#define GICD_ICFGR                     0x0C00
+#define GICD_IGRPMODR                  0x0D00
+#define GICD_NSACR                     0x0E00
+#define GICD_IGROUPRnE                 0x1000
+#define GICD_ISENABLERnE               0x1200
+#define GICD_ICENABLERnE               0x1400
+#define GICD_ISPENDRnE                 0x1600
+#define GICD_ICPENDRnE                 0x1800
+#define GICD_ISACTIVERnE               0x1A00
+#define GICD_ICACTIVERnE               0x1C00
+#define GICD_IPRIORITYRnE              0x2000
+#define GICD_ICFGRnE                   0x3000
+#define GICD_IROUTER                   0x6000
+#define GICD_IROUTERnE                 0x8000
+#define GICD_IDREGS                    0xFFD0
+#define GICD_PIDR2                     0xFFE8
+
+#define ESPI_BASE_INTID                        4096
+
+/*
+ * Those registers are actually from GICv2, but the spec demands that they
+ * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3).
+ */
+#define GICD_ITARGETSR                 0x0800
+#define GICD_SGIR                      0x0F00
+#define GICD_CPENDSGIR                 0x0F10
+#define GICD_SPENDSGIR                 0x0F20
+
+#define GICD_CTLR_RWP                  (1U << 31)
+#define GICD_CTLR_nASSGIreq            (1U << 8)
+#define GICD_CTLR_DS                   (1U << 6)
+#define GICD_CTLR_ARE_NS               (1U << 4)
+#define GICD_CTLR_ENABLE_G1A           (1U << 1)
+#define GICD_CTLR_ENABLE_G1            (1U << 0)
+
+#define GICD_IIDR_IMPLEMENTER_SHIFT    0
+#define GICD_IIDR_IMPLEMENTER_MASK     (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
+#define GICD_IIDR_REVISION_SHIFT       12
+#define GICD_IIDR_REVISION_MASK                (0xf << GICD_IIDR_REVISION_SHIFT)
+#define GICD_IIDR_VARIANT_SHIFT                16
+#define GICD_IIDR_VARIANT_MASK         (0xf << GICD_IIDR_VARIANT_SHIFT)
+#define GICD_IIDR_PRODUCT_ID_SHIFT     24
+#define GICD_IIDR_PRODUCT_ID_MASK      (0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
+
+
+/*
+ * In systems with a single security state (what we emulate in KVM)
+ * the meaning of the interrupt group enable bits is slightly different
+ */
+#define GICD_CTLR_ENABLE_SS_G1         (1U << 1)
+#define GICD_CTLR_ENABLE_SS_G0         (1U << 0)
+
+#define GICD_TYPER_RSS                 (1U << 26)
+#define GICD_TYPER_LPIS                        (1U << 17)
+#define GICD_TYPER_MBIS                        (1U << 16)
+#define GICD_TYPER_ESPI                        (1U << 8)
+
+#define GICD_TYPER_ID_BITS(typer)      ((((typer) >> 19) & 0x1f) + 1)
+#define GICD_TYPER_NUM_LPIS(typer)     ((((typer) >> 11) & 0x1f) + 1)
+#define GICD_TYPER_SPIS(typer)         ((((typer) & 0x1f) + 1) * 32)
+#define GICD_TYPER_ESPIS(typer)                                                \
+       (((typer) & GICD_TYPER_ESPI) ? GICD_TYPER_SPIS((typer) >> 27) : 0)
+
+#define GICD_TYPER2_nASSGIcap          (1U << 8)
+#define GICD_TYPER2_VIL                        (1U << 7)
+#define GICD_TYPER2_VID                        GENMASK(4, 0)
+
+#define GICD_IROUTER_SPI_MODE_ONE      (0U << 31)
+#define GICD_IROUTER_SPI_MODE_ANY      (1U << 31)
+
+#define GIC_PIDR2_ARCH_MASK            0xf0
+#define GIC_PIDR2_ARCH_GICv3           0x30
+#define GIC_PIDR2_ARCH_GICv4           0x40
+
+#define GIC_V3_DIST_SIZE               0x10000
+
+#define GIC_PAGE_SIZE_4K               0ULL
+#define GIC_PAGE_SIZE_16K              1ULL
+#define GIC_PAGE_SIZE_64K              2ULL
+#define GIC_PAGE_SIZE_MASK             3ULL
+
+/*
+ * Re-Distributor registers, offsets from RD_base
+ */
+#define GICR_CTLR                      GICD_CTLR
+#define GICR_IIDR                      0x0004
+#define GICR_TYPER                     0x0008
+#define GICR_STATUSR                   GICD_STATUSR
+#define GICR_WAKER                     0x0014
+#define GICR_SETLPIR                   0x0040
+#define GICR_CLRLPIR                   0x0048
+#define GICR_PROPBASER                 0x0070
+#define GICR_PENDBASER                 0x0078
+#define GICR_INVLPIR                   0x00A0
+#define GICR_INVALLR                   0x00B0
+#define GICR_SYNCR                     0x00C0
+#define GICR_IDREGS                    GICD_IDREGS
+#define GICR_PIDR2                     GICD_PIDR2
+
+#define GICR_CTLR_ENABLE_LPIS          (1UL << 0)
+#define GICR_CTLR_CES                  (1UL << 1)
+#define GICR_CTLR_IR                   (1UL << 2)
+#define GICR_CTLR_RWP                  (1UL << 3)
+
+#define GICR_TYPER_CPU_NUMBER(r)       (((r) >> 8) & 0xffff)
+
+#define EPPI_BASE_INTID                        1056
+
+#define GICR_TYPER_NR_PPIS(r)                                          \
+       ({                                                              \
+               unsigned int __ppinum = ((r) >> 27) & 0x1f;             \
+               unsigned int __nr_ppis = 16;                            \
+               if (__ppinum == 1 || __ppinum == 2)                     \
+                       __nr_ppis +=  __ppinum * 32;                    \
+                                                                       \
+               __nr_ppis;                                              \
+        })
+
+#define GICR_WAKER_ProcessorSleep      (1U << 1)
+#define GICR_WAKER_ChildrenAsleep      (1U << 2)
+
+#define GIC_BASER_CACHE_nCnB           0ULL
+#define GIC_BASER_CACHE_SameAsInner    0ULL
+#define GIC_BASER_CACHE_nC             1ULL
+#define GIC_BASER_CACHE_RaWt           2ULL
+#define GIC_BASER_CACHE_RaWb           3ULL
+#define GIC_BASER_CACHE_WaWt           4ULL
+#define GIC_BASER_CACHE_WaWb           5ULL
+#define GIC_BASER_CACHE_RaWaWt         6ULL
+#define GIC_BASER_CACHE_RaWaWb         7ULL
+#define GIC_BASER_CACHE_MASK           7ULL
+#define GIC_BASER_NonShareable         0ULL
+#define GIC_BASER_InnerShareable       1ULL
+#define GIC_BASER_OuterShareable       2ULL
+#define GIC_BASER_SHAREABILITY_MASK    3ULL
+
+#define GIC_BASER_CACHEABILITY(reg, inner_outer, type)                 \
+       (GIC_BASER_CACHE_##type << reg##_##inner_outer##_CACHEABILITY_SHIFT)
+
+#define GIC_BASER_SHAREABILITY(reg, type)                              \
+       (GIC_BASER_##type << reg##_SHAREABILITY_SHIFT)
+
+/* encode a size field of width @w containing @n - 1 units */
+#define GIC_ENCODE_SZ(n, w) (((unsigned long)(n) - 1) & GENMASK_ULL(((w) - 1), 0))
+
+#define GICR_PROPBASER_SHAREABILITY_SHIFT              (10)
+#define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT                (7)
+#define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT                (56)
+#define GICR_PROPBASER_SHAREABILITY_MASK                               \
+       GIC_BASER_SHAREABILITY(GICR_PROPBASER, SHAREABILITY_MASK)
+#define GICR_PROPBASER_INNER_CACHEABILITY_MASK                         \
+       GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, MASK)
+#define GICR_PROPBASER_OUTER_CACHEABILITY_MASK                         \
+       GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, MASK)
+#define GICR_PROPBASER_CACHEABILITY_MASK GICR_PROPBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_PROPBASER_InnerShareable                                  \
+       GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable)
+
+#define GICR_PROPBASER_nCnB    GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nCnB)
+#define GICR_PROPBASER_nC      GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nC)
+#define GICR_PROPBASER_RaWt    GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt)
+#define GICR_PROPBASER_RaWb    GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb)
+#define GICR_PROPBASER_WaWt    GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWt)
+#define GICR_PROPBASER_WaWb    GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWb)
+#define GICR_PROPBASER_RaWaWt  GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWt)
+#define GICR_PROPBASER_RaWaWb  GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb)
+
+#define GICR_PROPBASER_IDBITS_MASK                     (0x1f)
+#define GICR_PROPBASER_ADDRESS(x)      ((x) & GENMASK_ULL(51, 12))
+#define GICR_PENDBASER_ADDRESS(x)      ((x) & GENMASK_ULL(51, 16))
+
+#define GICR_PENDBASER_SHAREABILITY_SHIFT              (10)
+#define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT                (7)
+#define GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT                (56)
+#define GICR_PENDBASER_SHAREABILITY_MASK                               \
+       GIC_BASER_SHAREABILITY(GICR_PENDBASER, SHAREABILITY_MASK)
+#define GICR_PENDBASER_INNER_CACHEABILITY_MASK                         \
+       GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, MASK)
+#define GICR_PENDBASER_OUTER_CACHEABILITY_MASK                         \
+       GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, MASK)
+#define GICR_PENDBASER_CACHEABILITY_MASK GICR_PENDBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_PENDBASER_InnerShareable                                  \
+       GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable)
+
+#define GICR_PENDBASER_nCnB    GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nCnB)
+#define GICR_PENDBASER_nC      GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nC)
+#define GICR_PENDBASER_RaWt    GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt)
+#define GICR_PENDBASER_RaWb    GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb)
+#define GICR_PENDBASER_WaWt    GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWt)
+#define GICR_PENDBASER_WaWb    GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWb)
+#define GICR_PENDBASER_RaWaWt  GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWt)
+#define GICR_PENDBASER_RaWaWb  GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWb)
+
+#define GICR_PENDBASER_PTZ                             BIT_ULL(62)
+
+/*
+ * Re-Distributor registers, offsets from SGI_base
+ */
+#define GICR_IGROUPR0                  GICD_IGROUPR
+#define GICR_ISENABLER0                        GICD_ISENABLER
+#define GICR_ICENABLER0                        GICD_ICENABLER
+#define GICR_ISPENDR0                  GICD_ISPENDR
+#define GICR_ICPENDR0                  GICD_ICPENDR
+#define GICR_ISACTIVER0                        GICD_ISACTIVER
+#define GICR_ICACTIVER0                        GICD_ICACTIVER
+#define GICR_IPRIORITYR0               GICD_IPRIORITYR
+#define GICR_ICFGR0                    GICD_ICFGR
+#define GICR_IGRPMODR0                 GICD_IGRPMODR
+#define GICR_NSACR                     GICD_NSACR
+
+#define GICR_TYPER_PLPIS               (1U << 0)
+#define GICR_TYPER_VLPIS               (1U << 1)
+#define GICR_TYPER_DIRTY               (1U << 2)
+#define GICR_TYPER_DirectLPIS          (1U << 3)
+#define GICR_TYPER_LAST                        (1U << 4)
+#define GICR_TYPER_RVPEID              (1U << 7)
+#define GICR_TYPER_COMMON_LPI_AFF      GENMASK_ULL(25, 24)
+#define GICR_TYPER_AFFINITY            GENMASK_ULL(63, 32)
+
+#define GICR_INVLPIR_INTID             GENMASK_ULL(31, 0)
+#define GICR_INVLPIR_VPEID             GENMASK_ULL(47, 32)
+#define GICR_INVLPIR_V                 GENMASK_ULL(63, 63)
+
+#define GICR_INVALLR_VPEID             GICR_INVLPIR_VPEID
+#define GICR_INVALLR_V                 GICR_INVLPIR_V
+
+#define GIC_V3_REDIST_SIZE             0x20000
+
+#define LPI_PROP_GROUP1                        (1 << 1)
+#define LPI_PROP_ENABLED               (1 << 0)
+
+/*
+ * Re-Distributor registers, offsets from VLPI_base
+ */
+#define GICR_VPROPBASER                        0x0070
+
+#define GICR_VPROPBASER_IDBITS_MASK    0x1f
+
+#define GICR_VPROPBASER_SHAREABILITY_SHIFT             (10)
+#define GICR_VPROPBASER_INNER_CACHEABILITY_SHIFT       (7)
+#define GICR_VPROPBASER_OUTER_CACHEABILITY_SHIFT       (56)
+
+#define GICR_VPROPBASER_SHAREABILITY_MASK                              \
+       GIC_BASER_SHAREABILITY(GICR_VPROPBASER, SHAREABILITY_MASK)
+#define GICR_VPROPBASER_INNER_CACHEABILITY_MASK                                \
+       GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, MASK)
+#define GICR_VPROPBASER_OUTER_CACHEABILITY_MASK                                \
+       GIC_BASER_CACHEABILITY(GICR_VPROPBASER, OUTER, MASK)
+#define GICR_VPROPBASER_CACHEABILITY_MASK                              \
+       GICR_VPROPBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_VPROPBASER_InnerShareable                                 \
+       GIC_BASER_SHAREABILITY(GICR_VPROPBASER, InnerShareable)
+
+#define GICR_VPROPBASER_nCnB   GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nCnB)
+#define GICR_VPROPBASER_nC     GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nC)
+#define GICR_VPROPBASER_RaWt   GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt)
+#define GICR_VPROPBASER_RaWb   GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWb)
+#define GICR_VPROPBASER_WaWt   GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWt)
+#define GICR_VPROPBASER_WaWb   GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWb)
+#define GICR_VPROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWt)
+#define GICR_VPROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWb)
+
+/*
+ * GICv4.1 VPROPBASER reinvention. A subtle mix between the old
+ * VPROPBASER and ITS_BASER. Just not quite either of the two.
+ */
+#define GICR_VPROPBASER_4_1_VALID      (1ULL << 63)
+#define GICR_VPROPBASER_4_1_ENTRY_SIZE GENMASK_ULL(61, 59)
+#define GICR_VPROPBASER_4_1_INDIRECT   (1ULL << 55)
+#define GICR_VPROPBASER_4_1_PAGE_SIZE  GENMASK_ULL(54, 53)
+#define GICR_VPROPBASER_4_1_Z          (1ULL << 52)
+#define GICR_VPROPBASER_4_1_ADDR       GENMASK_ULL(51, 12)
+#define GICR_VPROPBASER_4_1_SIZE       GENMASK_ULL(6, 0)
+
+#define GICR_VPENDBASER                        0x0078
+
+#define GICR_VPENDBASER_SHAREABILITY_SHIFT             (10)
+#define GICR_VPENDBASER_INNER_CACHEABILITY_SHIFT       (7)
+#define GICR_VPENDBASER_OUTER_CACHEABILITY_SHIFT       (56)
+#define GICR_VPENDBASER_SHAREABILITY_MASK                              \
+       GIC_BASER_SHAREABILITY(GICR_VPENDBASER, SHAREABILITY_MASK)
+#define GICR_VPENDBASER_INNER_CACHEABILITY_MASK                                \
+       GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, MASK)
+#define GICR_VPENDBASER_OUTER_CACHEABILITY_MASK                                \
+       GIC_BASER_CACHEABILITY(GICR_VPENDBASER, OUTER, MASK)
+#define GICR_VPENDBASER_CACHEABILITY_MASK                              \
+       GICR_VPENDBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_VPENDBASER_NonShareable                                   \
+       GIC_BASER_SHAREABILITY(GICR_VPENDBASER, NonShareable)
+
+#define GICR_VPENDBASER_InnerShareable                                 \
+       GIC_BASER_SHAREABILITY(GICR_VPENDBASER, InnerShareable)
+
+#define GICR_VPENDBASER_nCnB   GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB)
+#define GICR_VPENDBASER_nC     GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC)
+#define GICR_VPENDBASER_RaWt   GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt)
+#define GICR_VPENDBASER_RaWb   GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWb)
+#define GICR_VPENDBASER_WaWt   GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWt)
+#define GICR_VPENDBASER_WaWb   GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWb)
+#define GICR_VPENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWt)
+#define GICR_VPENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWb)
+
+#define GICR_VPENDBASER_Dirty          (1ULL << 60)
+#define GICR_VPENDBASER_PendingLast    (1ULL << 61)
+#define GICR_VPENDBASER_IDAI           (1ULL << 62)
+#define GICR_VPENDBASER_Valid          (1ULL << 63)
+
+/*
+ * GICv4.1 VPENDBASER, used for VPE residency. On top of these fields,
+ * also use the above Valid, PendingLast and Dirty.
+ */
+#define GICR_VPENDBASER_4_1_DB         (1ULL << 62)
+#define GICR_VPENDBASER_4_1_VGRP0EN    (1ULL << 59)
+#define GICR_VPENDBASER_4_1_VGRP1EN    (1ULL << 58)
+#define GICR_VPENDBASER_4_1_VPEID      GENMASK_ULL(15, 0)
+
+#define GICR_VSGIR                     0x0080
+
+#define GICR_VSGIR_VPEID               GENMASK(15, 0)
+
+#define GICR_VSGIPENDR                 0x0088
+
+#define GICR_VSGIPENDR_BUSY            (1U << 31)
+#define GICR_VSGIPENDR_PENDING         GENMASK(15, 0)
+
+/*
+ * ITS registers, offsets from ITS_base
+ */
+#define GITS_CTLR                      0x0000
+#define GITS_IIDR                      0x0004
+#define GITS_TYPER                     0x0008
+#define GITS_MPIDR                     0x0018
+#define GITS_CBASER                    0x0080
+#define GITS_CWRITER                   0x0088
+#define GITS_CREADR                    0x0090
+#define GITS_BASER                     0x0100
+#define GITS_IDREGS_BASE               0xffd0
+#define GITS_PIDR0                     0xffe0
+#define GITS_PIDR1                     0xffe4
+#define GITS_PIDR2                     GICR_PIDR2
+#define GITS_PIDR4                     0xffd0
+#define GITS_CIDR0                     0xfff0
+#define GITS_CIDR1                     0xfff4
+#define GITS_CIDR2                     0xfff8
+#define GITS_CIDR3                     0xfffc
+
+#define GITS_TRANSLATER                        0x10040
+
+#define GITS_SGIR                      0x20020
+
+#define GITS_SGIR_VPEID                        GENMASK_ULL(47, 32)
+#define GITS_SGIR_VINTID               GENMASK_ULL(3, 0)
+
+#define GITS_CTLR_ENABLE               (1U << 0)
+#define GITS_CTLR_ImDe                 (1U << 1)
+#define        GITS_CTLR_ITS_NUMBER_SHIFT      4
+#define        GITS_CTLR_ITS_NUMBER            (0xFU << GITS_CTLR_ITS_NUMBER_SHIFT)
+#define GITS_CTLR_QUIESCENT            (1U << 31)
+
+#define GITS_TYPER_PLPIS               (1UL << 0)
+#define GITS_TYPER_VLPIS               (1UL << 1)
+#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT        4
+#define GITS_TYPER_ITT_ENTRY_SIZE      GENMASK_ULL(7, 4)
+#define GITS_TYPER_IDBITS_SHIFT                8
+#define GITS_TYPER_DEVBITS_SHIFT       13
+#define GITS_TYPER_DEVBITS             GENMASK_ULL(17, 13)
+#define GITS_TYPER_PTA                 (1UL << 19)
+#define GITS_TYPER_HCC_SHIFT           24
+#define GITS_TYPER_HCC(r)              (((r) >> GITS_TYPER_HCC_SHIFT) & 0xff)
+#define GITS_TYPER_VMOVP               (1ULL << 37)
+#define GITS_TYPER_VMAPP               (1ULL << 40)
+#define GITS_TYPER_SVPET               GENMASK_ULL(42, 41)
+
+#define GITS_IIDR_REV_SHIFT            12
+#define GITS_IIDR_REV_MASK             (0xf << GITS_IIDR_REV_SHIFT)
+#define GITS_IIDR_REV(r)               (((r) >> GITS_IIDR_REV_SHIFT) & 0xf)
+#define GITS_IIDR_PRODUCTID_SHIFT      24
+
+#define GITS_CBASER_VALID                      (1ULL << 63)
+#define GITS_CBASER_SHAREABILITY_SHIFT         (10)
+#define GITS_CBASER_INNER_CACHEABILITY_SHIFT   (59)
+#define GITS_CBASER_OUTER_CACHEABILITY_SHIFT   (53)
+#define GITS_CBASER_SHAREABILITY_MASK                                  \
+       GIC_BASER_SHAREABILITY(GITS_CBASER, SHAREABILITY_MASK)
+#define GITS_CBASER_INNER_CACHEABILITY_MASK                            \
+       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, MASK)
+#define GITS_CBASER_OUTER_CACHEABILITY_MASK                            \
+       GIC_BASER_CACHEABILITY(GITS_CBASER, OUTER, MASK)
+#define GITS_CBASER_CACHEABILITY_MASK GITS_CBASER_INNER_CACHEABILITY_MASK
+
+#define GITS_CBASER_InnerShareable                                     \
+       GIC_BASER_SHAREABILITY(GITS_CBASER, InnerShareable)
+
+#define GITS_CBASER_nCnB       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nCnB)
+#define GITS_CBASER_nC         GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nC)
+#define GITS_CBASER_RaWt       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt)
+#define GITS_CBASER_RaWb       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWb)
+#define GITS_CBASER_WaWt       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWt)
+#define GITS_CBASER_WaWb       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWb)
+#define GITS_CBASER_RaWaWt     GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt)
+#define GITS_CBASER_RaWaWb     GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb)
+
+#define GITS_CBASER_ADDRESS(cbaser)    ((cbaser) & GENMASK_ULL(51, 12))
+
+#define GITS_BASER_NR_REGS             8
+
+#define GITS_BASER_VALID                       (1ULL << 63)
+#define GITS_BASER_INDIRECT                    (1ULL << 62)
+
+#define GITS_BASER_INNER_CACHEABILITY_SHIFT    (59)
+#define GITS_BASER_OUTER_CACHEABILITY_SHIFT    (53)
+#define GITS_BASER_INNER_CACHEABILITY_MASK                             \
+       GIC_BASER_CACHEABILITY(GITS_BASER, INNER, MASK)
+#define GITS_BASER_CACHEABILITY_MASK           GITS_BASER_INNER_CACHEABILITY_MASK
+#define GITS_BASER_OUTER_CACHEABILITY_MASK                             \
+       GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, MASK)
+#define GITS_BASER_SHAREABILITY_MASK                                   \
+       GIC_BASER_SHAREABILITY(GITS_BASER, SHAREABILITY_MASK)
+
+#define GITS_BASER_nCnB                GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nCnB)
+#define GITS_BASER_nC          GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nC)
+#define GITS_BASER_RaWt                GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt)
+#define GITS_BASER_RaWb                GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb)
+#define GITS_BASER_WaWt                GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWt)
+#define GITS_BASER_WaWb                GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWb)
+#define GITS_BASER_RaWaWt      GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWt)
+#define GITS_BASER_RaWaWb      GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWb)
+
+#define GITS_BASER_TYPE_SHIFT                  (56)
+#define GITS_BASER_TYPE(r)             (((r) >> GITS_BASER_TYPE_SHIFT) & 7)
+#define GITS_BASER_ENTRY_SIZE_SHIFT            (48)
+#define GITS_BASER_ENTRY_SIZE(r)       ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1)
+#define GITS_BASER_ENTRY_SIZE_MASK     GENMASK_ULL(52, 48)
+#define GITS_BASER_PHYS_52_to_48(phys)                                 \
+       (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12)
+#define GITS_BASER_ADDR_48_to_52(baser)                                        \
+       (((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48)
+
+#define GITS_BASER_SHAREABILITY_SHIFT  (10)
+#define GITS_BASER_InnerShareable                                      \
+       GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)
+#define GITS_BASER_PAGE_SIZE_SHIFT     (8)
+#define __GITS_BASER_PSZ(sz)           (GIC_PAGE_SIZE_ ## sz << GITS_BASER_PAGE_SIZE_SHIFT)
+#define GITS_BASER_PAGE_SIZE_4K                __GITS_BASER_PSZ(4K)
+#define GITS_BASER_PAGE_SIZE_16K       __GITS_BASER_PSZ(16K)
+#define GITS_BASER_PAGE_SIZE_64K       __GITS_BASER_PSZ(64K)
+#define GITS_BASER_PAGE_SIZE_MASK      __GITS_BASER_PSZ(MASK)
+#define GITS_BASER_PAGES_MAX           256
+#define GITS_BASER_PAGES_SHIFT         (0)
+#define GITS_BASER_NR_PAGES(r)         (((r) & 0xff) + 1)
+
+#define GITS_BASER_TYPE_NONE           0
+#define GITS_BASER_TYPE_DEVICE         1
+#define GITS_BASER_TYPE_VCPU           2
+#define GITS_BASER_TYPE_RESERVED3      3
+#define GITS_BASER_TYPE_COLLECTION     4
+#define GITS_BASER_TYPE_RESERVED5      5
+#define GITS_BASER_TYPE_RESERVED6      6
+#define GITS_BASER_TYPE_RESERVED7      7
+
+#define GITS_LVL1_ENTRY_SIZE           (8UL)
+
+/*
+ * ITS commands
+ */
+#define GITS_CMD_MAPD                  0x08
+#define GITS_CMD_MAPC                  0x09
+#define GITS_CMD_MAPTI                 0x0a
+#define GITS_CMD_MAPI                  0x0b
+#define GITS_CMD_MOVI                  0x01
+#define GITS_CMD_DISCARD               0x0f
+#define GITS_CMD_INV                   0x0c
+#define GITS_CMD_MOVALL                        0x0e
+#define GITS_CMD_INVALL                        0x0d
+#define GITS_CMD_INT                   0x03
+#define GITS_CMD_CLEAR                 0x04
+#define GITS_CMD_SYNC                  0x05
+
+/*
+ * GICv4 ITS specific commands
+ */
+#define GITS_CMD_GICv4(x)              ((x) | 0x20)
+#define GITS_CMD_VINVALL               GITS_CMD_GICv4(GITS_CMD_INVALL)
+#define GITS_CMD_VMAPP                 GITS_CMD_GICv4(GITS_CMD_MAPC)
+#define GITS_CMD_VMAPTI                        GITS_CMD_GICv4(GITS_CMD_MAPTI)
+#define GITS_CMD_VMOVI                 GITS_CMD_GICv4(GITS_CMD_MOVI)
+#define GITS_CMD_VSYNC                 GITS_CMD_GICv4(GITS_CMD_SYNC)
+/* VMOVP, VSGI and INVDB are the odd ones, as they don't have a physical counterpart */
+#define GITS_CMD_VMOVP                 GITS_CMD_GICv4(2)
+#define GITS_CMD_VSGI                  GITS_CMD_GICv4(3)
+#define GITS_CMD_INVDB                 GITS_CMD_GICv4(0xe)
+
+/*
+ * ITS error numbers
+ */
+#define E_ITS_MOVI_UNMAPPED_INTERRUPT          0x010107
+#define E_ITS_MOVI_UNMAPPED_COLLECTION         0x010109
+#define E_ITS_INT_UNMAPPED_INTERRUPT           0x010307
+#define E_ITS_CLEAR_UNMAPPED_INTERRUPT         0x010507
+#define E_ITS_MAPD_DEVICE_OOR                  0x010801
+#define E_ITS_MAPD_ITTSIZE_OOR                 0x010802
+#define E_ITS_MAPC_PROCNUM_OOR                 0x010902
+#define E_ITS_MAPC_COLLECTION_OOR              0x010903
+#define E_ITS_MAPTI_UNMAPPED_DEVICE            0x010a04
+#define E_ITS_MAPTI_ID_OOR                     0x010a05
+#define E_ITS_MAPTI_PHYSICALID_OOR             0x010a06
+#define E_ITS_INV_UNMAPPED_INTERRUPT           0x010c07
+#define E_ITS_INVALL_UNMAPPED_COLLECTION       0x010d09
+#define E_ITS_MOVALL_PROCNUM_OOR               0x010e01
+#define E_ITS_DISCARD_UNMAPPED_INTERRUPT       0x010f07
+
+/*
+ * CPU interface registers
+ */
+#define ICC_CTLR_EL1_EOImode_SHIFT     (1)
+#define ICC_CTLR_EL1_EOImode_drop_dir  (0U << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_EOImode_drop      (1U << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_EOImode_MASK      (1 << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_CBPR_SHIFT                0
+#define ICC_CTLR_EL1_CBPR_MASK         (1 << ICC_CTLR_EL1_CBPR_SHIFT)
+#define ICC_CTLR_EL1_PMHE_SHIFT                6
+#define ICC_CTLR_EL1_PMHE_MASK         (1 << ICC_CTLR_EL1_PMHE_SHIFT)
+#define ICC_CTLR_EL1_PRI_BITS_SHIFT    8
+#define ICC_CTLR_EL1_PRI_BITS_MASK     (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT)
+#define ICC_CTLR_EL1_ID_BITS_SHIFT     11
+#define ICC_CTLR_EL1_ID_BITS_MASK      (0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT)
+#define ICC_CTLR_EL1_SEIS_SHIFT                14
+#define ICC_CTLR_EL1_SEIS_MASK         (0x1 << ICC_CTLR_EL1_SEIS_SHIFT)
+#define ICC_CTLR_EL1_A3V_SHIFT         15
+#define ICC_CTLR_EL1_A3V_MASK          (0x1 << ICC_CTLR_EL1_A3V_SHIFT)
+#define ICC_CTLR_EL1_RSS               (0x1 << 18)
+#define ICC_CTLR_EL1_ExtRange          (0x1 << 19)
+#define ICC_PMR_EL1_SHIFT              0
+#define ICC_PMR_EL1_MASK               (0xff << ICC_PMR_EL1_SHIFT)
+#define ICC_BPR0_EL1_SHIFT             0
+#define ICC_BPR0_EL1_MASK              (0x7 << ICC_BPR0_EL1_SHIFT)
+#define ICC_BPR1_EL1_SHIFT             0
+#define ICC_BPR1_EL1_MASK              (0x7 << ICC_BPR1_EL1_SHIFT)
+#define ICC_IGRPEN0_EL1_SHIFT          0
+#define ICC_IGRPEN0_EL1_MASK           (1 << ICC_IGRPEN0_EL1_SHIFT)
+#define ICC_IGRPEN1_EL1_SHIFT          0
+#define ICC_IGRPEN1_EL1_MASK           (1 << ICC_IGRPEN1_EL1_SHIFT)
+#define ICC_SRE_EL1_DIB                        (1U << 2)
+#define ICC_SRE_EL1_DFB                        (1U << 1)
+#define ICC_SRE_EL1_SRE                        (1U << 0)
+
+/* These are for GICv2 emulation only */
+#define GICH_LR_VIRTUALID              (0x3ffUL << 0)
+#define GICH_LR_PHYSID_CPUID_SHIFT     (10)
+#define GICH_LR_PHYSID_CPUID           (7UL << GICH_LR_PHYSID_CPUID_SHIFT)
+
+#define ICC_IAR1_EL1_SPURIOUS          0x3ff
+
+#define ICC_SRE_EL2_SRE                        (1 << 0)
+#define ICC_SRE_EL2_ENABLE             (1 << 3)
+
+#define ICC_SGI1R_TARGET_LIST_SHIFT    0
+#define ICC_SGI1R_TARGET_LIST_MASK     (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT)
+#define ICC_SGI1R_AFFINITY_1_SHIFT     16
+#define ICC_SGI1R_AFFINITY_1_MASK      (0xff << ICC_SGI1R_AFFINITY_1_SHIFT)
+#define ICC_SGI1R_SGI_ID_SHIFT         24
+#define ICC_SGI1R_SGI_ID_MASK          (0xfULL << ICC_SGI1R_SGI_ID_SHIFT)
+#define ICC_SGI1R_AFFINITY_2_SHIFT     32
+#define ICC_SGI1R_AFFINITY_2_MASK      (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT)
+#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40
+#define ICC_SGI1R_RS_SHIFT             44
+#define ICC_SGI1R_RS_MASK              (0xfULL << ICC_SGI1R_RS_SHIFT)
+#define ICC_SGI1R_AFFINITY_3_SHIFT     48
+#define ICC_SGI1R_AFFINITY_3_MASK      (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT)
+
+#endif
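
The GIC_BASER_SHAREABILITY()/GIC_BASER_CACHEABILITY() helpers above build register-specific attribute fields by token-pasting the register name onto its *_SHIFT constants. As an illustrative sketch only (the command-queue address below is a placeholder, not a value from this patch), a GITS_CBASER word could be composed as:

/*
 * Illustrative sketch: compose a GITS_CBASER value from the helpers above.
 * "cmdq_pa" stands in for the guest physical address of a one-page ITS
 * command queue; it is an assumption, not part of this header.
 */
static inline uint64_t example_cbaser(uint64_t cmdq_pa)
{
	return GITS_CBASER_VALID |
	       GITS_CBASER_InnerShareable |		/* shareability field      */
	       GITS_CBASER_RaWaWb |			/* inner cacheability      */
	       GIC_ENCODE_SZ(1, 8) |			/* Size = nr 4K pages - 1  */
	       (cmdq_pa & GENMASK_ULL(51, 12));		/* 4K-aligned base address */
}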
diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
new file mode 100644 (file)
index 0000000..3722ed9
--- /dev/null
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __SELFTESTS_GIC_V3_ITS_H__
+#define __SELFTESTS_GIC_V3_ITS_H__
+
+#include <linux/sizes.h>
+
+void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz,
+             vm_paddr_t device_tbl, size_t device_tbl_sz,
+             vm_paddr_t cmdq, size_t cmdq_size);
+
+void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base,
+                      size_t itt_size, bool valid);
+void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid);
+void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
+                       u32 collection_id, u32 intid);
+void its_send_invall_cmd(void *cmdq_base, u32 collection_id);
+
+#endif // __SELFTESTS_GIC_V3_ITS_H__
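
For orientation, a sketch of the call order a test might follow with these helpers: initialize the ITS tables, map a device and a collection, bind an event to an LPI, then invalidate. All IDs, sizes and addresses below are placeholders, not values taken from this patch.

/*
 * Illustrative call order only; coll_tbl/dev_tbl/cmdq/itt are assumed
 * guest physical allocations and cmdq_va the host mapping of the queue.
 */
static inline void example_its_bringup(vm_paddr_t coll_tbl, vm_paddr_t dev_tbl,
				       vm_paddr_t cmdq, void *cmdq_va,
				       vm_paddr_t itt)
{
	its_init(coll_tbl, SZ_64K, dev_tbl, SZ_64K, cmdq, SZ_64K);

	its_send_mapd_cmd(cmdq_va, 0 /* device_id */, itt, SZ_64K, true);
	its_send_mapc_cmd(cmdq_va, 0 /* vcpu_id */, 0 /* collection_id */, true);
	its_send_mapti_cmd(cmdq_va, 0 /* device_id */, 0 /* event_id */,
			   0 /* collection_id */, 8192 /* example LPI INTID */);
	its_send_invall_cmd(cmdq_va, 0 /* collection_id */);
}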
diff --git a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
new file mode 100644 (file)
index 0000000..e43a57d
--- /dev/null
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif  // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h
new file mode 100644 (file)
index 0000000..1e8d0d5
--- /dev/null
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AArch64 processor specific defines
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <asm/brk-imm.h>
+#include <asm/esr.h>
+#include <asm/sysreg.h>
+
+
+#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
+                          KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
+
+/*
+ * KVM_ARM64_SYS_REG(sys_reg_id): Helper macro to convert
+ * SYS_* register definitions in asm/sysreg.h to use in KVM
+ * calls such as vcpu_get_reg() and vcpu_set_reg().
+ */
+#define KVM_ARM64_SYS_REG(sys_reg_id)                  \
+       ARM64_SYS_REG(sys_reg_Op0(sys_reg_id),          \
+                       sys_reg_Op1(sys_reg_id),        \
+                       sys_reg_CRn(sys_reg_id),        \
+                       sys_reg_CRm(sys_reg_id),        \
+                       sys_reg_Op2(sys_reg_id))
+
+/*
+ * Default MAIR
+ *                  index   attribute
+ * DEVICE_nGnRnE      0     0000:0000
+ * DEVICE_nGnRE       1     0000:0100
+ * DEVICE_GRE         2     0000:1100
+ * NORMAL_NC          3     0100:0100
+ * NORMAL             4     1111:1111
+ * NORMAL_WT          5     1011:1011
+ */
+
+/* Linux doesn't use these memory types, so let's define them. */
+#define MAIR_ATTR_DEVICE_GRE   UL(0x0c)
+#define MAIR_ATTR_NORMAL_WT    UL(0xbb)
+
+#define MT_DEVICE_nGnRnE       0
+#define MT_DEVICE_nGnRE                1
+#define MT_DEVICE_GRE          2
+#define MT_NORMAL_NC           3
+#define MT_NORMAL              4
+#define MT_NORMAL_WT           5
+
+#define DEFAULT_MAIR_EL1                                                       \
+       (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) |              \
+        MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) |                \
+        MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) |                    \
+        MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) |                      \
+        MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) |                            \
+        MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
+
+void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init);
+struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+                                 struct kvm_vcpu_init *init, void *guest_code);
+
+struct ex_regs {
+       u64 regs[31];
+       u64 sp;
+       u64 pc;
+       u64 pstate;
+};
+
+#define VECTOR_NUM     16
+
+enum {
+       VECTOR_SYNC_CURRENT_SP0,
+       VECTOR_IRQ_CURRENT_SP0,
+       VECTOR_FIQ_CURRENT_SP0,
+       VECTOR_ERROR_CURRENT_SP0,
+
+       VECTOR_SYNC_CURRENT,
+       VECTOR_IRQ_CURRENT,
+       VECTOR_FIQ_CURRENT,
+       VECTOR_ERROR_CURRENT,
+
+       VECTOR_SYNC_LOWER_64,
+       VECTOR_IRQ_LOWER_64,
+       VECTOR_FIQ_LOWER_64,
+       VECTOR_ERROR_LOWER_64,
+
+       VECTOR_SYNC_LOWER_32,
+       VECTOR_IRQ_LOWER_32,
+       VECTOR_FIQ_LOWER_32,
+       VECTOR_ERROR_LOWER_32,
+};
+
+#define VECTOR_IS_SYNC(v) ((v) == VECTOR_SYNC_CURRENT_SP0 || \
+                          (v) == VECTOR_SYNC_CURRENT     || \
+                          (v) == VECTOR_SYNC_LOWER_64    || \
+                          (v) == VECTOR_SYNC_LOWER_32)
+
+/* Access flag */
+#define PTE_AF                 (1ULL << 10)
+
+/* Access flag update enable/disable */
+#define TCR_EL1_HA             (1ULL << 39)
+
+void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
+                                       uint32_t *ipa16k, uint32_t *ipa64k);
+
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu);
+
+typedef void(*handler_fn)(struct ex_regs *);
+void vm_install_exception_handler(struct kvm_vm *vm,
+               int vector, handler_fn handler);
+void vm_install_sync_handler(struct kvm_vm *vm,
+               int vector, int ec, handler_fn handler);
+
+uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva);
+
+static inline void cpu_relax(void)
+{
+       asm volatile("yield" ::: "memory");
+}
+
+#define isb()          asm volatile("isb" : : : "memory")
+#define dsb(opt)       asm volatile("dsb " #opt : : : "memory")
+#define dmb(opt)       asm volatile("dmb " #opt : : : "memory")
+
+#define dma_wmb()      dmb(oshst)
+#define __iowmb()      dma_wmb()
+
+#define dma_rmb()      dmb(oshld)
+
+#define __iormb(v)                                                     \
+({                                                                     \
+       unsigned long tmp;                                              \
+                                                                       \
+       dma_rmb();                                                      \
+                                                                       \
+       /*                                                              \
+        * Courtesy of arch/arm64/include/asm/io.h:                     \
+        * Create a dummy control dependency from the IO read to any    \
+        * later instructions. This ensures that a subsequent call      \
+        * to udelay() will be ordered due to the ISB in __delay().     \
+        */                                                             \
+       asm volatile("eor       %0, %1, %1\n"                           \
+                    "cbnz      %0, ."                                  \
+                    : "=r" (tmp) : "r" ((unsigned long)(v))            \
+                    : "memory");                                       \
+})
+
+static __always_inline void __raw_writel(u32 val, volatile void *addr)
+{
+       asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
+}
+
+static __always_inline u32 __raw_readl(const volatile void *addr)
+{
+       u32 val;
+       asm volatile("ldr %w0, [%1]" : "=r" (val) : "r" (addr));
+       return val;
+}
+
+static __always_inline void __raw_writeq(u64 val, volatile void *addr)
+{
+       asm volatile("str %0, [%1]" : : "rZ" (val), "r" (addr));
+}
+
+static __always_inline u64 __raw_readq(const volatile void *addr)
+{
+       u64 val;
+       asm volatile("ldr %0, [%1]" : "=r" (val) : "r" (addr));
+       return val;
+}
+
+#define writel_relaxed(v,c)    ((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
+#define readl_relaxed(c)       ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; })
+#define writeq_relaxed(v,c)    ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c)))
+#define readq_relaxed(c)       ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; })
+
+#define writel(v,c)            ({ __iowmb(); writel_relaxed((v),(c));})
+#define readl(c)               ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
+#define writeq(v,c)            ({ __iowmb(); writeq_relaxed((v),(c));})
+#define readq(c)               ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; })
+
+
+static inline void local_irq_enable(void)
+{
+       asm volatile("msr daifclr, #3" : : : "memory");
+}
+
+static inline void local_irq_disable(void)
+{
+       asm volatile("msr daifset, #3" : : : "memory");
+}
+
+/**
+ * struct arm_smccc_res - Result from SMC/HVC call
+ * @a0-a3 result values from registers 0 to 3
+ */
+struct arm_smccc_res {
+       unsigned long a0;
+       unsigned long a1;
+       unsigned long a2;
+       unsigned long a3;
+};
+
+/**
+ * smccc_hvc - Invoke a SMCCC function using the hvc conduit
+ * @function_id: the SMCCC function to be called
+ * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7
+ * @res: pointer to write the return values from registers x0-x3
+ *
+ */
+void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+              uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+              uint64_t arg6, struct arm_smccc_res *res);
+
+/**
+ * smccc_smc - Invoke a SMCCC function using the smc conduit
+ * @function_id: the SMCCC function to be called
+ * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7
+ * @res: pointer to write the return values from registers x0-x3
+ *
+ */
+void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+              uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+              uint64_t arg6, struct arm_smccc_res *res);
+
+/* Execute a Wait For Interrupt instruction. */
+void wfi(void);
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
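
As a guest-side usage sketch of smccc_hvc(), the call below queries the PSCI version; the function ID 0x84000000 (PSCI_0_2_FN_PSCI_VERSION) is assumed here for illustration and is not defined by this header.

/*
 * Guest-side sketch: query PSCI_VERSION over the HVC conduit.  The return
 * value encodes (major << 16) | minor in a0.
 */
static inline unsigned long example_psci_version(void)
{
	struct arm_smccc_res res;

	smccc_hvc(0x84000000, 0, 0, 0, 0, 0, 0, 0, &res);
	return res.a0;
}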
diff --git a/tools/testing/selftests/kvm/include/arm64/spinlock.h b/tools/testing/selftests/kvm/include/arm64/spinlock.h
new file mode 100644 (file)
index 0000000..cf09841
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef SELFTEST_KVM_ARM64_SPINLOCK_H
+#define SELFTEST_KVM_ARM64_SPINLOCK_H
+
+struct spinlock {
+       int v;
+};
+
+extern void spin_lock(struct spinlock *lock);
+extern void spin_unlock(struct spinlock *lock);
+
+#endif /* SELFTEST_KVM_ARM64_SPINLOCK_H */
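
A minimal usage sketch, assuming the lock and counter live in guest memory shared by all vCPUs:

/* Usage sketch: serialize updates to a counter shared between vCPUs. */
static struct spinlock counter_lock;
static int shared_counter;

static void bump_counter(void)
{
	spin_lock(&counter_lock);
	shared_counter++;
	spin_unlock(&counter_lock);
}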
diff --git a/tools/testing/selftests/kvm/include/arm64/ucall.h b/tools/testing/selftests/kvm/include/arm64/ucall.h
new file mode 100644 (file)
index 0000000..4ec801f
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util.h"
+
+#define UCALL_EXIT_REASON       KVM_EXIT_MMIO
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM); it must not be accessed from host code.
+ */
+extern vm_vaddr_t *ucall_exit_mmio_addr;
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+       WRITE_ONCE(*ucall_exit_mmio_addr, uc);
+}
+
+#endif
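
On the host side, the guest's store to the MMIO address surfaces as a KVM_EXIT_MMIO exit that the common ucall code decodes. A sketch of the usual run loop, assuming the generic selftests helpers (vcpu_run(), get_ucall(), TEST_ASSERT_EQ()) are available:

/* Host-side sketch: run the vCPU until the guest reports UCALL_DONE. */
static inline void example_run_until_done(struct kvm_vcpu *vcpu)
{
	struct ucall uc;

	for (;;) {
		vcpu_run(vcpu);
		TEST_ASSERT_EQ(vcpu->run->exit_reason, UCALL_EXIT_REASON);

		if (get_ucall(vcpu, &uc) == UCALL_DONE)
			break;
	}
}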
diff --git a/tools/testing/selftests/kvm/include/arm64/vgic.h b/tools/testing/selftests/kvm/include/arm64/vgic.h
new file mode 100644 (file)
index 0000000..c481d0c
--- /dev/null
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) host specific defines
+ */
+
+#ifndef SELFTEST_KVM_VGIC_H
+#define SELFTEST_KVM_VGIC_H
+
+#include <linux/kvm.h>
+
+#include "kvm_util.h"
+
+#define REDIST_REGION_ATTR_ADDR(count, base, flags, index) \
+       (((uint64_t)(count) << 52) | \
+       ((uint64_t)((base) >> 16) << 16) | \
+       ((uint64_t)(flags) << 12) | \
+       index)
+
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs);
+
+#define VGIC_MAX_RESERVED      1023
+
+void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
+int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
+
+void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
+int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
+
+/* The vcpu arg only applies to private interrupts. */
+void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
+void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
+
+#define KVM_IRQCHIP_NUM_PINS   (1020 - 32)
+
+int vgic_its_setup(struct kvm_vm *vm);
+
+#endif // SELFTEST_KVM_VGIC_H
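
REDIST_REGION_ATTR_ADDR() packs the count/base/flags/index tuple consumed by the KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION device attribute. An illustrative encoding, with an assumed 64K-aligned guest physical base address:

/*
 * Illustrative only: encode one redistributor region for "nr_vcpus" vCPUs.
 * The base address is a placeholder, not a value from this patch.
 */
static inline uint64_t example_redist_region(uint32_t nr_vcpus)
{
	const uint64_t base = 0x80000000;	/* assumed GPA, 64K aligned */

	return REDIST_REGION_ATTR_ADDR(nr_vcpus, base, 0 /* flags */, 0 /* index */);
}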
diff --git a/tools/testing/selftests/kvm/include/s390/debug_print.h b/tools/testing/selftests/kvm/include/s390/debug_print.h
new file mode 100644 (file)
index 0000000..1bf2756
--- /dev/null
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Definition for kernel virtual machines on s390x
+ *
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ *  Christoph Schlameuss <schlameuss@linux.ibm.com>
+ */
+
+#ifndef SELFTEST_KVM_DEBUG_PRINT_H
+#define SELFTEST_KVM_DEBUG_PRINT_H
+
+#include "asm/ptrace.h"
+#include "kvm_util.h"
+#include "sie.h"
+
+static inline void print_hex_bytes(const char *name, u64 addr, size_t len)
+{
+       u64 pos;
+
+       pr_debug("%s (%p)\n", name, (void *)addr);
+       pr_debug("            0/0x00---------|");
+       if (len > 8)
+               pr_debug(" 8/0x08---------|");
+       if (len > 16)
+               pr_debug(" 16/0x10--------|");
+       if (len > 24)
+               pr_debug(" 24/0x18--------|");
+       for (pos = 0; pos < len; pos += 8) {
+               if ((pos % 32) == 0)
+                       pr_debug("\n %3lu 0x%.3lx ", pos, pos);
+               pr_debug(" %16lx", *((u64 *)(addr + pos)));
+       }
+       pr_debug("\n");
+}
+
+static inline void print_hex(const char *name, u64 addr)
+{
+       print_hex_bytes(name, addr, 512);
+}
+
+static inline void print_psw(struct kvm_run *run, struct kvm_s390_sie_block *sie_block)
+{
+       pr_debug("flags:0x%x psw:0x%.16llx:0x%.16llx exit:%u %s\n",
+                run->flags,
+                run->psw_mask, run->psw_addr,
+                run->exit_reason, exit_reason_str(run->exit_reason));
+       pr_debug("sie_block psw:0x%.16llx:0x%.16llx\n",
+                sie_block->psw_mask, sie_block->psw_addr);
+}
+
+static inline void print_run(struct kvm_run *run, struct kvm_s390_sie_block *sie_block)
+{
+       print_hex_bytes("run", (u64)run, 0x150);
+       print_hex("sie_block", (u64)sie_block);
+       print_psw(run, sie_block);
+}
+
+static inline void print_regs(struct kvm_run *run)
+{
+       struct kvm_sync_regs *sync_regs = &run->s.regs;
+
+       print_hex_bytes("GPRS", (u64)sync_regs->gprs, 8 * NUM_GPRS);
+       print_hex_bytes("ACRS", (u64)sync_regs->acrs, 4 * NUM_ACRS);
+       print_hex_bytes("CRS", (u64)sync_regs->crs, 8 * NUM_CRS);
+}
+
+#endif /* SELFTEST_KVM_DEBUG_PRINT_H */
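
These helpers are intended for dumping state when a vCPU exits unexpectedly; a minimal sketch (obtaining the SIE block is test-specific and omitted here):

/* Usage sketch: dump the sync regs on an unexpected exit reason. */
static inline void example_dump_on_exit(struct kvm_vcpu *vcpu)
{
	if (vcpu->run->exit_reason != KVM_EXIT_S390_SIEIC)
		print_regs(vcpu->run);
}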
diff --git a/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h
new file mode 100644 (file)
index 0000000..b0ed713
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Test handler for the s390x DIAGNOSE 0x0318 instruction.
+ *
+ * Copyright (C) 2020, IBM
+ */
+
+#ifndef SELFTEST_KVM_DIAG318_TEST_HANDLER
+#define SELFTEST_KVM_DIAG318_TEST_HANDLER
+
+uint64_t get_diag318_info(void);
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/s390/facility.h b/tools/testing/selftests/kvm/include/s390/facility.h
new file mode 100644 (file)
index 0000000..00a1ced
--- /dev/null
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ *  Hariharan Mari <hari55@linux.ibm.com>
+ *
+ * Get the facility bits with the STFLE instruction
+ */
+
+#ifndef SELFTEST_KVM_FACILITY_H
+#define SELFTEST_KVM_FACILITY_H
+
+#include <linux/bitops.h>
+
+/* alt_stfle_fac_list[16] + stfle_fac_list[16] */
+#define NB_STFL_DOUBLEWORDS 32
+
+extern uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS];
+extern bool stfle_flag;
+
+static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr)
+{
+       return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+static inline void stfle(uint64_t *fac, unsigned int nb_doublewords)
+{
+       register unsigned long r0 asm("0") = nb_doublewords - 1;
+
+       asm volatile("  .insn   s,0xb2b00000,0(%1)\n"
+                       : "+d" (r0)
+                       : "a" (fac)
+                       : "memory", "cc");
+}
+
+static inline void setup_facilities(void)
+{
+       stfle(stfl_doublewords, NB_STFL_DOUBLEWORDS);
+       stfle_flag = true;
+}
+
+static inline bool test_facility(int nr)
+{
+       if (!stfle_flag)
+               setup_facilities();
+       return test_bit_inv(nr, stfl_doublewords);
+}
+
+#endif
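
STFLE numbers facility bits most-significant-bit first within each doubleword, which is why test_bit_inv() flips the bit number before calling the generic test_bit(). A usage sketch for gating a test on a facility follows; facility 8 (enhanced-DAT 1) is used purely as an example number.

/*
 * Usage sketch: skip a test when a facility is absent.  Assumes the common
 * selftests TEST_REQUIRE() helper; the facility number is illustrative.
 */
static inline void example_require_edat1(void)
{
	TEST_REQUIRE(test_facility(8));
}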
diff --git a/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h
new file mode 100644 (file)
index 0000000..e43a57d
--- /dev/null
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif  // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/s390/processor.h b/tools/testing/selftests/kvm/include/s390/processor.h
new file mode 100644 (file)
index 0000000..33fef6f
--- /dev/null
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * s390x processor specific defines
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include <linux/compiler.h>
+
+/* Bits in the region/segment table entry */
+#define REGION_ENTRY_ORIGIN    ~0xfffUL /* region/segment table origin    */
+#define REGION_ENTRY_PROTECT   0x200    /* region protection bit          */
+#define REGION_ENTRY_NOEXEC    0x100    /* region no-execute bit          */
+#define REGION_ENTRY_OFFSET    0xc0     /* region table offset            */
+#define REGION_ENTRY_INVALID   0x20     /* invalid region table entry     */
+#define REGION_ENTRY_TYPE      0x0c     /* region/segment table type mask */
+#define REGION_ENTRY_LENGTH    0x03     /* region third length            */
+
+/* Bits in the page table entry */
+#define PAGE_INVALID   0x400           /* HW invalid bit    */
+#define PAGE_PROTECT   0x200           /* HW read-only bit  */
+#define PAGE_NOEXEC    0x100           /* HW no-execute bit */
+
+/* Page size definitions */
+#define PAGE_SHIFT 12
+#define PAGE_SIZE BIT_ULL(PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE - 1))
+
+/* Is there a portable way to do this? */
+static inline void cpu_relax(void)
+{
+       barrier();
+}
+
+/* Get the instruction length */
+static inline int insn_length(unsigned char code)
+{
+       return ((((int)code + 64) >> 7) + 1) << 1;
+}
+
+#endif
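
insn_length() derives the length from the two top bits of the first opcode byte: 00 selects 2 bytes, 01 and 10 select 4 bytes, and 11 selects 6 bytes. A worked check (assumes <assert.h>):

/* Worked example for insn_length() on representative opcode bytes. */
static inline void example_insn_length_check(void)
{
	assert(insn_length(0x07) == 2);	/* 00xxxxxx -> 2 bytes (e.g. BCR)  */
	assert(insn_length(0x47) == 4);	/* 01xxxxxx -> 4 bytes (e.g. BC)   */
	assert(insn_length(0xb2) == 4);	/* 10xxxxxx -> 4 bytes (e.g. B2xx) */
	assert(insn_length(0xc0) == 6);	/* 11xxxxxx -> 6 bytes (e.g. LARL) */
}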
diff --git a/tools/testing/selftests/kvm/include/s390/sie.h b/tools/testing/selftests/kvm/include/s390/sie.h
new file mode 100644 (file)
index 0000000..160acd4
--- /dev/null
@@ -0,0 +1,240 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Definition for kernel virtual machines on s390.
+ *
+ * Adapted copy of struct definition kvm_s390_sie_block from
+ * arch/s390/include/asm/kvm_host.h for use in userspace selftest programs.
+ *
+ * Copyright IBM Corp. 2008, 2024
+ *
+ * Authors:
+ *  Christoph Schlameuss <schlameuss@linux.ibm.com>
+ *  Carsten Otte <cotte@de.ibm.com>
+ */
+
+#ifndef SELFTEST_KVM_SIE_H
+#define SELFTEST_KVM_SIE_H
+
+#include <linux/types.h>
+
+struct kvm_s390_sie_block {
+#define CPUSTAT_STOPPED    0x80000000
+#define CPUSTAT_WAIT       0x10000000
+#define CPUSTAT_ECALL_PEND 0x08000000
+#define CPUSTAT_STOP_INT   0x04000000
+#define CPUSTAT_IO_INT     0x02000000
+#define CPUSTAT_EXT_INT    0x01000000
+#define CPUSTAT_RUNNING    0x00800000
+#define CPUSTAT_RETAINED   0x00400000
+#define CPUSTAT_TIMING_SUB 0x00020000
+#define CPUSTAT_SIE_SUB    0x00010000
+#define CPUSTAT_RRF        0x00008000
+#define CPUSTAT_SLSV       0x00004000
+#define CPUSTAT_SLSR       0x00002000
+#define CPUSTAT_ZARCH      0x00000800
+#define CPUSTAT_MCDS       0x00000100
+#define CPUSTAT_KSS        0x00000200
+#define CPUSTAT_SM         0x00000080
+#define CPUSTAT_IBS        0x00000040
+#define CPUSTAT_GED2       0x00000010
+#define CPUSTAT_G          0x00000008
+#define CPUSTAT_GED        0x00000004
+#define CPUSTAT_J          0x00000002
+#define CPUSTAT_P          0x00000001
+       __u32 cpuflags;                 /* 0x0000 */
+       __u32: 1;                       /* 0x0004 */
+       __u32 prefix : 18;
+       __u32: 1;
+       __u32 ibc : 12;
+       __u8    reserved08[4];          /* 0x0008 */
+#define PROG_IN_SIE BIT(0)
+       __u32   prog0c;                 /* 0x000c */
+       union {
+               __u8    reserved10[16]; /* 0x0010 */
+               struct {
+                       __u64   pv_handle_cpu;
+                       __u64   pv_handle_config;
+               };
+       };
+#define PROG_BLOCK_SIE BIT(0)
+#define PROG_REQUEST   BIT(1)
+       __u32   prog20;                 /* 0x0020 */
+       __u8    reserved24[4];          /* 0x0024 */
+       __u64   cputm;                  /* 0x0028 */
+       __u64   ckc;                    /* 0x0030 */
+       __u64   epoch;                  /* 0x0038 */
+       __u32   svcc;                   /* 0x0040 */
+#define LCTL_CR0       0x8000
+#define LCTL_CR6       0x0200
+#define LCTL_CR9       0x0040
+#define LCTL_CR10      0x0020
+#define LCTL_CR11      0x0010
+#define LCTL_CR14      0x0002
+       __u16   lctl;                   /* 0x0044 */
+       __s16   icpua;                  /* 0x0046 */
+#define ICTL_OPEREXC   0x80000000
+#define ICTL_PINT      0x20000000
+#define ICTL_LPSW      0x00400000
+#define ICTL_STCTL     0x00040000
+#define ICTL_ISKE      0x00004000
+#define ICTL_SSKE      0x00002000
+#define ICTL_RRBE      0x00001000
+#define ICTL_TPROT     0x00000200
+       __u32   ictl;                   /* 0x0048 */
+#define ECA_CEI                0x80000000
+#define ECA_IB         0x40000000
+#define ECA_SIGPI      0x10000000
+#define ECA_MVPGI      0x01000000
+#define ECA_AIV                0x00200000
+#define ECA_VX         0x00020000
+#define ECA_PROTEXCI   0x00002000
+#define ECA_APIE       0x00000008
+#define ECA_SII                0x00000001
+       __u32   eca;                    /* 0x004c */
+#define ICPT_INST      0x04
+#define ICPT_PROGI     0x08
+#define ICPT_INSTPROGI 0x0C
+#define ICPT_EXTREQ    0x10
+#define ICPT_EXTINT    0x14
+#define ICPT_IOREQ     0x18
+#define ICPT_WAIT      0x1c
+#define ICPT_VALIDITY  0x20
+#define ICPT_STOP      0x28
+#define ICPT_OPEREXC   0x2C
+#define ICPT_PARTEXEC  0x38
+#define ICPT_IOINST    0x40
+#define ICPT_KSS       0x5c
+#define ICPT_MCHKREQ   0x60
+#define ICPT_INT_ENABLE        0x64
+#define ICPT_PV_INSTR  0x68
+#define ICPT_PV_NOTIFY 0x6c
+#define ICPT_PV_PREF   0x70
+       __u8    icptcode;               /* 0x0050 */
+       __u8    icptstatus;             /* 0x0051 */
+       __u16   ihcpu;                  /* 0x0052 */
+       __u8    reserved54;             /* 0x0054 */
+#define IICTL_CODE_NONE                 0x00
+#define IICTL_CODE_MCHK                 0x01
+#define IICTL_CODE_EXT          0x02
+#define IICTL_CODE_IO           0x03
+#define IICTL_CODE_RESTART      0x04
+#define IICTL_CODE_SPECIFICATION 0x10
+#define IICTL_CODE_OPERAND      0x11
+       __u8    iictl;                  /* 0x0055 */
+       __u16   ipa;                    /* 0x0056 */
+       __u32   ipb;                    /* 0x0058 */
+       __u32   scaoh;                  /* 0x005c */
+#define FPF_BPBC       0x20
+       __u8    fpf;                    /* 0x0060 */
+#define ECB_GS         0x40
+#define ECB_TE         0x10
+#define ECB_SPECI      0x08
+#define ECB_SRSI       0x04
+#define ECB_HOSTPROTINT        0x02
+#define ECB_PTF                0x01
+       __u8    ecb;                    /* 0x0061 */
+#define ECB2_CMMA      0x80
+#define ECB2_IEP       0x20
+#define ECB2_PFMFI     0x08
+#define ECB2_ESCA      0x04
+#define ECB2_ZPCI_LSI  0x02
+       __u8    ecb2;                   /* 0x0062 */
+#define ECB3_AISI      0x20
+#define ECB3_AISII     0x10
+#define ECB3_DEA       0x08
+#define ECB3_AES       0x04
+#define ECB3_RI                0x01
+       __u8    ecb3;                   /* 0x0063 */
+#define ESCA_SCAOL_MASK ~0x3fU
+       __u32   scaol;                  /* 0x0064 */
+       __u8    sdf;                    /* 0x0068 */
+       __u8    epdx;                   /* 0x0069 */
+       __u8    cpnc;                   /* 0x006a */
+       __u8    reserved6b;             /* 0x006b */
+       __u32   todpr;                  /* 0x006c */
+#define GISA_FORMAT1 0x00000001
+       __u32   gd;                     /* 0x0070 */
+       __u8    reserved74[12];         /* 0x0074 */
+       __u64   mso;                    /* 0x0080 */
+       __u64   msl;                    /* 0x0088 */
+       __u64   psw_mask;               /* 0x0090 */
+       __u64   psw_addr;               /* 0x0098 */
+       __u64   gg14;                   /* 0x00a0 */
+       __u64   gg15;                   /* 0x00a8 */
+       __u8    reservedb0[8];          /* 0x00b0 */
+#define HPID_KVM       0x4
+#define HPID_VSIE      0x5
+       __u8    hpid;                   /* 0x00b8 */
+       __u8    reservedb9[7];          /* 0x00b9 */
+       union {
+               struct {
+                       __u32   eiparams;       /* 0x00c0 */
+                       __u16   extcpuaddr;     /* 0x00c4 */
+                       __u16   eic;            /* 0x00c6 */
+               };
+               __u64   mcic;                   /* 0x00c0 */
+       } __packed;
+       __u32   reservedc8;             /* 0x00c8 */
+       union {
+               struct {
+                       __u16   pgmilc;         /* 0x00cc */
+                       __u16   iprcc;          /* 0x00ce */
+               };
+               __u32   edc;                    /* 0x00cc */
+       } __packed;
+       union {
+               struct {
+                       __u32   dxc;            /* 0x00d0 */
+                       __u16   mcn;            /* 0x00d4 */
+                       __u8    perc;           /* 0x00d6 */
+                       __u8    peratmid;       /* 0x00d7 */
+               };
+               __u64   faddr;                  /* 0x00d0 */
+       } __packed;
+       __u64   peraddr;                /* 0x00d8 */
+       __u8    eai;                    /* 0x00e0 */
+       __u8    peraid;                 /* 0x00e1 */
+       __u8    oai;                    /* 0x00e2 */
+       __u8    armid;                  /* 0x00e3 */
+       __u8    reservede4[4];          /* 0x00e4 */
+       union {
+               __u64   tecmc;          /* 0x00e8 */
+               struct {
+                       __u16   subchannel_id;  /* 0x00e8 */
+                       __u16   subchannel_nr;  /* 0x00ea */
+                       __u32   io_int_parm;    /* 0x00ec */
+                       __u32   io_int_word;    /* 0x00f0 */
+               };
+       } __packed;
+       __u8    reservedf4[8];          /* 0x00f4 */
+#define CRYCB_FORMAT_MASK      0x00000003
+#define CRYCB_FORMAT0          0x00000000
+#define CRYCB_FORMAT1          0x00000001
+#define CRYCB_FORMAT2          0x00000003
+       __u32   crycbd;                 /* 0x00fc */
+       __u64   gcr[16];                /* 0x0100 */
+       union {
+               __u64   gbea;           /* 0x0180 */
+               __u64   sidad;
+       };
+       __u8    reserved188[8];         /* 0x0188 */
+       __u64   sdnxo;                  /* 0x0190 */
+       __u8    reserved198[8];         /* 0x0198 */
+       __u32   fac;                    /* 0x01a0 */
+       __u8    reserved1a4[20];        /* 0x01a4 */
+       __u64   cbrlo;                  /* 0x01b8 */
+       __u8    reserved1c0[8];         /* 0x01c0 */
+#define ECD_HOSTREGMGMT        0x20000000
+#define ECD_MEF                0x08000000
+#define ECD_ETOKENF    0x02000000
+#define ECD_ECC                0x00200000
+       __u32   ecd;                    /* 0x01c8 */
+       __u8    reserved1cc[18];        /* 0x01cc */
+       __u64   pp;                     /* 0x01de */
+       __u8    reserved1e6[2];         /* 0x01e6 */
+       __u64   itdba;                  /* 0x01e8 */
+       __u64   riccbd;                 /* 0x01f0 */
+       __u64   gvrd;                   /* 0x01f8 */
+} __packed __aligned(512);
+
+#endif /* SELFTEST_KVM_SIE_H */
diff --git a/tools/testing/selftests/kvm/include/s390/ucall.h b/tools/testing/selftests/kvm/include/s390/ucall.h
new file mode 100644 (file)
index 0000000..8035a87
--- /dev/null
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util.h"
+
+#define UCALL_EXIT_REASON       KVM_EXIT_S390_SIEIC
+
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+       /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
+       asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory");
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/s390x/debug_print.h b/tools/testing/selftests/kvm/include/s390x/debug_print.h
deleted file mode 100644 (file)
index 1bf2756..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Definition for kernel virtual machines on s390x
- *
- * Copyright IBM Corp. 2024
- *
- * Authors:
- *  Christoph Schlameuss <schlameuss@linux.ibm.com>
- */
-
-#ifndef SELFTEST_KVM_DEBUG_PRINT_H
-#define SELFTEST_KVM_DEBUG_PRINT_H
-
-#include "asm/ptrace.h"
-#include "kvm_util.h"
-#include "sie.h"
-
-static inline void print_hex_bytes(const char *name, u64 addr, size_t len)
-{
-       u64 pos;
-
-       pr_debug("%s (%p)\n", name, (void *)addr);
-       pr_debug("            0/0x00---------|");
-       if (len > 8)
-               pr_debug(" 8/0x08---------|");
-       if (len > 16)
-               pr_debug(" 16/0x10--------|");
-       if (len > 24)
-               pr_debug(" 24/0x18--------|");
-       for (pos = 0; pos < len; pos += 8) {
-               if ((pos % 32) == 0)
-                       pr_debug("\n %3lu 0x%.3lx ", pos, pos);
-               pr_debug(" %16lx", *((u64 *)(addr + pos)));
-       }
-       pr_debug("\n");
-}
-
-static inline void print_hex(const char *name, u64 addr)
-{
-       print_hex_bytes(name, addr, 512);
-}
-
-static inline void print_psw(struct kvm_run *run, struct kvm_s390_sie_block *sie_block)
-{
-       pr_debug("flags:0x%x psw:0x%.16llx:0x%.16llx exit:%u %s\n",
-                run->flags,
-                run->psw_mask, run->psw_addr,
-                run->exit_reason, exit_reason_str(run->exit_reason));
-       pr_debug("sie_block psw:0x%.16llx:0x%.16llx\n",
-                sie_block->psw_mask, sie_block->psw_addr);
-}
-
-static inline void print_run(struct kvm_run *run, struct kvm_s390_sie_block *sie_block)
-{
-       print_hex_bytes("run", (u64)run, 0x150);
-       print_hex("sie_block", (u64)sie_block);
-       print_psw(run, sie_block);
-}
-
-static inline void print_regs(struct kvm_run *run)
-{
-       struct kvm_sync_regs *sync_regs = &run->s.regs;
-
-       print_hex_bytes("GPRS", (u64)sync_regs->gprs, 8 * NUM_GPRS);
-       print_hex_bytes("ACRS", (u64)sync_regs->acrs, 4 * NUM_ACRS);
-       print_hex_bytes("CRS", (u64)sync_regs->crs, 8 * NUM_CRS);
-}
-
-#endif /* SELFTEST_KVM_DEBUG_PRINT_H */
diff --git a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h
deleted file mode 100644 (file)
index b0ed713..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later
- *
- * Test handler for the s390x DIAGNOSE 0x0318 instruction.
- *
- * Copyright (C) 2020, IBM
- */
-
-#ifndef SELFTEST_KVM_DIAG318_TEST_HANDLER
-#define SELFTEST_KVM_DIAG318_TEST_HANDLER
-
-uint64_t get_diag318_info(void);
-
-#endif
diff --git a/tools/testing/selftests/kvm/include/s390x/facility.h b/tools/testing/selftests/kvm/include/s390x/facility.h
deleted file mode 100644 (file)
index 00a1ced..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright IBM Corp. 2024
- *
- * Authors:
- *  Hariharan Mari <hari55@linux.ibm.com>
- *
- * Get the facility bits with the STFLE instruction
- */
-
-#ifndef SELFTEST_KVM_FACILITY_H
-#define SELFTEST_KVM_FACILITY_H
-
-#include <linux/bitops.h>
-
-/* alt_stfle_fac_list[16] + stfle_fac_list[16] */
-#define NB_STFL_DOUBLEWORDS 32
-
-extern uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS];
-extern bool stfle_flag;
-
-static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr)
-{
-       return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
-}
-
-static inline void stfle(uint64_t *fac, unsigned int nb_doublewords)
-{
-       register unsigned long r0 asm("0") = nb_doublewords - 1;
-
-       asm volatile("  .insn   s,0xb2b00000,0(%1)\n"
-                       : "+d" (r0)
-                       : "a" (fac)
-                       : "memory", "cc");
-}
-
-static inline void setup_facilities(void)
-{
-       stfle(stfl_doublewords, NB_STFL_DOUBLEWORDS);
-       stfle_flag = true;
-}
-
-static inline bool test_facility(int nr)
-{
-       if (!stfle_flag)
-               setup_facilities();
-       return test_bit_inv(nr, stfl_doublewords);
-}
-
-#endif
diff --git a/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h
deleted file mode 100644 (file)
index e43a57d..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_UTIL_ARCH_H
-#define SELFTEST_KVM_UTIL_ARCH_H
-
-struct kvm_vm_arch {};
-
-#endif  // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390x/processor.h
deleted file mode 100644 (file)
index 33fef6f..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * s390x processor specific defines
- */
-#ifndef SELFTEST_KVM_PROCESSOR_H
-#define SELFTEST_KVM_PROCESSOR_H
-
-#include <linux/compiler.h>
-
-/* Bits in the region/segment table entry */
-#define REGION_ENTRY_ORIGIN    ~0xfffUL /* region/segment table origin    */
-#define REGION_ENTRY_PROTECT   0x200    /* region protection bit          */
-#define REGION_ENTRY_NOEXEC    0x100    /* region no-execute bit          */
-#define REGION_ENTRY_OFFSET    0xc0     /* region table offset            */
-#define REGION_ENTRY_INVALID   0x20     /* invalid region table entry     */
-#define REGION_ENTRY_TYPE      0x0c     /* region/segment table type mask */
-#define REGION_ENTRY_LENGTH    0x03     /* region third length            */
-
-/* Bits in the page table entry */
-#define PAGE_INVALID   0x400           /* HW invalid bit    */
-#define PAGE_PROTECT   0x200           /* HW read-only bit  */
-#define PAGE_NOEXEC    0x100           /* HW no-execute bit */
-
-/* Page size definitions */
-#define PAGE_SHIFT 12
-#define PAGE_SIZE BIT_ULL(PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE - 1))
-
-/* Is there a portable way to do this? */
-static inline void cpu_relax(void)
-{
-       barrier();
-}
-
-/* Get the instruction length */
-static inline int insn_length(unsigned char code)
-{
-       return ((((int)code + 64) >> 7) + 1) << 1;
-}
-
-#endif
diff --git a/tools/testing/selftests/kvm/include/s390x/sie.h b/tools/testing/selftests/kvm/include/s390x/sie.h
deleted file mode 100644 (file)
index 160acd4..0000000
+++ /dev/null
@@ -1,240 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Definition for kernel virtual machines on s390.
- *
- * Adapted copy of struct definition kvm_s390_sie_block from
- * arch/s390/include/asm/kvm_host.h for use in userspace selftest programs.
- *
- * Copyright IBM Corp. 2008, 2024
- *
- * Authors:
- *  Christoph Schlameuss <schlameuss@linux.ibm.com>
- *  Carsten Otte <cotte@de.ibm.com>
- */
-
-#ifndef SELFTEST_KVM_SIE_H
-#define SELFTEST_KVM_SIE_H
-
-#include <linux/types.h>
-
-struct kvm_s390_sie_block {
-#define CPUSTAT_STOPPED    0x80000000
-#define CPUSTAT_WAIT       0x10000000
-#define CPUSTAT_ECALL_PEND 0x08000000
-#define CPUSTAT_STOP_INT   0x04000000
-#define CPUSTAT_IO_INT     0x02000000
-#define CPUSTAT_EXT_INT    0x01000000
-#define CPUSTAT_RUNNING    0x00800000
-#define CPUSTAT_RETAINED   0x00400000
-#define CPUSTAT_TIMING_SUB 0x00020000
-#define CPUSTAT_SIE_SUB    0x00010000
-#define CPUSTAT_RRF        0x00008000
-#define CPUSTAT_SLSV       0x00004000
-#define CPUSTAT_SLSR       0x00002000
-#define CPUSTAT_ZARCH      0x00000800
-#define CPUSTAT_MCDS       0x00000100
-#define CPUSTAT_KSS        0x00000200
-#define CPUSTAT_SM         0x00000080
-#define CPUSTAT_IBS        0x00000040
-#define CPUSTAT_GED2       0x00000010
-#define CPUSTAT_G          0x00000008
-#define CPUSTAT_GED        0x00000004
-#define CPUSTAT_J          0x00000002
-#define CPUSTAT_P          0x00000001
-       __u32 cpuflags;                 /* 0x0000 */
-       __u32: 1;                       /* 0x0004 */
-       __u32 prefix : 18;
-       __u32: 1;
-       __u32 ibc : 12;
-       __u8    reserved08[4];          /* 0x0008 */
-#define PROG_IN_SIE BIT(0)
-       __u32   prog0c;                 /* 0x000c */
-       union {
-               __u8    reserved10[16]; /* 0x0010 */
-               struct {
-                       __u64   pv_handle_cpu;
-                       __u64   pv_handle_config;
-               };
-       };
-#define PROG_BLOCK_SIE BIT(0)
-#define PROG_REQUEST   BIT(1)
-       __u32   prog20;                 /* 0x0020 */
-       __u8    reserved24[4];          /* 0x0024 */
-       __u64   cputm;                  /* 0x0028 */
-       __u64   ckc;                    /* 0x0030 */
-       __u64   epoch;                  /* 0x0038 */
-       __u32   svcc;                   /* 0x0040 */
-#define LCTL_CR0       0x8000
-#define LCTL_CR6       0x0200
-#define LCTL_CR9       0x0040
-#define LCTL_CR10      0x0020
-#define LCTL_CR11      0x0010
-#define LCTL_CR14      0x0002
-       __u16   lctl;                   /* 0x0044 */
-       __s16   icpua;                  /* 0x0046 */
-#define ICTL_OPEREXC   0x80000000
-#define ICTL_PINT      0x20000000
-#define ICTL_LPSW      0x00400000
-#define ICTL_STCTL     0x00040000
-#define ICTL_ISKE      0x00004000
-#define ICTL_SSKE      0x00002000
-#define ICTL_RRBE      0x00001000
-#define ICTL_TPROT     0x00000200
-       __u32   ictl;                   /* 0x0048 */
-#define ECA_CEI                0x80000000
-#define ECA_IB         0x40000000
-#define ECA_SIGPI      0x10000000
-#define ECA_MVPGI      0x01000000
-#define ECA_AIV                0x00200000
-#define ECA_VX         0x00020000
-#define ECA_PROTEXCI   0x00002000
-#define ECA_APIE       0x00000008
-#define ECA_SII                0x00000001
-       __u32   eca;                    /* 0x004c */
-#define ICPT_INST      0x04
-#define ICPT_PROGI     0x08
-#define ICPT_INSTPROGI 0x0C
-#define ICPT_EXTREQ    0x10
-#define ICPT_EXTINT    0x14
-#define ICPT_IOREQ     0x18
-#define ICPT_WAIT      0x1c
-#define ICPT_VALIDITY  0x20
-#define ICPT_STOP      0x28
-#define ICPT_OPEREXC   0x2C
-#define ICPT_PARTEXEC  0x38
-#define ICPT_IOINST    0x40
-#define ICPT_KSS       0x5c
-#define ICPT_MCHKREQ   0x60
-#define ICPT_INT_ENABLE        0x64
-#define ICPT_PV_INSTR  0x68
-#define ICPT_PV_NOTIFY 0x6c
-#define ICPT_PV_PREF   0x70
-       __u8    icptcode;               /* 0x0050 */
-       __u8    icptstatus;             /* 0x0051 */
-       __u16   ihcpu;                  /* 0x0052 */
-       __u8    reserved54;             /* 0x0054 */
-#define IICTL_CODE_NONE                 0x00
-#define IICTL_CODE_MCHK                 0x01
-#define IICTL_CODE_EXT          0x02
-#define IICTL_CODE_IO           0x03
-#define IICTL_CODE_RESTART      0x04
-#define IICTL_CODE_SPECIFICATION 0x10
-#define IICTL_CODE_OPERAND      0x11
-       __u8    iictl;                  /* 0x0055 */
-       __u16   ipa;                    /* 0x0056 */
-       __u32   ipb;                    /* 0x0058 */
-       __u32   scaoh;                  /* 0x005c */
-#define FPF_BPBC       0x20
-       __u8    fpf;                    /* 0x0060 */
-#define ECB_GS         0x40
-#define ECB_TE         0x10
-#define ECB_SPECI      0x08
-#define ECB_SRSI       0x04
-#define ECB_HOSTPROTINT        0x02
-#define ECB_PTF                0x01
-       __u8    ecb;                    /* 0x0061 */
-#define ECB2_CMMA      0x80
-#define ECB2_IEP       0x20
-#define ECB2_PFMFI     0x08
-#define ECB2_ESCA      0x04
-#define ECB2_ZPCI_LSI  0x02
-       __u8    ecb2;                   /* 0x0062 */
-#define ECB3_AISI      0x20
-#define ECB3_AISII     0x10
-#define ECB3_DEA       0x08
-#define ECB3_AES       0x04
-#define ECB3_RI                0x01
-       __u8    ecb3;                   /* 0x0063 */
-#define ESCA_SCAOL_MASK ~0x3fU
-       __u32   scaol;                  /* 0x0064 */
-       __u8    sdf;                    /* 0x0068 */
-       __u8    epdx;                   /* 0x0069 */
-       __u8    cpnc;                   /* 0x006a */
-       __u8    reserved6b;             /* 0x006b */
-       __u32   todpr;                  /* 0x006c */
-#define GISA_FORMAT1 0x00000001
-       __u32   gd;                     /* 0x0070 */
-       __u8    reserved74[12];         /* 0x0074 */
-       __u64   mso;                    /* 0x0080 */
-       __u64   msl;                    /* 0x0088 */
-       __u64   psw_mask;               /* 0x0090 */
-       __u64   psw_addr;               /* 0x0098 */
-       __u64   gg14;                   /* 0x00a0 */
-       __u64   gg15;                   /* 0x00a8 */
-       __u8    reservedb0[8];          /* 0x00b0 */
-#define HPID_KVM       0x4
-#define HPID_VSIE      0x5
-       __u8    hpid;                   /* 0x00b8 */
-       __u8    reservedb9[7];          /* 0x00b9 */
-       union {
-               struct {
-                       __u32   eiparams;       /* 0x00c0 */
-                       __u16   extcpuaddr;     /* 0x00c4 */
-                       __u16   eic;            /* 0x00c6 */
-               };
-               __u64   mcic;                   /* 0x00c0 */
-       } __packed;
-       __u32   reservedc8;             /* 0x00c8 */
-       union {
-               struct {
-                       __u16   pgmilc;         /* 0x00cc */
-                       __u16   iprcc;          /* 0x00ce */
-               };
-               __u32   edc;                    /* 0x00cc */
-       } __packed;
-       union {
-               struct {
-                       __u32   dxc;            /* 0x00d0 */
-                       __u16   mcn;            /* 0x00d4 */
-                       __u8    perc;           /* 0x00d6 */
-                       __u8    peratmid;       /* 0x00d7 */
-               };
-               __u64   faddr;                  /* 0x00d0 */
-       } __packed;
-       __u64   peraddr;                /* 0x00d8 */
-       __u8    eai;                    /* 0x00e0 */
-       __u8    peraid;                 /* 0x00e1 */
-       __u8    oai;                    /* 0x00e2 */
-       __u8    armid;                  /* 0x00e3 */
-       __u8    reservede4[4];          /* 0x00e4 */
-       union {
-               __u64   tecmc;          /* 0x00e8 */
-               struct {
-                       __u16   subchannel_id;  /* 0x00e8 */
-                       __u16   subchannel_nr;  /* 0x00ea */
-                       __u32   io_int_parm;    /* 0x00ec */
-                       __u32   io_int_word;    /* 0x00f0 */
-               };
-       } __packed;
-       __u8    reservedf4[8];          /* 0x00f4 */
-#define CRYCB_FORMAT_MASK      0x00000003
-#define CRYCB_FORMAT0          0x00000000
-#define CRYCB_FORMAT1          0x00000001
-#define CRYCB_FORMAT2          0x00000003
-       __u32   crycbd;                 /* 0x00fc */
-       __u64   gcr[16];                /* 0x0100 */
-       union {
-               __u64   gbea;           /* 0x0180 */
-               __u64   sidad;
-       };
-       __u8    reserved188[8];         /* 0x0188 */
-       __u64   sdnxo;                  /* 0x0190 */
-       __u8    reserved198[8];         /* 0x0198 */
-       __u32   fac;                    /* 0x01a0 */
-       __u8    reserved1a4[20];        /* 0x01a4 */
-       __u64   cbrlo;                  /* 0x01b8 */
-       __u8    reserved1c0[8];         /* 0x01c0 */
-#define ECD_HOSTREGMGMT        0x20000000
-#define ECD_MEF                0x08000000
-#define ECD_ETOKENF    0x02000000
-#define ECD_ECC                0x00200000
-       __u32   ecd;                    /* 0x01c8 */
-       __u8    reserved1cc[18];        /* 0x01cc */
-       __u64   pp;                     /* 0x01de */
-       __u8    reserved1e6[2];         /* 0x01e6 */
-       __u64   itdba;                  /* 0x01e8 */
-       __u64   riccbd;                 /* 0x01f0 */
-       __u64   gvrd;                   /* 0x01f8 */
-} __packed __aligned(512);
-
-#endif /* SELFTEST_KVM_SIE_H */
diff --git a/tools/testing/selftests/kvm/include/s390x/ucall.h b/tools/testing/selftests/kvm/include/s390x/ucall.h
deleted file mode 100644 (file)
index 8035a87..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_UCALL_H
-#define SELFTEST_KVM_UCALL_H
-
-#include "kvm_util.h"
-
-#define UCALL_EXIT_REASON       KVM_EXIT_S390_SIEIC
-
-static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-}
-
-static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
-       /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
-       asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory");
-}
-
-#endif
diff --git a/tools/testing/selftests/kvm/include/x86/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h
new file mode 100644 (file)
index 0000000..80fe9f6
--- /dev/null
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_APIC_H
+#define SELFTEST_KVM_APIC_H
+
+#include <stdint.h>
+
+#include "processor.h"
+#include "ucall_common.h"
+
+#define APIC_DEFAULT_GPA               0xfee00000ULL
+
+/* APIC base address MSR and fields */
+#define MSR_IA32_APICBASE              0x0000001b
+#define MSR_IA32_APICBASE_BSP          (1<<8)
+#define MSR_IA32_APICBASE_EXTD         (1<<10)
+#define MSR_IA32_APICBASE_ENABLE       (1<<11)
+#define MSR_IA32_APICBASE_BASE         (0xfffff<<12)
+#define                GET_APIC_BASE(x)        (((x) >> 12) << 12)
+
+#define APIC_BASE_MSR  0x800
+#define X2APIC_ENABLE  (1UL << 10)
+#define        APIC_ID         0x20
+#define        APIC_LVR        0x30
+#define                GET_APIC_ID_FIELD(x)    (((x) >> 24) & 0xFF)
+#define        APIC_TASKPRI    0x80
+#define        APIC_PROCPRI    0xA0
+#define        APIC_EOI        0xB0
+#define        APIC_SPIV       0xF0
+#define                APIC_SPIV_FOCUS_DISABLED        (1 << 9)
+#define                APIC_SPIV_APIC_ENABLED          (1 << 8)
+#define APIC_IRR       0x200
+#define        APIC_ICR        0x300
+#define        APIC_LVTCMCI    0x2f0
+#define                APIC_DEST_SELF          0x40000
+#define                APIC_DEST_ALLINC        0x80000
+#define                APIC_DEST_ALLBUT        0xC0000
+#define                APIC_ICR_RR_MASK        0x30000
+#define                APIC_ICR_RR_INVALID     0x00000
+#define                APIC_ICR_RR_INPROG      0x10000
+#define                APIC_ICR_RR_VALID       0x20000
+#define                APIC_INT_LEVELTRIG      0x08000
+#define                APIC_INT_ASSERT         0x04000
+#define                APIC_ICR_BUSY           0x01000
+#define                APIC_DEST_LOGICAL       0x00800
+#define                APIC_DEST_PHYSICAL      0x00000
+#define                APIC_DM_FIXED           0x00000
+#define                APIC_DM_FIXED_MASK      0x00700
+#define                APIC_DM_LOWEST          0x00100
+#define                APIC_DM_SMI             0x00200
+#define                APIC_DM_REMRD           0x00300
+#define                APIC_DM_NMI             0x00400
+#define                APIC_DM_INIT            0x00500
+#define                APIC_DM_STARTUP         0x00600
+#define                APIC_DM_EXTINT          0x00700
+#define                APIC_VECTOR_MASK        0x000FF
+#define        APIC_ICR2       0x310
+#define                SET_APIC_DEST_FIELD(x)  ((x) << 24)
+#define APIC_LVTT      0x320
+#define                APIC_LVT_TIMER_ONESHOT          (0 << 17)
+#define                APIC_LVT_TIMER_PERIODIC         (1 << 17)
+#define                APIC_LVT_TIMER_TSCDEADLINE      (2 << 17)
+#define                APIC_LVT_MASKED                 (1 << 16)
+#define        APIC_TMICT      0x380
+#define        APIC_TMCCT      0x390
+#define        APIC_TDCR       0x3E0
+
+void apic_disable(void);
+void xapic_enable(void);
+void x2apic_enable(void);
+
+static inline uint32_t get_bsp_flag(void)
+{
+       return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
+}
+
+static inline uint32_t xapic_read_reg(unsigned int reg)
+{
+       return ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2];
+}
+
+static inline void xapic_write_reg(unsigned int reg, uint32_t val)
+{
+       ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2] = val;
+}
+
+static inline uint64_t x2apic_read_reg(unsigned int reg)
+{
+       return rdmsr(APIC_BASE_MSR + (reg >> 4));
+}
+
+static inline uint8_t x2apic_write_reg_safe(unsigned int reg, uint64_t value)
+{
+       return wrmsr_safe(APIC_BASE_MSR + (reg >> 4), value);
+}
+
+static inline void x2apic_write_reg(unsigned int reg, uint64_t value)
+{
+       uint8_t fault = x2apic_write_reg_safe(reg, value);
+
+       __GUEST_ASSERT(!fault, "Unexpected fault 0x%x on WRMSR(%x) = %lx\n",
+                      fault, APIC_BASE_MSR + (reg >> 4), value);
+}
+
+static inline void x2apic_write_reg_fault(unsigned int reg, uint64_t value)
+{
+       uint8_t fault = x2apic_write_reg_safe(reg, value);
+
+       __GUEST_ASSERT(fault == GP_VECTOR,
+                      "Wanted #GP on WRMSR(%x) = %lx, got 0x%x\n",
+                      APIC_BASE_MSR + (reg >> 4), value, fault);
+}
+
+
+#endif /* SELFTEST_KVM_APIC_H */
diff --git a/tools/testing/selftests/kvm/include/x86/evmcs.h b/tools/testing/selftests/kvm/include/x86/evmcs.h
new file mode 100644 (file)
index 0000000..5a74bb3
--- /dev/null
@@ -0,0 +1,1276 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#ifndef SELFTEST_KVM_EVMCS_H
+#define SELFTEST_KVM_EVMCS_H
+
+#include <stdint.h>
+#include "hyperv.h"
+#include "vmx.h"
+
+#define u16 uint16_t
+#define u32 uint32_t
+#define u64 uint64_t
+
+#define EVMCS_VERSION 1
+
+extern bool enable_evmcs;
+
+struct hv_enlightened_vmcs {
+       u32 revision_id;
+       u32 abort;
+
+       u16 host_es_selector;
+       u16 host_cs_selector;
+       u16 host_ss_selector;
+       u16 host_ds_selector;
+       u16 host_fs_selector;
+       u16 host_gs_selector;
+       u16 host_tr_selector;
+
+       u16 padding16_1;
+
+       u64 host_ia32_pat;
+       u64 host_ia32_efer;
+
+       u64 host_cr0;
+       u64 host_cr3;
+       u64 host_cr4;
+
+       u64 host_ia32_sysenter_esp;
+       u64 host_ia32_sysenter_eip;
+       u64 host_rip;
+       u32 host_ia32_sysenter_cs;
+
+       u32 pin_based_vm_exec_control;
+       u32 vm_exit_controls;
+       u32 secondary_vm_exec_control;
+
+       u64 io_bitmap_a;
+       u64 io_bitmap_b;
+       u64 msr_bitmap;
+
+       u16 guest_es_selector;
+       u16 guest_cs_selector;
+       u16 guest_ss_selector;
+       u16 guest_ds_selector;
+       u16 guest_fs_selector;
+       u16 guest_gs_selector;
+       u16 guest_ldtr_selector;
+       u16 guest_tr_selector;
+
+       u32 guest_es_limit;
+       u32 guest_cs_limit;
+       u32 guest_ss_limit;
+       u32 guest_ds_limit;
+       u32 guest_fs_limit;
+       u32 guest_gs_limit;
+       u32 guest_ldtr_limit;
+       u32 guest_tr_limit;
+       u32 guest_gdtr_limit;
+       u32 guest_idtr_limit;
+
+       u32 guest_es_ar_bytes;
+       u32 guest_cs_ar_bytes;
+       u32 guest_ss_ar_bytes;
+       u32 guest_ds_ar_bytes;
+       u32 guest_fs_ar_bytes;
+       u32 guest_gs_ar_bytes;
+       u32 guest_ldtr_ar_bytes;
+       u32 guest_tr_ar_bytes;
+
+       u64 guest_es_base;
+       u64 guest_cs_base;
+       u64 guest_ss_base;
+       u64 guest_ds_base;
+       u64 guest_fs_base;
+       u64 guest_gs_base;
+       u64 guest_ldtr_base;
+       u64 guest_tr_base;
+       u64 guest_gdtr_base;
+       u64 guest_idtr_base;
+
+       u64 padding64_1[3];
+
+       u64 vm_exit_msr_store_addr;
+       u64 vm_exit_msr_load_addr;
+       u64 vm_entry_msr_load_addr;
+
+       u64 cr3_target_value0;
+       u64 cr3_target_value1;
+       u64 cr3_target_value2;
+       u64 cr3_target_value3;
+
+       u32 page_fault_error_code_mask;
+       u32 page_fault_error_code_match;
+
+       u32 cr3_target_count;
+       u32 vm_exit_msr_store_count;
+       u32 vm_exit_msr_load_count;
+       u32 vm_entry_msr_load_count;
+
+       u64 tsc_offset;
+       u64 virtual_apic_page_addr;
+       u64 vmcs_link_pointer;
+
+       u64 guest_ia32_debugctl;
+       u64 guest_ia32_pat;
+       u64 guest_ia32_efer;
+
+       u64 guest_pdptr0;
+       u64 guest_pdptr1;
+       u64 guest_pdptr2;
+       u64 guest_pdptr3;
+
+       u64 guest_pending_dbg_exceptions;
+       u64 guest_sysenter_esp;
+       u64 guest_sysenter_eip;
+
+       u32 guest_activity_state;
+       u32 guest_sysenter_cs;
+
+       u64 cr0_guest_host_mask;
+       u64 cr4_guest_host_mask;
+       u64 cr0_read_shadow;
+       u64 cr4_read_shadow;
+       u64 guest_cr0;
+       u64 guest_cr3;
+       u64 guest_cr4;
+       u64 guest_dr7;
+
+       u64 host_fs_base;
+       u64 host_gs_base;
+       u64 host_tr_base;
+       u64 host_gdtr_base;
+       u64 host_idtr_base;
+       u64 host_rsp;
+
+       u64 ept_pointer;
+
+       u16 virtual_processor_id;
+       u16 padding16_2[3];
+
+       u64 padding64_2[5];
+       u64 guest_physical_address;
+
+       u32 vm_instruction_error;
+       u32 vm_exit_reason;
+       u32 vm_exit_intr_info;
+       u32 vm_exit_intr_error_code;
+       u32 idt_vectoring_info_field;
+       u32 idt_vectoring_error_code;
+       u32 vm_exit_instruction_len;
+       u32 vmx_instruction_info;
+
+       u64 exit_qualification;
+       u64 exit_io_instruction_ecx;
+       u64 exit_io_instruction_esi;
+       u64 exit_io_instruction_edi;
+       u64 exit_io_instruction_eip;
+
+       u64 guest_linear_address;
+       u64 guest_rsp;
+       u64 guest_rflags;
+
+       u32 guest_interruptibility_info;
+       u32 cpu_based_vm_exec_control;
+       u32 exception_bitmap;
+       u32 vm_entry_controls;
+       u32 vm_entry_intr_info_field;
+       u32 vm_entry_exception_error_code;
+       u32 vm_entry_instruction_len;
+       u32 tpr_threshold;
+
+       u64 guest_rip;
+
+       u32 hv_clean_fields;
+       u32 padding32_1;
+       u32 hv_synthetic_controls;
+       struct {
+               u32 nested_flush_hypercall:1;
+               u32 msr_bitmap:1;
+               u32 reserved:30;
+       }  __packed hv_enlightenments_control;
+       u32 hv_vp_id;
+       u32 padding32_2;
+       u64 hv_vm_id;
+       u64 partition_assist_page;
+       u64 padding64_4[4];
+       u64 guest_bndcfgs;
+       u64 guest_ia32_perf_global_ctrl;
+       u64 guest_ia32_s_cet;
+       u64 guest_ssp;
+       u64 guest_ia32_int_ssp_table_addr;
+       u64 guest_ia32_lbr_ctl;
+       u64 padding64_5[2];
+       u64 xss_exit_bitmap;
+       u64 encls_exiting_bitmap;
+       u64 host_ia32_perf_global_ctrl;
+       u64 tsc_multiplier;
+       u64 host_ia32_s_cet;
+       u64 host_ssp;
+       u64 host_ia32_int_ssp_table_addr;
+       u64 padding64_6;
+} __packed;
+
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE                     0
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP                BIT(0)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP               BIT(1)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2             BIT(2)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1             BIT(3)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC             BIT(4)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT            BIT(5)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY            BIT(6)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN            BIT(7)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR                     BIT(8)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT             BIT(9)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC              BIT(10)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1               BIT(11)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2               BIT(12)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER             BIT(13)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1                BIT(14)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL    BIT(15)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL                      0xFFFF
+
+#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031
+
+extern struct hv_enlightened_vmcs *current_evmcs;
+
+int vcpu_enable_evmcs(struct kvm_vcpu *vcpu);
+
+static inline void evmcs_enable(void)
+{
+       enable_evmcs = true;
+}
+
+static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs)
+{
+       current_vp_assist->current_nested_vmcs = vmcs_pa;
+       current_vp_assist->enlighten_vmentry = 1;
+
+       current_evmcs = vmcs;
+
+       return 0;
+}
+
+static inline bool load_evmcs(struct hyperv_test_pages *hv)
+{
+       if (evmcs_vmptrld(hv->enlightened_vmcs_gpa, hv->enlightened_vmcs))
+               return false;
+
+       current_evmcs->revision_id = EVMCS_VERSION;
+
+       return true;
+}
+
+static inline int evmcs_vmptrst(uint64_t *value)
+{
+       *value = current_vp_assist->current_nested_vmcs &
+               ~HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+       return 0;
+}
+
+static inline int evmcs_vmread(uint64_t encoding, uint64_t *value)
+{
+       switch (encoding) {
+       case GUEST_RIP:
+               *value = current_evmcs->guest_rip;
+               break;
+       case GUEST_RSP:
+               *value = current_evmcs->guest_rsp;
+               break;
+       case GUEST_RFLAGS:
+               *value = current_evmcs->guest_rflags;
+               break;
+       case HOST_IA32_PAT:
+               *value = current_evmcs->host_ia32_pat;
+               break;
+       case HOST_IA32_EFER:
+               *value = current_evmcs->host_ia32_efer;
+               break;
+       case HOST_CR0:
+               *value = current_evmcs->host_cr0;
+               break;
+       case HOST_CR3:
+               *value = current_evmcs->host_cr3;
+               break;
+       case HOST_CR4:
+               *value = current_evmcs->host_cr4;
+               break;
+       case HOST_IA32_SYSENTER_ESP:
+               *value = current_evmcs->host_ia32_sysenter_esp;
+               break;
+       case HOST_IA32_SYSENTER_EIP:
+               *value = current_evmcs->host_ia32_sysenter_eip;
+               break;
+       case HOST_RIP:
+               *value = current_evmcs->host_rip;
+               break;
+       case IO_BITMAP_A:
+               *value = current_evmcs->io_bitmap_a;
+               break;
+       case IO_BITMAP_B:
+               *value = current_evmcs->io_bitmap_b;
+               break;
+       case MSR_BITMAP:
+               *value = current_evmcs->msr_bitmap;
+               break;
+       case GUEST_ES_BASE:
+               *value = current_evmcs->guest_es_base;
+               break;
+       case GUEST_CS_BASE:
+               *value = current_evmcs->guest_cs_base;
+               break;
+       case GUEST_SS_BASE:
+               *value = current_evmcs->guest_ss_base;
+               break;
+       case GUEST_DS_BASE:
+               *value = current_evmcs->guest_ds_base;
+               break;
+       case GUEST_FS_BASE:
+               *value = current_evmcs->guest_fs_base;
+               break;
+       case GUEST_GS_BASE:
+               *value = current_evmcs->guest_gs_base;
+               break;
+       case GUEST_LDTR_BASE:
+               *value = current_evmcs->guest_ldtr_base;
+               break;
+       case GUEST_TR_BASE:
+               *value = current_evmcs->guest_tr_base;
+               break;
+       case GUEST_GDTR_BASE:
+               *value = current_evmcs->guest_gdtr_base;
+               break;
+       case GUEST_IDTR_BASE:
+               *value = current_evmcs->guest_idtr_base;
+               break;
+       case TSC_OFFSET:
+               *value = current_evmcs->tsc_offset;
+               break;
+       case VIRTUAL_APIC_PAGE_ADDR:
+               *value = current_evmcs->virtual_apic_page_addr;
+               break;
+       case VMCS_LINK_POINTER:
+               *value = current_evmcs->vmcs_link_pointer;
+               break;
+       case GUEST_IA32_DEBUGCTL:
+               *value = current_evmcs->guest_ia32_debugctl;
+               break;
+       case GUEST_IA32_PAT:
+               *value = current_evmcs->guest_ia32_pat;
+               break;
+       case GUEST_IA32_EFER:
+               *value = current_evmcs->guest_ia32_efer;
+               break;
+       case GUEST_PDPTR0:
+               *value = current_evmcs->guest_pdptr0;
+               break;
+       case GUEST_PDPTR1:
+               *value = current_evmcs->guest_pdptr1;
+               break;
+       case GUEST_PDPTR2:
+               *value = current_evmcs->guest_pdptr2;
+               break;
+       case GUEST_PDPTR3:
+               *value = current_evmcs->guest_pdptr3;
+               break;
+       case GUEST_PENDING_DBG_EXCEPTIONS:
+               *value = current_evmcs->guest_pending_dbg_exceptions;
+               break;
+       case GUEST_SYSENTER_ESP:
+               *value = current_evmcs->guest_sysenter_esp;
+               break;
+       case GUEST_SYSENTER_EIP:
+               *value = current_evmcs->guest_sysenter_eip;
+               break;
+       case CR0_GUEST_HOST_MASK:
+               *value = current_evmcs->cr0_guest_host_mask;
+               break;
+       case CR4_GUEST_HOST_MASK:
+               *value = current_evmcs->cr4_guest_host_mask;
+               break;
+       case CR0_READ_SHADOW:
+               *value = current_evmcs->cr0_read_shadow;
+               break;
+       case CR4_READ_SHADOW:
+               *value = current_evmcs->cr4_read_shadow;
+               break;
+       case GUEST_CR0:
+               *value = current_evmcs->guest_cr0;
+               break;
+       case GUEST_CR3:
+               *value = current_evmcs->guest_cr3;
+               break;
+       case GUEST_CR4:
+               *value = current_evmcs->guest_cr4;
+               break;
+       case GUEST_DR7:
+               *value = current_evmcs->guest_dr7;
+               break;
+       case HOST_FS_BASE:
+               *value = current_evmcs->host_fs_base;
+               break;
+       case HOST_GS_BASE:
+               *value = current_evmcs->host_gs_base;
+               break;
+       case HOST_TR_BASE:
+               *value = current_evmcs->host_tr_base;
+               break;
+       case HOST_GDTR_BASE:
+               *value = current_evmcs->host_gdtr_base;
+               break;
+       case HOST_IDTR_BASE:
+               *value = current_evmcs->host_idtr_base;
+               break;
+       case HOST_RSP:
+               *value = current_evmcs->host_rsp;
+               break;
+       case EPT_POINTER:
+               *value = current_evmcs->ept_pointer;
+               break;
+       case GUEST_BNDCFGS:
+               *value = current_evmcs->guest_bndcfgs;
+               break;
+       case XSS_EXIT_BITMAP:
+               *value = current_evmcs->xss_exit_bitmap;
+               break;
+       case GUEST_PHYSICAL_ADDRESS:
+               *value = current_evmcs->guest_physical_address;
+               break;
+       case EXIT_QUALIFICATION:
+               *value = current_evmcs->exit_qualification;
+               break;
+       case GUEST_LINEAR_ADDRESS:
+               *value = current_evmcs->guest_linear_address;
+               break;
+       case VM_EXIT_MSR_STORE_ADDR:
+               *value = current_evmcs->vm_exit_msr_store_addr;
+               break;
+       case VM_EXIT_MSR_LOAD_ADDR:
+               *value = current_evmcs->vm_exit_msr_load_addr;
+               break;
+       case VM_ENTRY_MSR_LOAD_ADDR:
+               *value = current_evmcs->vm_entry_msr_load_addr;
+               break;
+       case CR3_TARGET_VALUE0:
+               *value = current_evmcs->cr3_target_value0;
+               break;
+       case CR3_TARGET_VALUE1:
+               *value = current_evmcs->cr3_target_value1;
+               break;
+       case CR3_TARGET_VALUE2:
+               *value = current_evmcs->cr3_target_value2;
+               break;
+       case CR3_TARGET_VALUE3:
+               *value = current_evmcs->cr3_target_value3;
+               break;
+       case TPR_THRESHOLD:
+               *value = current_evmcs->tpr_threshold;
+               break;
+       case GUEST_INTERRUPTIBILITY_INFO:
+               *value = current_evmcs->guest_interruptibility_info;
+               break;
+       case CPU_BASED_VM_EXEC_CONTROL:
+               *value = current_evmcs->cpu_based_vm_exec_control;
+               break;
+       case EXCEPTION_BITMAP:
+               *value = current_evmcs->exception_bitmap;
+               break;
+       case VM_ENTRY_CONTROLS:
+               *value = current_evmcs->vm_entry_controls;
+               break;
+       case VM_ENTRY_INTR_INFO_FIELD:
+               *value = current_evmcs->vm_entry_intr_info_field;
+               break;
+       case VM_ENTRY_EXCEPTION_ERROR_CODE:
+               *value = current_evmcs->vm_entry_exception_error_code;
+               break;
+       case VM_ENTRY_INSTRUCTION_LEN:
+               *value = current_evmcs->vm_entry_instruction_len;
+               break;
+       case HOST_IA32_SYSENTER_CS:
+               *value = current_evmcs->host_ia32_sysenter_cs;
+               break;
+       case PIN_BASED_VM_EXEC_CONTROL:
+               *value = current_evmcs->pin_based_vm_exec_control;
+               break;
+       case VM_EXIT_CONTROLS:
+               *value = current_evmcs->vm_exit_controls;
+               break;
+       case SECONDARY_VM_EXEC_CONTROL:
+               *value = current_evmcs->secondary_vm_exec_control;
+               break;
+       case GUEST_ES_LIMIT:
+               *value = current_evmcs->guest_es_limit;
+               break;
+       case GUEST_CS_LIMIT:
+               *value = current_evmcs->guest_cs_limit;
+               break;
+       case GUEST_SS_LIMIT:
+               *value = current_evmcs->guest_ss_limit;
+               break;
+       case GUEST_DS_LIMIT:
+               *value = current_evmcs->guest_ds_limit;
+               break;
+       case GUEST_FS_LIMIT:
+               *value = current_evmcs->guest_fs_limit;
+               break;
+       case GUEST_GS_LIMIT:
+               *value = current_evmcs->guest_gs_limit;
+               break;
+       case GUEST_LDTR_LIMIT:
+               *value = current_evmcs->guest_ldtr_limit;
+               break;
+       case GUEST_TR_LIMIT:
+               *value = current_evmcs->guest_tr_limit;
+               break;
+       case GUEST_GDTR_LIMIT:
+               *value = current_evmcs->guest_gdtr_limit;
+               break;
+       case GUEST_IDTR_LIMIT:
+               *value = current_evmcs->guest_idtr_limit;
+               break;
+       case GUEST_ES_AR_BYTES:
+               *value = current_evmcs->guest_es_ar_bytes;
+               break;
+       case GUEST_CS_AR_BYTES:
+               *value = current_evmcs->guest_cs_ar_bytes;
+               break;
+       case GUEST_SS_AR_BYTES:
+               *value = current_evmcs->guest_ss_ar_bytes;
+               break;
+       case GUEST_DS_AR_BYTES:
+               *value = current_evmcs->guest_ds_ar_bytes;
+               break;
+       case GUEST_FS_AR_BYTES:
+               *value = current_evmcs->guest_fs_ar_bytes;
+               break;
+       case GUEST_GS_AR_BYTES:
+               *value = current_evmcs->guest_gs_ar_bytes;
+               break;
+       case GUEST_LDTR_AR_BYTES:
+               *value = current_evmcs->guest_ldtr_ar_bytes;
+               break;
+       case GUEST_TR_AR_BYTES:
+               *value = current_evmcs->guest_tr_ar_bytes;
+               break;
+       case GUEST_ACTIVITY_STATE:
+               *value = current_evmcs->guest_activity_state;
+               break;
+       case GUEST_SYSENTER_CS:
+               *value = current_evmcs->guest_sysenter_cs;
+               break;
+       case VM_INSTRUCTION_ERROR:
+               *value = current_evmcs->vm_instruction_error;
+               break;
+       case VM_EXIT_REASON:
+               *value = current_evmcs->vm_exit_reason;
+               break;
+       case VM_EXIT_INTR_INFO:
+               *value = current_evmcs->vm_exit_intr_info;
+               break;
+       case VM_EXIT_INTR_ERROR_CODE:
+               *value = current_evmcs->vm_exit_intr_error_code;
+               break;
+       case IDT_VECTORING_INFO_FIELD:
+               *value = current_evmcs->idt_vectoring_info_field;
+               break;
+       case IDT_VECTORING_ERROR_CODE:
+               *value = current_evmcs->idt_vectoring_error_code;
+               break;
+       case VM_EXIT_INSTRUCTION_LEN:
+               *value = current_evmcs->vm_exit_instruction_len;
+               break;
+       case VMX_INSTRUCTION_INFO:
+               *value = current_evmcs->vmx_instruction_info;
+               break;
+       case PAGE_FAULT_ERROR_CODE_MASK:
+               *value = current_evmcs->page_fault_error_code_mask;
+               break;
+       case PAGE_FAULT_ERROR_CODE_MATCH:
+               *value = current_evmcs->page_fault_error_code_match;
+               break;
+       case CR3_TARGET_COUNT:
+               *value = current_evmcs->cr3_target_count;
+               break;
+       case VM_EXIT_MSR_STORE_COUNT:
+               *value = current_evmcs->vm_exit_msr_store_count;
+               break;
+       case VM_EXIT_MSR_LOAD_COUNT:
+               *value = current_evmcs->vm_exit_msr_load_count;
+               break;
+       case VM_ENTRY_MSR_LOAD_COUNT:
+               *value = current_evmcs->vm_entry_msr_load_count;
+               break;
+       case HOST_ES_SELECTOR:
+               *value = current_evmcs->host_es_selector;
+               break;
+       case HOST_CS_SELECTOR:
+               *value = current_evmcs->host_cs_selector;
+               break;
+       case HOST_SS_SELECTOR:
+               *value = current_evmcs->host_ss_selector;
+               break;
+       case HOST_DS_SELECTOR:
+               *value = current_evmcs->host_ds_selector;
+               break;
+       case HOST_FS_SELECTOR:
+               *value = current_evmcs->host_fs_selector;
+               break;
+       case HOST_GS_SELECTOR:
+               *value = current_evmcs->host_gs_selector;
+               break;
+       case HOST_TR_SELECTOR:
+               *value = current_evmcs->host_tr_selector;
+               break;
+       case GUEST_ES_SELECTOR:
+               *value = current_evmcs->guest_es_selector;
+               break;
+       case GUEST_CS_SELECTOR:
+               *value = current_evmcs->guest_cs_selector;
+               break;
+       case GUEST_SS_SELECTOR:
+               *value = current_evmcs->guest_ss_selector;
+               break;
+       case GUEST_DS_SELECTOR:
+               *value = current_evmcs->guest_ds_selector;
+               break;
+       case GUEST_FS_SELECTOR:
+               *value = current_evmcs->guest_fs_selector;
+               break;
+       case GUEST_GS_SELECTOR:
+               *value = current_evmcs->guest_gs_selector;
+               break;
+       case GUEST_LDTR_SELECTOR:
+               *value = current_evmcs->guest_ldtr_selector;
+               break;
+       case GUEST_TR_SELECTOR:
+               *value = current_evmcs->guest_tr_selector;
+               break;
+       case VIRTUAL_PROCESSOR_ID:
+               *value = current_evmcs->virtual_processor_id;
+               break;
+       case HOST_IA32_PERF_GLOBAL_CTRL:
+               *value = current_evmcs->host_ia32_perf_global_ctrl;
+               break;
+       case GUEST_IA32_PERF_GLOBAL_CTRL:
+               *value = current_evmcs->guest_ia32_perf_global_ctrl;
+               break;
+       case ENCLS_EXITING_BITMAP:
+               *value = current_evmcs->encls_exiting_bitmap;
+               break;
+       case TSC_MULTIPLIER:
+               *value = current_evmcs->tsc_multiplier;
+               break;
+       default: return 1;
+       }
+
+       return 0;
+}
+
+static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value)
+{
+       switch (encoding) {
+       case GUEST_RIP:
+               current_evmcs->guest_rip = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case GUEST_RSP:
+               current_evmcs->guest_rsp = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
+               break;
+       case GUEST_RFLAGS:
+               current_evmcs->guest_rflags = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
+               break;
+       case HOST_IA32_PAT:
+               current_evmcs->host_ia32_pat = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_IA32_EFER:
+               current_evmcs->host_ia32_efer = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_CR0:
+               current_evmcs->host_cr0 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_CR3:
+               current_evmcs->host_cr3 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_CR4:
+               current_evmcs->host_cr4 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_IA32_SYSENTER_ESP:
+               current_evmcs->host_ia32_sysenter_esp = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_IA32_SYSENTER_EIP:
+               current_evmcs->host_ia32_sysenter_eip = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_RIP:
+               current_evmcs->host_rip = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case IO_BITMAP_A:
+               current_evmcs->io_bitmap_a = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP;
+               break;
+       case IO_BITMAP_B:
+               current_evmcs->io_bitmap_b = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP;
+               break;
+       case MSR_BITMAP:
+               current_evmcs->msr_bitmap = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+               break;
+       case GUEST_ES_BASE:
+               current_evmcs->guest_es_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_CS_BASE:
+               current_evmcs->guest_cs_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_SS_BASE:
+               current_evmcs->guest_ss_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_DS_BASE:
+               current_evmcs->guest_ds_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_FS_BASE:
+               current_evmcs->guest_fs_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_GS_BASE:
+               current_evmcs->guest_gs_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_LDTR_BASE:
+               current_evmcs->guest_ldtr_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_TR_BASE:
+               current_evmcs->guest_tr_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_GDTR_BASE:
+               current_evmcs->guest_gdtr_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_IDTR_BASE:
+               current_evmcs->guest_idtr_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case TSC_OFFSET:
+               current_evmcs->tsc_offset = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
+               break;
+       case VIRTUAL_APIC_PAGE_ADDR:
+               current_evmcs->virtual_apic_page_addr = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
+               break;
+       case VMCS_LINK_POINTER:
+               current_evmcs->vmcs_link_pointer = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_IA32_DEBUGCTL:
+               current_evmcs->guest_ia32_debugctl = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_IA32_PAT:
+               current_evmcs->guest_ia32_pat = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_IA32_EFER:
+               current_evmcs->guest_ia32_efer = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_PDPTR0:
+               current_evmcs->guest_pdptr0 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_PDPTR1:
+               current_evmcs->guest_pdptr1 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_PDPTR2:
+               current_evmcs->guest_pdptr2 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_PDPTR3:
+               current_evmcs->guest_pdptr3 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_PENDING_DBG_EXCEPTIONS:
+               current_evmcs->guest_pending_dbg_exceptions = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_SYSENTER_ESP:
+               current_evmcs->guest_sysenter_esp = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_SYSENTER_EIP:
+               current_evmcs->guest_sysenter_eip = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case CR0_GUEST_HOST_MASK:
+               current_evmcs->cr0_guest_host_mask = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+               break;
+       case CR4_GUEST_HOST_MASK:
+               current_evmcs->cr4_guest_host_mask = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+               break;
+       case CR0_READ_SHADOW:
+               current_evmcs->cr0_read_shadow = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+               break;
+       case CR4_READ_SHADOW:
+               current_evmcs->cr4_read_shadow = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+               break;
+       case GUEST_CR0:
+               current_evmcs->guest_cr0 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+               break;
+       case GUEST_CR3:
+               current_evmcs->guest_cr3 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+               break;
+       case GUEST_CR4:
+               current_evmcs->guest_cr4 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+               break;
+       case GUEST_DR7:
+               current_evmcs->guest_dr7 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+               break;
+       case HOST_FS_BASE:
+               current_evmcs->host_fs_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+               break;
+       case HOST_GS_BASE:
+               current_evmcs->host_gs_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+               break;
+       case HOST_TR_BASE:
+               current_evmcs->host_tr_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+               break;
+       case HOST_GDTR_BASE:
+               current_evmcs->host_gdtr_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+               break;
+       case HOST_IDTR_BASE:
+               current_evmcs->host_idtr_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+               break;
+       case HOST_RSP:
+               current_evmcs->host_rsp = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+               break;
+       case EPT_POINTER:
+               current_evmcs->ept_pointer = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT;
+               break;
+       case GUEST_BNDCFGS:
+               current_evmcs->guest_bndcfgs = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case XSS_EXIT_BITMAP:
+               current_evmcs->xss_exit_bitmap = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
+               break;
+       case GUEST_PHYSICAL_ADDRESS:
+               current_evmcs->guest_physical_address = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case EXIT_QUALIFICATION:
+               current_evmcs->exit_qualification = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case GUEST_LINEAR_ADDRESS:
+               current_evmcs->guest_linear_address = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case VM_EXIT_MSR_STORE_ADDR:
+               current_evmcs->vm_exit_msr_store_addr = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case VM_EXIT_MSR_LOAD_ADDR:
+               current_evmcs->vm_exit_msr_load_addr = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case VM_ENTRY_MSR_LOAD_ADDR:
+               current_evmcs->vm_entry_msr_load_addr = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case CR3_TARGET_VALUE0:
+               current_evmcs->cr3_target_value0 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case CR3_TARGET_VALUE1:
+               current_evmcs->cr3_target_value1 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case CR3_TARGET_VALUE2:
+               current_evmcs->cr3_target_value2 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case CR3_TARGET_VALUE3:
+               current_evmcs->cr3_target_value3 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case TPR_THRESHOLD:
+               current_evmcs->tpr_threshold = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case GUEST_INTERRUPTIBILITY_INFO:
+               current_evmcs->guest_interruptibility_info = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
+               break;
+       case CPU_BASED_VM_EXEC_CONTROL:
+               current_evmcs->cpu_based_vm_exec_control = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC;
+               break;
+       case EXCEPTION_BITMAP:
+               current_evmcs->exception_bitmap = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN;
+               break;
+       case VM_ENTRY_CONTROLS:
+               current_evmcs->vm_entry_controls = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY;
+               break;
+       case VM_ENTRY_INTR_INFO_FIELD:
+               current_evmcs->vm_entry_intr_info_field = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
+               break;
+       case VM_ENTRY_EXCEPTION_ERROR_CODE:
+               current_evmcs->vm_entry_exception_error_code = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
+               break;
+       case VM_ENTRY_INSTRUCTION_LEN:
+               current_evmcs->vm_entry_instruction_len = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
+               break;
+       case HOST_IA32_SYSENTER_CS:
+               current_evmcs->host_ia32_sysenter_cs = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case PIN_BASED_VM_EXEC_CONTROL:
+               current_evmcs->pin_based_vm_exec_control = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
+               break;
+       case VM_EXIT_CONTROLS:
+               current_evmcs->vm_exit_controls = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
+               break;
+       case SECONDARY_VM_EXEC_CONTROL:
+               current_evmcs->secondary_vm_exec_control = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
+               break;
+       case GUEST_ES_LIMIT:
+               current_evmcs->guest_es_limit = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_CS_LIMIT:
+               current_evmcs->guest_cs_limit = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_SS_LIMIT:
+               current_evmcs->guest_ss_limit = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_DS_LIMIT:
+               current_evmcs->guest_ds_limit = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_FS_LIMIT:
+               current_evmcs->guest_fs_limit = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_GS_LIMIT:
+               current_evmcs->guest_gs_limit = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_LDTR_LIMIT:
+               current_evmcs->guest_ldtr_limit = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_TR_LIMIT:
+               current_evmcs->guest_tr_limit = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_GDTR_LIMIT:
+               current_evmcs->guest_gdtr_limit = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_IDTR_LIMIT:
+               current_evmcs->guest_idtr_limit = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_ES_AR_BYTES:
+               current_evmcs->guest_es_ar_bytes = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_CS_AR_BYTES:
+               current_evmcs->guest_cs_ar_bytes = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_SS_AR_BYTES:
+               current_evmcs->guest_ss_ar_bytes = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_DS_AR_BYTES:
+               current_evmcs->guest_ds_ar_bytes = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_FS_AR_BYTES:
+               current_evmcs->guest_fs_ar_bytes = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_GS_AR_BYTES:
+               current_evmcs->guest_gs_ar_bytes = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_LDTR_AR_BYTES:
+               current_evmcs->guest_ldtr_ar_bytes = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_TR_AR_BYTES:
+               current_evmcs->guest_tr_ar_bytes = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_ACTIVITY_STATE:
+               current_evmcs->guest_activity_state = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_SYSENTER_CS:
+               current_evmcs->guest_sysenter_cs = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case VM_INSTRUCTION_ERROR:
+               current_evmcs->vm_instruction_error = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case VM_EXIT_REASON:
+               current_evmcs->vm_exit_reason = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case VM_EXIT_INTR_INFO:
+               current_evmcs->vm_exit_intr_info = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case VM_EXIT_INTR_ERROR_CODE:
+               current_evmcs->vm_exit_intr_error_code = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case IDT_VECTORING_INFO_FIELD:
+               current_evmcs->idt_vectoring_info_field = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case IDT_VECTORING_ERROR_CODE:
+               current_evmcs->idt_vectoring_error_code = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case VM_EXIT_INSTRUCTION_LEN:
+               current_evmcs->vm_exit_instruction_len = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case VMX_INSTRUCTION_INFO:
+               current_evmcs->vmx_instruction_info = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case PAGE_FAULT_ERROR_CODE_MASK:
+               current_evmcs->page_fault_error_code_mask = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case PAGE_FAULT_ERROR_CODE_MATCH:
+               current_evmcs->page_fault_error_code_match = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case CR3_TARGET_COUNT:
+               current_evmcs->cr3_target_count = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case VM_EXIT_MSR_STORE_COUNT:
+               current_evmcs->vm_exit_msr_store_count = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case VM_EXIT_MSR_LOAD_COUNT:
+               current_evmcs->vm_exit_msr_load_count = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case VM_ENTRY_MSR_LOAD_COUNT:
+               current_evmcs->vm_entry_msr_load_count = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case HOST_ES_SELECTOR:
+               current_evmcs->host_es_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_CS_SELECTOR:
+               current_evmcs->host_cs_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_SS_SELECTOR:
+               current_evmcs->host_ss_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_DS_SELECTOR:
+               current_evmcs->host_ds_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_FS_SELECTOR:
+               current_evmcs->host_fs_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_GS_SELECTOR:
+               current_evmcs->host_gs_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_TR_SELECTOR:
+               current_evmcs->host_tr_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case GUEST_ES_SELECTOR:
+               current_evmcs->guest_es_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_CS_SELECTOR:
+               current_evmcs->guest_cs_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_SS_SELECTOR:
+               current_evmcs->guest_ss_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_DS_SELECTOR:
+               current_evmcs->guest_ds_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_FS_SELECTOR:
+               current_evmcs->guest_fs_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_GS_SELECTOR:
+               current_evmcs->guest_gs_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_LDTR_SELECTOR:
+               current_evmcs->guest_ldtr_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_TR_SELECTOR:
+               current_evmcs->guest_tr_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case VIRTUAL_PROCESSOR_ID:
+               current_evmcs->virtual_processor_id = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT;
+               break;
+       case HOST_IA32_PERF_GLOBAL_CTRL:
+               current_evmcs->host_ia32_perf_global_ctrl = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case GUEST_IA32_PERF_GLOBAL_CTRL:
+               current_evmcs->guest_ia32_perf_global_ctrl = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case ENCLS_EXITING_BITMAP:
+               current_evmcs->encls_exiting_bitmap = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
+               break;
+       case TSC_MULTIPLIER:
+               current_evmcs->tsc_multiplier = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
+               break;
+       default: return 1;
+       }
+
+       return 0;
+}
+
+static inline int evmcs_vmlaunch(void)
+{
+       int ret;
+
+       current_evmcs->hv_clean_fields = 0;
+
+       __asm__ __volatile__("push %%rbp;"
+                            "push %%rcx;"
+                            "push %%rdx;"
+                            "push %%rsi;"
+                            "push %%rdi;"
+                            "push $0;"
+                            "mov %%rsp, (%[host_rsp]);"
+                            "lea 1f(%%rip), %%rax;"
+                            "mov %%rax, (%[host_rip]);"
+                            "vmlaunch;"
+                            "incq (%%rsp);"
+                            "1: pop %%rax;"
+                            "pop %%rdi;"
+                            "pop %%rsi;"
+                            "pop %%rdx;"
+                            "pop %%rcx;"
+                            "pop %%rbp;"
+                            : [ret]"=&a"(ret)
+                            : [host_rsp]"r"
+                              ((uint64_t)&current_evmcs->host_rsp),
+                              [host_rip]"r"
+                              ((uint64_t)&current_evmcs->host_rip)
+                            : "memory", "cc", "rbx", "r8", "r9", "r10",
+                              "r11", "r12", "r13", "r14", "r15");
+       return ret;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmresume.
+ */
+static inline int evmcs_vmresume(void)
+{
+       int ret;
+
+       /* HOST_RIP */
+       current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+       /* HOST_RSP */
+       current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+
+       __asm__ __volatile__("push %%rbp;"
+                            "push %%rcx;"
+                            "push %%rdx;"
+                            "push %%rsi;"
+                            "push %%rdi;"
+                            "push $0;"
+                            "mov %%rsp, (%[host_rsp]);"
+                            "lea 1f(%%rip), %%rax;"
+                            "mov %%rax, (%[host_rip]);"
+                            "vmresume;"
+                            "incq (%%rsp);"
+                            "1: pop %%rax;"
+                            "pop %%rdi;"
+                            "pop %%rsi;"
+                            "pop %%rdx;"
+                            "pop %%rcx;"
+                            "pop %%rbp;"
+                            : [ret]"=&a"(ret)
+                            : [host_rsp]"r"
+                              ((uint64_t)&current_evmcs->host_rsp),
+                              [host_rip]"r"
+                              ((uint64_t)&current_evmcs->host_rip)
+                            : "memory", "cc", "rbx", "r8", "r9", "r10",
+                              "r11", "r12", "r13", "r14", "r15");
+       return ret;
+}
+
+#endif /* !SELFTEST_KVM_EVMCS_H */
diff --git a/tools/testing/selftests/kvm/include/x86/hyperv.h b/tools/testing/selftests/kvm/include/x86/hyperv.h
new file mode 100644 (file)
index 0000000..f13e532
--- /dev/null
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ */
+
+#ifndef SELFTEST_KVM_HYPERV_H
+#define SELFTEST_KVM_HYPERV_H
+
+#include "processor.h"
+
+#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS  0x40000000
+#define HYPERV_CPUID_INTERFACE                 0x40000001
+#define HYPERV_CPUID_VERSION                   0x40000002
+#define HYPERV_CPUID_FEATURES                  0x40000003
+#define HYPERV_CPUID_ENLIGHTMENT_INFO          0x40000004
+#define HYPERV_CPUID_IMPLEMENT_LIMITS          0x40000005
+#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES   0x40000007
+#define HYPERV_CPUID_NESTED_FEATURES           0x4000000A
+#define HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS   0x40000080
+#define HYPERV_CPUID_SYNDBG_INTERFACE                  0x40000081
+#define HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES      0x40000082
+
+#define HV_X64_MSR_GUEST_OS_ID                 0x40000000
+#define HV_X64_MSR_HYPERCALL                   0x40000001
+#define HV_X64_MSR_VP_INDEX                    0x40000002
+#define HV_X64_MSR_RESET                       0x40000003
+#define HV_X64_MSR_VP_RUNTIME                  0x40000010
+#define HV_X64_MSR_TIME_REF_COUNT              0x40000020
+#define HV_X64_MSR_REFERENCE_TSC               0x40000021
+#define HV_X64_MSR_TSC_FREQUENCY               0x40000022
+#define HV_X64_MSR_APIC_FREQUENCY              0x40000023
+#define HV_X64_MSR_EOI                         0x40000070
+#define HV_X64_MSR_ICR                         0x40000071
+#define HV_X64_MSR_TPR                         0x40000072
+#define HV_X64_MSR_VP_ASSIST_PAGE              0x40000073
+#define HV_X64_MSR_SCONTROL                    0x40000080
+#define HV_X64_MSR_SVERSION                    0x40000081
+#define HV_X64_MSR_SIEFP                       0x40000082
+#define HV_X64_MSR_SIMP                                0x40000083
+#define HV_X64_MSR_EOM                         0x40000084
+#define HV_X64_MSR_SINT0                       0x40000090
+#define HV_X64_MSR_SINT1                       0x40000091
+#define HV_X64_MSR_SINT2                       0x40000092
+#define HV_X64_MSR_SINT3                       0x40000093
+#define HV_X64_MSR_SINT4                       0x40000094
+#define HV_X64_MSR_SINT5                       0x40000095
+#define HV_X64_MSR_SINT6                       0x40000096
+#define HV_X64_MSR_SINT7                       0x40000097
+#define HV_X64_MSR_SINT8                       0x40000098
+#define HV_X64_MSR_SINT9                       0x40000099
+#define HV_X64_MSR_SINT10                      0x4000009A
+#define HV_X64_MSR_SINT11                      0x4000009B
+#define HV_X64_MSR_SINT12                      0x4000009C
+#define HV_X64_MSR_SINT13                      0x4000009D
+#define HV_X64_MSR_SINT14                      0x4000009E
+#define HV_X64_MSR_SINT15                      0x4000009F
+#define HV_X64_MSR_STIMER0_CONFIG              0x400000B0
+#define HV_X64_MSR_STIMER0_COUNT               0x400000B1
+#define HV_X64_MSR_STIMER1_CONFIG              0x400000B2
+#define HV_X64_MSR_STIMER1_COUNT               0x400000B3
+#define HV_X64_MSR_STIMER2_CONFIG              0x400000B4
+#define HV_X64_MSR_STIMER2_COUNT               0x400000B5
+#define HV_X64_MSR_STIMER3_CONFIG              0x400000B6
+#define HV_X64_MSR_STIMER3_COUNT               0x400000B7
+#define HV_X64_MSR_GUEST_IDLE                  0x400000F0
+#define HV_X64_MSR_CRASH_P0                    0x40000100
+#define HV_X64_MSR_CRASH_P1                    0x40000101
+#define HV_X64_MSR_CRASH_P2                    0x40000102
+#define HV_X64_MSR_CRASH_P3                    0x40000103
+#define HV_X64_MSR_CRASH_P4                    0x40000104
+#define HV_X64_MSR_CRASH_CTL                   0x40000105
+#define HV_X64_MSR_REENLIGHTENMENT_CONTROL     0x40000106
+#define HV_X64_MSR_TSC_EMULATION_CONTROL       0x40000107
+#define HV_X64_MSR_TSC_EMULATION_STATUS                0x40000108
+#define HV_X64_MSR_TSC_INVARIANT_CONTROL       0x40000118
+
+#define HV_X64_MSR_SYNDBG_CONTROL              0x400000F1
+#define HV_X64_MSR_SYNDBG_STATUS               0x400000F2
+#define HV_X64_MSR_SYNDBG_SEND_BUFFER          0x400000F3
+#define HV_X64_MSR_SYNDBG_RECV_BUFFER          0x400000F4
+#define HV_X64_MSR_SYNDBG_PENDING_BUFFER       0x400000F5
+#define HV_X64_MSR_SYNDBG_OPTIONS              0x400000FF
+
+/* HYPERV_CPUID_FEATURES.EAX */
+#define HV_MSR_VP_RUNTIME_AVAILABLE            \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 0)
+#define HV_MSR_TIME_REF_COUNT_AVAILABLE                \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 1)
+#define HV_MSR_SYNIC_AVAILABLE                 \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 2)
+#define HV_MSR_SYNTIMER_AVAILABLE              \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 3)
+#define HV_MSR_APIC_ACCESS_AVAILABLE           \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 4)
+#define HV_MSR_HYPERCALL_AVAILABLE             \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 5)
+#define HV_MSR_VP_INDEX_AVAILABLE              \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 6)
+#define HV_MSR_RESET_AVAILABLE                 \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 7)
+#define HV_MSR_STAT_PAGES_AVAILABLE            \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 8)
+#define HV_MSR_REFERENCE_TSC_AVAILABLE         \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 9)
+#define HV_MSR_GUEST_IDLE_AVAILABLE            \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 10)
+#define HV_ACCESS_FREQUENCY_MSRS               \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 11)
+#define HV_ACCESS_REENLIGHTENMENT              \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 13)
+#define HV_ACCESS_TSC_INVARIANT                        \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 15)
+
+/* HYPERV_CPUID_FEATURES.EBX */
+#define HV_CREATE_PARTITIONS                   \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 0)
+#define HV_ACCESS_PARTITION_ID                 \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 1)
+#define HV_ACCESS_MEMORY_POOL                  \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 2)
+#define HV_ADJUST_MESSAGE_BUFFERS              \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 3)
+#define HV_POST_MESSAGES                       \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 4)
+#define HV_SIGNAL_EVENTS                       \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 5)
+#define HV_CREATE_PORT                         \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 6)
+#define HV_CONNECT_PORT                                \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 7)
+#define HV_ACCESS_STATS                                \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 8)
+#define HV_DEBUGGING                           \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 11)
+#define HV_CPU_MANAGEMENT                      \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 12)
+#define HV_ENABLE_EXTENDED_HYPERCALLS          \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 20)
+#define HV_ISOLATION                           \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 22)
+
+/* HYPERV_CPUID_FEATURES.EDX */
+#define HV_X64_MWAIT_AVAILABLE                         \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 0)
+#define HV_X64_GUEST_DEBUGGING_AVAILABLE               \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 1)
+#define HV_X64_PERF_MONITOR_AVAILABLE                  \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 2)
+#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE      \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 3)
+#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE           \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 4)
+#define HV_X64_GUEST_IDLE_STATE_AVAILABLE              \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 5)
+#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE            \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 8)
+#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE           \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 10)
+#define HV_FEATURE_DEBUG_MSRS_AVAILABLE                        \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 11)
+#define HV_STIMER_DIRECT_MODE_AVAILABLE                        \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 19)
+
+/* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
+#define HV_X64_AS_SWITCH_RECOMMENDED                   \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 0)
+#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED             \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 1)
+#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED            \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 2)
+#define HV_X64_APIC_ACCESS_RECOMMENDED                 \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 3)
+#define HV_X64_SYSTEM_RESET_RECOMMENDED                        \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 4)
+#define HV_X64_RELAXED_TIMING_RECOMMENDED              \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 5)
+#define HV_DEPRECATING_AEOI_RECOMMENDED                        \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 9)
+#define HV_X64_CLUSTER_IPI_RECOMMENDED                 \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 10)
+#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED          \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 11)
+#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED            \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 14)
+
+/* HYPERV_CPUID_NESTED_FEATURES.EAX */
+#define HV_X64_NESTED_DIRECT_FLUSH                     \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 17)
+#define HV_X64_NESTED_GUEST_MAPPING_FLUSH              \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 18)
+#define HV_X64_NESTED_MSR_BITMAP                       \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 19)
+
+/* HYPERV_CPUID_NESTED_FEATURES.EBX */
+#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL          \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EBX, 0)
+
+/* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
+#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING       \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0, EAX, 1)
+
+/* Hypercalls */
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE     0x0002
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST      0x0003
+#define HVCALL_NOTIFY_LONG_SPIN_WAIT           0x0008
+#define HVCALL_SEND_IPI                                0x000b
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX  0x0013
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX   0x0014
+#define HVCALL_SEND_IPI_EX                     0x0015
+#define HVCALL_GET_PARTITION_ID                        0x0046
+#define HVCALL_DEPOSIT_MEMORY                  0x0048
+#define HVCALL_CREATE_VP                       0x004e
+#define HVCALL_GET_VP_REGISTERS                        0x0050
+#define HVCALL_SET_VP_REGISTERS                        0x0051
+#define HVCALL_POST_MESSAGE                    0x005c
+#define HVCALL_SIGNAL_EVENT                    0x005d
+#define HVCALL_POST_DEBUG_DATA                 0x0069
+#define HVCALL_RETRIEVE_DEBUG_DATA             0x006a
+#define HVCALL_RESET_DEBUG_SESSION             0x006b
+#define HVCALL_ADD_LOGICAL_PROCESSOR           0x0076
+#define HVCALL_MAP_DEVICE_INTERRUPT            0x007c
+#define HVCALL_UNMAP_DEVICE_INTERRUPT          0x007d
+#define HVCALL_RETARGET_INTERRUPT              0x007e
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
+
+/* Extended hypercalls */
+#define HV_EXT_CALL_QUERY_CAPABILITIES         0x8001
+
+#define HV_FLUSH_ALL_PROCESSORS                        BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES    BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY      BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT     BIT(3)
+
+/* hypercall status code */
+#define HV_STATUS_SUCCESS                      0
+#define HV_STATUS_INVALID_HYPERCALL_CODE       2
+#define HV_STATUS_INVALID_HYPERCALL_INPUT      3
+#define HV_STATUS_INVALID_ALIGNMENT            4
+#define HV_STATUS_INVALID_PARAMETER            5
+#define HV_STATUS_ACCESS_DENIED                        6
+#define HV_STATUS_OPERATION_DENIED             8
+#define HV_STATUS_INSUFFICIENT_MEMORY          11
+#define HV_STATUS_INVALID_PORT_ID              17
+#define HV_STATUS_INVALID_CONNECTION_ID                18
+#define HV_STATUS_INSUFFICIENT_BUFFERS         19
+
+/* hypercall options */
+#define HV_HYPERCALL_FAST_BIT          BIT(16)
+#define HV_HYPERCALL_VARHEAD_OFFSET    17
+#define HV_HYPERCALL_REP_COMP_OFFSET   32
+
+/*
+ * Issue a Hyper-V hypercall.  Returns the exception vector raised, or 0 if
+ * none; 'hv_status' is set to the hypercall status (if no exception occurred).
+ */
+static inline uint8_t __hyperv_hypercall(u64 control, vm_vaddr_t input_address,
+                                        vm_vaddr_t output_address,
+                                        uint64_t *hv_status)
+{
+       uint64_t error_code;
+       uint8_t vector;
+
+       /* Note both the hypercall and the "asm safe" clobber r9-r11. */
+       asm volatile("mov %[output_address], %%r8\n\t"
+                    KVM_ASM_SAFE("vmcall")
+                    : "=a" (*hv_status),
+                      "+c" (control), "+d" (input_address),
+                      KVM_ASM_SAFE_OUTPUTS(vector, error_code)
+                    : [output_address] "r"(output_address),
+                      "a" (-EFAULT)
+                    : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS);
+       return vector;
+}
+
+/* Issue a Hyper-V hypercall and assert that it succeeded. */
+static inline void hyperv_hypercall(u64 control, vm_vaddr_t input_address,
+                                   vm_vaddr_t output_address)
+{
+       uint64_t hv_status;
+       uint8_t vector;
+
+       vector = __hyperv_hypercall(control, input_address, output_address, &hv_status);
+
+       GUEST_ASSERT(!vector);
+       GUEST_ASSERT((hv_status & 0xffff) == 0);
+}
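
[Editor's note: a minimal, hypothetical guest-side sketch of how these helpers are meant to be used; the hypercall code and the pre-filled input page are illustrative only and are not part of this change.]

        static void guest_flush_address_space(vm_vaddr_t input_gpa)
        {
                uint64_t hv_status;
                uint8_t vector;

                /* Slow-path hypercall: input passed via a guest page, no output page. */
                vector = __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                                            input_gpa, 0, &hv_status);
                GUEST_ASSERT(!vector);
                GUEST_ASSERT((hv_status & 0xffff) == HV_STATUS_SUCCESS);

                /* Or let the asserting wrapper do the same checks: */
                hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, input_gpa, 0);
        }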
+
+/* Write 'Fast' hypercall input 'data' to the first 'n_sse_regs' SSE regs */
+static inline void hyperv_write_xmm_input(void *data, int n_sse_regs)
+{
+       int i;
+
+       for (i = 0; i < n_sse_regs; i++)
+               write_sse_reg(i, (sse128_t *)(data + sizeof(sse128_t) * i));
+}
+
+/* Proper HV_X64_MSR_GUEST_OS_ID value */
+#define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48)
+
+#define HV_X64_MSR_VP_ASSIST_PAGE              0x40000073
+#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE       0x00000001
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT        12
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
+               (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+
+struct hv_nested_enlightenments_control {
+       struct {
+               __u32 directhypercall:1;
+               __u32 reserved:31;
+       } features;
+       struct {
+               __u32 reserved;
+       } hypercallControls;
+} __packed;
+
+/* Define virtual processor assist page structure. */
+struct hv_vp_assist_page {
+       __u32 apic_assist;
+       __u32 reserved1;
+       __u64 vtl_control[3];
+       struct hv_nested_enlightenments_control nested_control;
+       __u8 enlighten_vmentry;
+       __u8 reserved2[7];
+       __u64 current_nested_vmcs;
+} __packed;
+
+extern struct hv_vp_assist_page *current_vp_assist;
+
+int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist);
+
+struct hyperv_test_pages {
+       /* VP assist page */
+       void *vp_assist_hva;
+       uint64_t vp_assist_gpa;
+       void *vp_assist;
+
+       /* Partition assist page */
+       void *partition_assist_hva;
+       uint64_t partition_assist_gpa;
+       void *partition_assist;
+
+       /* Enlightened VMCS */
+       void *enlightened_vmcs_hva;
+       uint64_t enlightened_vmcs_gpa;
+       void *enlightened_vmcs;
+};
+
+struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
+                                                      vm_vaddr_t *p_hv_pages_gva);
+
+/* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */
+#define HV_INVARIANT_TSC_EXPOSED               BIT_ULL(0)
+
+const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
+const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu);
+void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu);
+
+bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature);
+
+#endif /* !SELFTEST_KVM_HYPERV_H */
diff --git a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
new file mode 100644 (file)
index 0000000..972bb1c
--- /dev/null
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "kvm_util_types.h"
+#include "test_util.h"
+
+extern bool is_forced_emulation_enabled;
+
+struct kvm_vm_arch {
+       vm_vaddr_t gdt;
+       vm_vaddr_t tss;
+       vm_vaddr_t idt;
+
+       uint64_t c_bit;
+       uint64_t s_bit;
+       int sev_fd;
+       bool is_pt_protected;
+};
+
+static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch)
+{
+       return arch->c_bit || arch->s_bit;
+}
+
+#define vm_arch_has_protected_memory(vm) \
+       __vm_arch_has_protected_memory(&(vm)->arch)
+
+#define vcpu_arch_put_guest(mem, __val)                                                        \
+do {                                                                                   \
+       const typeof(mem) val = (__val);                                                \
+                                                                                       \
+       if (!is_forced_emulation_enabled || guest_random_bool(&guest_rng)) {            \
+               (mem) = val;                                                            \
+       } else if (guest_random_bool(&guest_rng)) {                                     \
+               __asm__ __volatile__(KVM_FEP "mov %1, %0"                               \
+                                    : "+m" (mem)                                       \
+                                    : "r" (val) : "memory");                           \
+       } else {                                                                        \
+               uint64_t __old = READ_ONCE(mem);                                        \
+                                                                                       \
+               __asm__ __volatile__(KVM_FEP LOCK_PREFIX "cmpxchg %[new], %[ptr]"       \
+                                    : [ptr] "+m" (mem), [old] "+a" (__old)             \
+                                    : [new]"r" (val) : "memory", "cc");                \
+       }                                                                               \
+} while (0)
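
[Editor's note: a hypothetical usage sketch, not part of the patch. Guest code funnels interesting stores through this macro so that a run with forced emulation enabled randomly exercises the plain-store, emulated-mov, and emulated-cmpxchg paths.]

        static void guest_write_slot(uint64_t *slot, uint64_t val)
        {
                /* Randomly lands as a plain store, a KVM_FEP mov, or a KVM_FEP cmpxchg. */
                vcpu_arch_put_guest(*slot, val);
        }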
+
+#endif  // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/x86/mce.h b/tools/testing/selftests/kvm/include/x86/mce.h
new file mode 100644 (file)
index 0000000..295f2d5
--- /dev/null
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_MCE_H
+#define SELFTEST_KVM_MCE_H
+
+#define MCG_CTL_P              BIT_ULL(8)   /* MCG_CTL register available */
+#define MCG_SER_P              BIT_ULL(24)  /* MCA recovery/new status bits */
+#define MCG_LMCE_P             BIT_ULL(27)  /* Local machine check supported */
+#define MCG_CMCI_P             BIT_ULL(10)  /* CMCI supported */
+#define KVM_MAX_MCE_BANKS 32
+#define MCG_CAP_BANKS_MASK 0xff       /* Bit 0-7 of the MCG_CAP register are #banks */
+#define MCI_STATUS_VAL (1ULL << 63)   /* valid error */
+#define MCI_STATUS_UC (1ULL << 61)    /* uncorrected error */
+#define MCI_STATUS_EN (1ULL << 60)    /* error enabled */
+#define MCI_STATUS_MISCV (1ULL << 59) /* misc error reg. valid */
+#define MCI_STATUS_ADDRV (1ULL << 58) /* addr reg. valid */
+#define MCM_ADDR_PHYS 2    /* physical address */
+#define MCI_CTL2_CMCI_EN               BIT_ULL(30)
+
+#endif /* SELFTEST_KVM_MCE_H */
diff --git a/tools/testing/selftests/kvm/include/x86/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h
new file mode 100644 (file)
index 0000000..3c10c4d
--- /dev/null
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+#ifndef SELFTEST_KVM_PMU_H
+#define SELFTEST_KVM_PMU_H
+
+#include <stdint.h>
+
+#define KVM_PMU_EVENT_FILTER_MAX_EVENTS                        300
+
+/*
+ * Encode an eventsel+umask pair into event-select MSR format.  Note, this is
+ * technically AMD's format, as Intel's format only supports 8 bits for the
+ * event selector, i.e. doesn't use bits 24:16 for the selector.  But, OR-ing
+ * in '0' is a nop and won't clobber the CMASK.
+ */
+#define RAW_EVENT(eventsel, umask) (((eventsel & 0xf00UL) << 24) |     \
+                                   ((eventsel) & 0xff) |               \
+                                   ((umask) & 0xff) << 8)
+
+/*
+ * These are technically Intel's definitions, but except for CMASK (see above),
+ * AMD's layout is compatible with Intel's.
+ */
+#define ARCH_PERFMON_EVENTSEL_EVENT            GENMASK_ULL(7, 0)
+#define ARCH_PERFMON_EVENTSEL_UMASK            GENMASK_ULL(15, 8)
+#define ARCH_PERFMON_EVENTSEL_USR              BIT_ULL(16)
+#define ARCH_PERFMON_EVENTSEL_OS               BIT_ULL(17)
+#define ARCH_PERFMON_EVENTSEL_EDGE             BIT_ULL(18)
+#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL      BIT_ULL(19)
+#define ARCH_PERFMON_EVENTSEL_INT              BIT_ULL(20)
+#define ARCH_PERFMON_EVENTSEL_ANY              BIT_ULL(21)
+#define ARCH_PERFMON_EVENTSEL_ENABLE           BIT_ULL(22)
+#define ARCH_PERFMON_EVENTSEL_INV              BIT_ULL(23)
+#define ARCH_PERFMON_EVENTSEL_CMASK            GENMASK_ULL(31, 24)
+
+/* RDPMC control flags, Intel only. */
+#define INTEL_RDPMC_METRICS                    BIT_ULL(29)
+#define INTEL_RDPMC_FIXED                      BIT_ULL(30)
+#define INTEL_RDPMC_FAST                       BIT_ULL(31)
+
+/* Fixed PMC controls, Intel only. */
+#define FIXED_PMC_GLOBAL_CTRL_ENABLE(_idx)     BIT_ULL((32 + (_idx)))
+
+#define FIXED_PMC_KERNEL                       BIT_ULL(0)
+#define FIXED_PMC_USER                         BIT_ULL(1)
+#define FIXED_PMC_ANYTHREAD                    BIT_ULL(2)
+#define FIXED_PMC_ENABLE_PMI                   BIT_ULL(3)
+#define FIXED_PMC_NR_BITS                      4
+#define FIXED_PMC_CTRL(_idx, _val)             ((_val) << ((_idx) * FIXED_PMC_NR_BITS))
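
[Editor's note: an illustrative sketch of how the fixed-counter controls compose; wrmsr() and the MSR_CORE_PERF_* indices are assumed to come from the selftests' processor definitions, not from this header.]

        static inline void enable_fixed_counter0(void)
        {
                /* Count in rings 0 and 3 on fixed counter 0... */
                wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL,
                      FIXED_PMC_CTRL(0, FIXED_PMC_KERNEL | FIXED_PMC_USER));
                /* ...and flip its enable bit in the global control MSR. */
                wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(0));
        }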
+
+#define PMU_CAP_FW_WRITES                      BIT_ULL(13)
+#define PMU_CAP_LBR_FMT                                0x3f
+
+#define        INTEL_ARCH_CPU_CYCLES                   RAW_EVENT(0x3c, 0x00)
+#define        INTEL_ARCH_INSTRUCTIONS_RETIRED         RAW_EVENT(0xc0, 0x00)
+#define        INTEL_ARCH_REFERENCE_CYCLES             RAW_EVENT(0x3c, 0x01)
+#define        INTEL_ARCH_LLC_REFERENCES               RAW_EVENT(0x2e, 0x4f)
+#define        INTEL_ARCH_LLC_MISSES                   RAW_EVENT(0x2e, 0x41)
+#define        INTEL_ARCH_BRANCHES_RETIRED             RAW_EVENT(0xc4, 0x00)
+#define        INTEL_ARCH_BRANCHES_MISPREDICTED        RAW_EVENT(0xc5, 0x00)
+#define        INTEL_ARCH_TOPDOWN_SLOTS                RAW_EVENT(0xa4, 0x01)
+
+#define        AMD_ZEN_CORE_CYCLES                     RAW_EVENT(0x76, 0x00)
+#define        AMD_ZEN_INSTRUCTIONS_RETIRED            RAW_EVENT(0xc0, 0x00)
+#define        AMD_ZEN_BRANCHES_RETIRED                RAW_EVENT(0xc2, 0x00)
+#define        AMD_ZEN_BRANCHES_MISPREDICTED           RAW_EVENT(0xc3, 0x00)
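
[Editor's note: to make the RAW_EVENT() encoding concrete, a hypothetical sketch of programming general purpose counter 0 with one of the events above; wrmsr() and MSR_P6_EVNTSEL0 are assumed from the selftests' processor definitions rather than from this header.]

        static inline void count_insns_retired_on_gp0(void)
        {
                uint64_t eventsel = INTEL_ARCH_INSTRUCTIONS_RETIRED |
                                    ARCH_PERFMON_EVENTSEL_OS |
                                    ARCH_PERFMON_EVENTSEL_USR |
                                    ARCH_PERFMON_EVENTSEL_ENABLE;

                /* RAW_EVENT() output ORs cleanly with the EVENTSEL control bits. */
                wrmsr(MSR_P6_EVNTSEL0, eventsel);
        }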
+
+/*
+ * Note!  The order, and thus the index, of the architectural events matters,
+ * as support for each event is enumerated via CPUID using the index of the event.
+ */
+enum intel_pmu_architectural_events {
+       INTEL_ARCH_CPU_CYCLES_INDEX,
+       INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX,
+       INTEL_ARCH_REFERENCE_CYCLES_INDEX,
+       INTEL_ARCH_LLC_REFERENCES_INDEX,
+       INTEL_ARCH_LLC_MISSES_INDEX,
+       INTEL_ARCH_BRANCHES_RETIRED_INDEX,
+       INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX,
+       INTEL_ARCH_TOPDOWN_SLOTS_INDEX,
+       NR_INTEL_ARCH_EVENTS,
+};
+
+enum amd_pmu_zen_events {
+       AMD_ZEN_CORE_CYCLES_INDEX,
+       AMD_ZEN_INSTRUCTIONS_INDEX,
+       AMD_ZEN_BRANCHES_INDEX,
+       AMD_ZEN_BRANCH_MISSES_INDEX,
+       NR_AMD_ZEN_EVENTS,
+};
+
+extern const uint64_t intel_pmu_arch_events[];
+extern const uint64_t amd_pmu_zen_events[];
+
+#endif /* SELFTEST_KVM_PMU_H */
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
new file mode 100644 (file)
index 0000000..9ec984c
--- /dev/null
@@ -0,0 +1,1395 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include <assert.h>
+#include <stdint.h>
+#include <syscall.h>
+
+#include <asm/msr-index.h>
+#include <asm/prctl.h>
+
+#include <linux/kvm_para.h>
+#include <linux/stringify.h>
+
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+extern bool host_cpu_is_intel;
+extern bool host_cpu_is_amd;
+extern uint64_t guest_tsc_khz;
+
+#ifndef MAX_NR_CPUID_ENTRIES
+#define MAX_NR_CPUID_ENTRIES 100
+#endif
+
+/* Forced emulation prefix, used to invoke the emulator unconditionally. */
+#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
+
+#define NMI_VECTOR             0x02
+
+#define X86_EFLAGS_FIXED        (1u << 1)
+
+#define X86_CR4_VME            (1ul << 0)
+#define X86_CR4_PVI            (1ul << 1)
+#define X86_CR4_TSD            (1ul << 2)
+#define X86_CR4_DE             (1ul << 3)
+#define X86_CR4_PSE            (1ul << 4)
+#define X86_CR4_PAE            (1ul << 5)
+#define X86_CR4_MCE            (1ul << 6)
+#define X86_CR4_PGE            (1ul << 7)
+#define X86_CR4_PCE            (1ul << 8)
+#define X86_CR4_OSFXSR         (1ul << 9)
+#define X86_CR4_OSXMMEXCPT     (1ul << 10)
+#define X86_CR4_UMIP           (1ul << 11)
+#define X86_CR4_LA57           (1ul << 12)
+#define X86_CR4_VMXE           (1ul << 13)
+#define X86_CR4_SMXE           (1ul << 14)
+#define X86_CR4_FSGSBASE       (1ul << 16)
+#define X86_CR4_PCIDE          (1ul << 17)
+#define X86_CR4_OSXSAVE                (1ul << 18)
+#define X86_CR4_SMEP           (1ul << 20)
+#define X86_CR4_SMAP           (1ul << 21)
+#define X86_CR4_PKE            (1ul << 22)
+
+struct xstate_header {
+       u64                             xstate_bv;
+       u64                             xcomp_bv;
+       u64                             reserved[6];
+} __attribute__((packed));
+
+struct xstate {
+       u8                              i387[512];
+       struct xstate_header            header;
+       u8                              extended_state_area[0];
+} __attribute__ ((packed, aligned (64)));
+
+#define XFEATURE_MASK_FP               BIT_ULL(0)
+#define XFEATURE_MASK_SSE              BIT_ULL(1)
+#define XFEATURE_MASK_YMM              BIT_ULL(2)
+#define XFEATURE_MASK_BNDREGS          BIT_ULL(3)
+#define XFEATURE_MASK_BNDCSR           BIT_ULL(4)
+#define XFEATURE_MASK_OPMASK           BIT_ULL(5)
+#define XFEATURE_MASK_ZMM_Hi256                BIT_ULL(6)
+#define XFEATURE_MASK_Hi16_ZMM         BIT_ULL(7)
+#define XFEATURE_MASK_PT               BIT_ULL(8)
+#define XFEATURE_MASK_PKRU             BIT_ULL(9)
+#define XFEATURE_MASK_PASID            BIT_ULL(10)
+#define XFEATURE_MASK_CET_USER         BIT_ULL(11)
+#define XFEATURE_MASK_CET_KERNEL       BIT_ULL(12)
+#define XFEATURE_MASK_LBR              BIT_ULL(15)
+#define XFEATURE_MASK_XTILE_CFG                BIT_ULL(17)
+#define XFEATURE_MASK_XTILE_DATA       BIT_ULL(18)
+
+#define XFEATURE_MASK_AVX512           (XFEATURE_MASK_OPMASK | \
+                                        XFEATURE_MASK_ZMM_Hi256 | \
+                                        XFEATURE_MASK_Hi16_ZMM)
+#define XFEATURE_MASK_XTILE            (XFEATURE_MASK_XTILE_DATA | \
+                                        XFEATURE_MASK_XTILE_CFG)
+
+/* Note, these are ordered alphabetically to match kvm_cpuid_entry2.  Eww. */
+enum cpuid_output_regs {
+       KVM_CPUID_EAX,
+       KVM_CPUID_EBX,
+       KVM_CPUID_ECX,
+       KVM_CPUID_EDX
+};
+
+/*
+ * Pack the information into a 64-bit value so that each X86_FEATURE_XXX can be
+ * passed by value with no overhead.
+ */
+struct kvm_x86_cpu_feature {
+       u32     function;
+       u16     index;
+       u8      reg;
+       u8      bit;
+};
+#define        KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit)                                \
+({                                                                             \
+       struct kvm_x86_cpu_feature feature = {                                  \
+               .function = fn,                                                 \
+               .index = idx,                                                   \
+               .reg = KVM_CPUID_##gpr,                                         \
+               .bit = __bit,                                                   \
+       };                                                                      \
+                                                                               \
+       kvm_static_assert((fn & 0xc0000000) == 0 ||                             \
+                         (fn & 0xc0000000) == 0x40000000 ||                    \
+                         (fn & 0xc0000000) == 0x80000000 ||                    \
+                         (fn & 0xc0000000) == 0xc0000000);                     \
+       kvm_static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE));    \
+       feature;                                                                \
+})
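
[Editor's note: because a feature is just a packed value, it can be built ad hoc and passed around cheaply. A hypothetical sketch follows; this_cpu_has() is assumed to be declared elsewhere in this header.]

        static inline bool guest_has_xsave(void)
        {
                /* Equivalent to the X86_FEATURE_XSAVE definition below. */
                struct kvm_x86_cpu_feature xsave = KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26);

                return this_cpu_has(xsave);
        }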
+
+/*
+ * Basic Leafs, a.k.a. Intel defined
+ */
+#define        X86_FEATURE_MWAIT               KVM_X86_CPU_FEATURE(0x1, 0, ECX, 3)
+#define        X86_FEATURE_VMX                 KVM_X86_CPU_FEATURE(0x1, 0, ECX, 5)
+#define        X86_FEATURE_SMX                 KVM_X86_CPU_FEATURE(0x1, 0, ECX, 6)
+#define        X86_FEATURE_PDCM                KVM_X86_CPU_FEATURE(0x1, 0, ECX, 15)
+#define        X86_FEATURE_PCID                KVM_X86_CPU_FEATURE(0x1, 0, ECX, 17)
+#define X86_FEATURE_X2APIC             KVM_X86_CPU_FEATURE(0x1, 0, ECX, 21)
+#define        X86_FEATURE_MOVBE               KVM_X86_CPU_FEATURE(0x1, 0, ECX, 22)
+#define        X86_FEATURE_TSC_DEADLINE_TIMER  KVM_X86_CPU_FEATURE(0x1, 0, ECX, 24)
+#define        X86_FEATURE_XSAVE               KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26)
+#define        X86_FEATURE_OSXSAVE             KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27)
+#define        X86_FEATURE_RDRAND              KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30)
+#define        X86_FEATURE_HYPERVISOR          KVM_X86_CPU_FEATURE(0x1, 0, ECX, 31)
+#define X86_FEATURE_PAE                        KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6)
+#define        X86_FEATURE_MCE                 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7)
+#define        X86_FEATURE_APIC                KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9)
+#define        X86_FEATURE_CLFLUSH             KVM_X86_CPU_FEATURE(0x1, 0, EDX, 19)
+#define        X86_FEATURE_XMM                 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 25)
+#define        X86_FEATURE_XMM2                KVM_X86_CPU_FEATURE(0x1, 0, EDX, 26)
+#define        X86_FEATURE_FSGSBASE            KVM_X86_CPU_FEATURE(0x7, 0, EBX, 0)
+#define        X86_FEATURE_TSC_ADJUST          KVM_X86_CPU_FEATURE(0x7, 0, EBX, 1)
+#define        X86_FEATURE_SGX                 KVM_X86_CPU_FEATURE(0x7, 0, EBX, 2)
+#define        X86_FEATURE_HLE                 KVM_X86_CPU_FEATURE(0x7, 0, EBX, 4)
+#define        X86_FEATURE_SMEP                KVM_X86_CPU_FEATURE(0x7, 0, EBX, 7)
+#define        X86_FEATURE_INVPCID             KVM_X86_CPU_FEATURE(0x7, 0, EBX, 10)
+#define        X86_FEATURE_RTM                 KVM_X86_CPU_FEATURE(0x7, 0, EBX, 11)
+#define        X86_FEATURE_MPX                 KVM_X86_CPU_FEATURE(0x7, 0, EBX, 14)
+#define        X86_FEATURE_SMAP                KVM_X86_CPU_FEATURE(0x7, 0, EBX, 20)
+#define        X86_FEATURE_PCOMMIT             KVM_X86_CPU_FEATURE(0x7, 0, EBX, 22)
+#define        X86_FEATURE_CLFLUSHOPT          KVM_X86_CPU_FEATURE(0x7, 0, EBX, 23)
+#define        X86_FEATURE_CLWB                KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24)
+#define        X86_FEATURE_UMIP                KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2)
+#define        X86_FEATURE_PKU                 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3)
+#define        X86_FEATURE_OSPKE               KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4)
+#define        X86_FEATURE_LA57                KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16)
+#define        X86_FEATURE_RDPID               KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22)
+#define        X86_FEATURE_SGX_LC              KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30)
+#define        X86_FEATURE_SHSTK               KVM_X86_CPU_FEATURE(0x7, 0, ECX, 7)
+#define        X86_FEATURE_IBT                 KVM_X86_CPU_FEATURE(0x7, 0, EDX, 20)
+#define        X86_FEATURE_AMX_TILE            KVM_X86_CPU_FEATURE(0x7, 0, EDX, 24)
+#define        X86_FEATURE_SPEC_CTRL           KVM_X86_CPU_FEATURE(0x7, 0, EDX, 26)
+#define        X86_FEATURE_ARCH_CAPABILITIES   KVM_X86_CPU_FEATURE(0x7, 0, EDX, 29)
+#define        X86_FEATURE_PKS                 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 31)
+#define        X86_FEATURE_XTILECFG            KVM_X86_CPU_FEATURE(0xD, 0, EAX, 17)
+#define        X86_FEATURE_XTILEDATA           KVM_X86_CPU_FEATURE(0xD, 0, EAX, 18)
+#define        X86_FEATURE_XSAVES              KVM_X86_CPU_FEATURE(0xD, 1, EAX, 3)
+#define        X86_FEATURE_XFD                 KVM_X86_CPU_FEATURE(0xD, 1, EAX, 4)
+#define X86_FEATURE_XTILEDATA_XFD      KVM_X86_CPU_FEATURE(0xD, 18, ECX, 2)
+
+/*
+ * Extended Leafs, a.k.a. AMD defined
+ */
+#define        X86_FEATURE_SVM                 KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2)
+#define        X86_FEATURE_NX                  KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20)
+#define        X86_FEATURE_GBPAGES             KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26)
+#define        X86_FEATURE_RDTSCP              KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27)
+#define        X86_FEATURE_LM                  KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 29)
+#define        X86_FEATURE_INVTSC              KVM_X86_CPU_FEATURE(0x80000007, 0, EDX, 8)
+#define        X86_FEATURE_RDPRU               KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 4)
+#define        X86_FEATURE_AMD_IBPB            KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 12)
+#define        X86_FEATURE_NPT                 KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 0)
+#define        X86_FEATURE_LBRV                KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 1)
+#define        X86_FEATURE_NRIPS               KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 3)
+#define X86_FEATURE_TSCRATEMSR          KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4)
+#define X86_FEATURE_PAUSEFILTER         KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
+#define X86_FEATURE_PFTHRESHOLD         KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
+#define        X86_FEATURE_VGIF                KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
+#define X86_FEATURE_SEV                        KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
+#define X86_FEATURE_SEV_ES             KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
+
+/*
+ * KVM defined paravirt features.
+ */
+#define X86_FEATURE_KVM_CLOCKSOURCE    KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 0)
+#define X86_FEATURE_KVM_NOP_IO_DELAY   KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 1)
+#define X86_FEATURE_KVM_MMU_OP         KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 2)
+#define X86_FEATURE_KVM_CLOCKSOURCE2   KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 3)
+#define X86_FEATURE_KVM_ASYNC_PF       KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 4)
+#define X86_FEATURE_KVM_STEAL_TIME     KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 5)
+#define X86_FEATURE_KVM_PV_EOI         KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 6)
+#define X86_FEATURE_KVM_PV_UNHALT      KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 7)
+/* Bit 8 apparently isn't used?!?! */
+#define X86_FEATURE_KVM_PV_TLB_FLUSH   KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 9)
+#define X86_FEATURE_KVM_ASYNC_PF_VMEXIT        KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 10)
+#define X86_FEATURE_KVM_PV_SEND_IPI    KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 11)
+#define X86_FEATURE_KVM_POLL_CONTROL   KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 12)
+#define X86_FEATURE_KVM_PV_SCHED_YIELD KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 13)
+#define X86_FEATURE_KVM_ASYNC_PF_INT   KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 14)
+#define X86_FEATURE_KVM_MSI_EXT_DEST_ID        KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 15)
+#define X86_FEATURE_KVM_HC_MAP_GPA_RANGE       KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 16)
+#define X86_FEATURE_KVM_MIGRATION_CONTROL      KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 17)
+
+/*
+ * Same idea as X86_FEATURE_XXX, but X86_PROPERTY_XXX retrieves a multi-bit
+ * value/property as opposed to a single-bit feature.  Again, pack the info
+ * into a 64-bit value to pass by value with no overhead.
+ */
+struct kvm_x86_cpu_property {
+       u32     function;
+       u8      index;
+       u8      reg;
+       u8      lo_bit;
+       u8      hi_bit;
+};
+#define        KVM_X86_CPU_PROPERTY(fn, idx, gpr, low_bit, high_bit)                   \
+({                                                                             \
+       struct kvm_x86_cpu_property property = {                                \
+               .function = fn,                                                 \
+               .index = idx,                                                   \
+               .reg = KVM_CPUID_##gpr,                                         \
+               .lo_bit = low_bit,                                              \
+               .hi_bit = high_bit,                                             \
+       };                                                                      \
+                                                                               \
+       kvm_static_assert(low_bit < high_bit);                                  \
+       kvm_static_assert((fn & 0xc0000000) == 0 ||                             \
+                         (fn & 0xc0000000) == 0x40000000 ||                    \
+                         (fn & 0xc0000000) == 0x80000000 ||                    \
+                         (fn & 0xc0000000) == 0xc0000000);                     \
+       kvm_static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE));   \
+       property;                                                               \
+})
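
[Editor's note: for illustration, extracting such a property from a raw CPUID register value amounts to the sketch below; the function is hypothetical, and the selftests' this_cpu_property()-style helpers do effectively this.]

        static inline uint32_t cpuid_property_value(struct kvm_x86_cpu_property prop,
                                                    uint32_t reg_val)
        {
                /* Isolate bits [hi_bit:lo_bit] of the chosen output register. */
                uint64_t mask = BIT_ULL(prop.hi_bit - prop.lo_bit + 1) - 1;

                return (reg_val >> prop.lo_bit) & mask;
        }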
+
+#define X86_PROPERTY_MAX_BASIC_LEAF            KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31)
+#define X86_PROPERTY_PMU_VERSION               KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7)
+#define X86_PROPERTY_PMU_NR_GP_COUNTERS                KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
+#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23)
+#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
+#define X86_PROPERTY_PMU_EVENTS_MASK           KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7)
+#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK        KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31)
+#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS     KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4)
+#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH      KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12)
+
+#define X86_PROPERTY_SUPPORTED_XCR0_LO         KVM_X86_CPU_PROPERTY(0xd,  0, EAX,  0, 31)
+#define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0      KVM_X86_CPU_PROPERTY(0xd,  0, EBX,  0, 31)
+#define X86_PROPERTY_XSTATE_MAX_SIZE           KVM_X86_CPU_PROPERTY(0xd,  0, ECX,  0, 31)
+#define X86_PROPERTY_SUPPORTED_XCR0_HI         KVM_X86_CPU_PROPERTY(0xd,  0, EDX,  0, 31)
+
+#define X86_PROPERTY_XSTATE_TILE_SIZE          KVM_X86_CPU_PROPERTY(0xd, 18, EAX,  0, 31)
+#define X86_PROPERTY_XSTATE_TILE_OFFSET                KVM_X86_CPU_PROPERTY(0xd, 18, EBX,  0, 31)
+#define X86_PROPERTY_AMX_MAX_PALETTE_TABLES    KVM_X86_CPU_PROPERTY(0x1d, 0, EAX,  0, 31)
+#define X86_PROPERTY_AMX_TOTAL_TILE_BYTES      KVM_X86_CPU_PROPERTY(0x1d, 1, EAX,  0, 15)
+#define X86_PROPERTY_AMX_BYTES_PER_TILE                KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 16, 31)
+#define X86_PROPERTY_AMX_BYTES_PER_ROW         KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 0,  15)
+#define X86_PROPERTY_AMX_NR_TILE_REGS          KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 16, 31)
+#define X86_PROPERTY_AMX_MAX_ROWS              KVM_X86_CPU_PROPERTY(0x1d, 1, ECX, 0,  15)
+
+#define X86_PROPERTY_MAX_KVM_LEAF              KVM_X86_CPU_PROPERTY(0x40000000, 0, EAX, 0, 31)
+
+#define X86_PROPERTY_MAX_EXT_LEAF              KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31)
+#define X86_PROPERTY_MAX_PHY_ADDR              KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7)
+#define X86_PROPERTY_MAX_VIRT_ADDR             KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15)
+#define X86_PROPERTY_GUEST_MAX_PHY_ADDR                KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23)
+#define X86_PROPERTY_SEV_C_BIT                 KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
+#define X86_PROPERTY_PHYS_ADDR_REDUCTION       KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
+
+#define X86_PROPERTY_MAX_CENTAUR_LEAF          KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)
+
+/*
+ * Intel's architectural PMU events are bizarre.  They have a "feature" bit
+ * that indicates the feature is _not_ supported, and a property that states
+ * the length of the bit mask of unsupported features.  A feature is supported
+ * if the size of the bit mask is larger than the "unavailable" bit, and said
+ * bit is not set.  Fixed counters also use bizarre enumeration, but inverted
+ * from arch events for general purpose counters.  Fixed counters are supported
+ * if a feature flag is set **OR** the total number of fixed counters is
+ * greater than the index of the counter.
+ *
+ * Wrap the events for general purpose and fixed counters to simplify checking
+ * whether or not a given architectural event is supported.
+ */
+struct kvm_x86_pmu_feature {
+       struct kvm_x86_cpu_feature f;
+};
+#define        KVM_X86_PMU_FEATURE(__reg, __bit)                               \
+({                                                                     \
+       struct kvm_x86_pmu_feature feature = {                          \
+               .f = KVM_X86_CPU_FEATURE(0xa, 0, __reg, __bit),         \
+       };                                                              \
+                                                                       \
+       kvm_static_assert(KVM_CPUID_##__reg == KVM_CPUID_EBX ||         \
+                         KVM_CPUID_##__reg == KVM_CPUID_ECX);          \
+       feature;                                                        \
+})
+
+#define X86_PMU_FEATURE_CPU_CYCLES                     KVM_X86_PMU_FEATURE(EBX, 0)
+#define X86_PMU_FEATURE_INSNS_RETIRED                  KVM_X86_PMU_FEATURE(EBX, 1)
+#define X86_PMU_FEATURE_REFERENCE_CYCLES               KVM_X86_PMU_FEATURE(EBX, 2)
+#define X86_PMU_FEATURE_LLC_REFERENCES                 KVM_X86_PMU_FEATURE(EBX, 3)
+#define X86_PMU_FEATURE_LLC_MISSES                     KVM_X86_PMU_FEATURE(EBX, 4)
+#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED           KVM_X86_PMU_FEATURE(EBX, 5)
+#define X86_PMU_FEATURE_BRANCHES_MISPREDICTED          KVM_X86_PMU_FEATURE(EBX, 6)
+#define X86_PMU_FEATURE_TOPDOWN_SLOTS                  KVM_X86_PMU_FEATURE(EBX, 7)
+
+#define X86_PMU_FEATURE_INSNS_RETIRED_FIXED            KVM_X86_PMU_FEATURE(ECX, 0)
+#define X86_PMU_FEATURE_CPU_CYCLES_FIXED               KVM_X86_PMU_FEATURE(ECX, 1)
+#define X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED     KVM_X86_PMU_FEATURE(ECX, 2)
+#define X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED            KVM_X86_PMU_FEATURE(ECX, 3)
+
+static inline unsigned int x86_family(unsigned int eax)
+{
+       unsigned int x86;
+
+       x86 = (eax >> 8) & 0xf;
+
+       if (x86 == 0xf)
+               x86 += (eax >> 20) & 0xff;
+
+       return x86;
+}
+
+static inline unsigned int x86_model(unsigned int eax)
+{
+       return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f);
+}
+
+/* Page table bitfield declarations */
+#define PTE_PRESENT_MASK        BIT_ULL(0)
+#define PTE_WRITABLE_MASK       BIT_ULL(1)
+#define PTE_USER_MASK           BIT_ULL(2)
+#define PTE_ACCESSED_MASK       BIT_ULL(5)
+#define PTE_DIRTY_MASK          BIT_ULL(6)
+#define PTE_LARGE_MASK          BIT_ULL(7)
+#define PTE_GLOBAL_MASK         BIT_ULL(8)
+#define PTE_NX_MASK             BIT_ULL(63)
+
+#define PHYSICAL_PAGE_MASK      GENMASK_ULL(51, 12)
+
+#define PAGE_SHIFT             12
+#define PAGE_SIZE              (1ULL << PAGE_SHIFT)
+#define PAGE_MASK              (~(PAGE_SIZE-1) & PHYSICAL_PAGE_MASK)
+
+#define HUGEPAGE_SHIFT(x)      (PAGE_SHIFT + (((x) - 1) * 9))
+#define HUGEPAGE_SIZE(x)       (1UL << HUGEPAGE_SHIFT(x))
+#define HUGEPAGE_MASK(x)       (~(HUGEPAGE_SIZE(x) - 1) & PHYSICAL_PAGE_MASK)
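+/*
+ * For reference: each page table level adds 9 bits of translation, e.g.
+ * HUGEPAGE_SIZE(2) == 2MiB (1ULL << 21) and HUGEPAGE_SIZE(3) == 1GiB
+ * (1ULL << 30).
+ */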
+
+#define PTE_GET_PA(pte)                ((pte) & PHYSICAL_PAGE_MASK)
+#define PTE_GET_PFN(pte)        (PTE_GET_PA(pte) >> PAGE_SHIFT)
+
+/* General Registers in 64-Bit Mode */
+struct gpr64_regs {
+       u64 rax;
+       u64 rcx;
+       u64 rdx;
+       u64 rbx;
+       u64 rsp;
+       u64 rbp;
+       u64 rsi;
+       u64 rdi;
+       u64 r8;
+       u64 r9;
+       u64 r10;
+       u64 r11;
+       u64 r12;
+       u64 r13;
+       u64 r14;
+       u64 r15;
+};
+
+struct desc64 {
+       uint16_t limit0;
+       uint16_t base0;
+       unsigned base1:8, type:4, s:1, dpl:2, p:1;
+       unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
+       uint32_t base3;
+       uint32_t zero1;
+} __attribute__((packed));
+
+struct desc_ptr {
+       uint16_t size;
+       uint64_t address;
+} __attribute__((packed));
+
+struct kvm_x86_state {
+       struct kvm_xsave *xsave;
+       struct kvm_vcpu_events events;
+       struct kvm_mp_state mp_state;
+       struct kvm_regs regs;
+       struct kvm_xcrs xcrs;
+       struct kvm_sregs sregs;
+       struct kvm_debugregs debugregs;
+       union {
+               struct kvm_nested_state nested;
+               char nested_[16384];
+       };
+       struct kvm_msrs msrs;
+};
+
+static inline uint64_t get_desc64_base(const struct desc64 *desc)
+{
+       return ((uint64_t)desc->base3 << 32) |
+               (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
+}
+
+static inline uint64_t rdtsc(void)
+{
+       uint32_t eax, edx;
+       uint64_t tsc_val;
+       /*
+        * The lfence is to wait (on Intel CPUs) until all previous
+        * instructions have been executed. If software requires RDTSC to be
+        * executed prior to execution of any subsequent instruction, it can
+        * execute LFENCE immediately after RDTSC
+        */
+       __asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
+       tsc_val = ((uint64_t)edx) << 32 | eax;
+       return tsc_val;
+}
+
+static inline uint64_t rdtscp(uint32_t *aux)
+{
+       uint32_t eax, edx;
+
+       __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
+       return ((uint64_t)edx) << 32 | eax;
+}
+
+static inline uint64_t rdmsr(uint32_t msr)
+{
+       uint32_t a, d;
+
+       __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+       return a | ((uint64_t) d << 32);
+}
+
+static inline void wrmsr(uint32_t msr, uint64_t value)
+{
+       uint32_t a = value;
+       uint32_t d = value >> 32;
+
+       __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+
+static inline uint16_t inw(uint16_t port)
+{
+       uint16_t tmp;
+
+       __asm__ __volatile__("in %%dx, %%ax"
+               : /* output */ "=a" (tmp)
+               : /* input */ "d" (port));
+
+       return tmp;
+}
+
+static inline uint16_t get_es(void)
+{
+       uint16_t es;
+
+       __asm__ __volatile__("mov %%es, %[es]"
+                            : /* output */ [es]"=rm"(es));
+       return es;
+}
+
+static inline uint16_t get_cs(void)
+{
+       uint16_t cs;
+
+       __asm__ __volatile__("mov %%cs, %[cs]"
+                            : /* output */ [cs]"=rm"(cs));
+       return cs;
+}
+
+static inline uint16_t get_ss(void)
+{
+       uint16_t ss;
+
+       __asm__ __volatile__("mov %%ss, %[ss]"
+                            : /* output */ [ss]"=rm"(ss));
+       return ss;
+}
+
+static inline uint16_t get_ds(void)
+{
+       uint16_t ds;
+
+       __asm__ __volatile__("mov %%ds, %[ds]"
+                            : /* output */ [ds]"=rm"(ds));
+       return ds;
+}
+
+static inline uint16_t get_fs(void)
+{
+       uint16_t fs;
+
+       __asm__ __volatile__("mov %%fs, %[fs]"
+                            : /* output */ [fs]"=rm"(fs));
+       return fs;
+}
+
+static inline uint16_t get_gs(void)
+{
+       uint16_t gs;
+
+       __asm__ __volatile__("mov %%gs, %[gs]"
+                            : /* output */ [gs]"=rm"(gs));
+       return gs;
+}
+
+static inline uint16_t get_tr(void)
+{
+       uint16_t tr;
+
+       __asm__ __volatile__("str %[tr]"
+                            : /* output */ [tr]"=rm"(tr));
+       return tr;
+}
+
+static inline uint64_t get_cr0(void)
+{
+       uint64_t cr0;
+
+       __asm__ __volatile__("mov %%cr0, %[cr0]"
+                            : /* output */ [cr0]"=r"(cr0));
+       return cr0;
+}
+
+static inline uint64_t get_cr3(void)
+{
+       uint64_t cr3;
+
+       __asm__ __volatile__("mov %%cr3, %[cr3]"
+                            : /* output */ [cr3]"=r"(cr3));
+       return cr3;
+}
+
+static inline uint64_t get_cr4(void)
+{
+       uint64_t cr4;
+
+       __asm__ __volatile__("mov %%cr4, %[cr4]"
+                            : /* output */ [cr4]"=r"(cr4));
+       return cr4;
+}
+
+static inline void set_cr4(uint64_t val)
+{
+       __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
+}
+
+static inline u64 xgetbv(u32 index)
+{
+       u32 eax, edx;
+
+       __asm__ __volatile__("xgetbv;"
+                    : "=a" (eax), "=d" (edx)
+                    : "c" (index));
+       return eax | ((u64)edx << 32);
+}
+
+static inline void xsetbv(u32 index, u64 value)
+{
+       u32 eax = value;
+       u32 edx = value >> 32;
+
+       __asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
+}
+
+static inline void wrpkru(u32 pkru)
+{
+       /* Note, ECX and EDX are architecturally required to be '0'. */
+       asm volatile(".byte 0x0f,0x01,0xef\n\t"
+                    : : "a" (pkru), "c"(0), "d"(0));
+}
+
+static inline struct desc_ptr get_gdt(void)
+{
+       struct desc_ptr gdt;
+       __asm__ __volatile__("sgdt %[gdt]"
+                            : /* output */ [gdt]"=m"(gdt));
+       return gdt;
+}
+
+static inline struct desc_ptr get_idt(void)
+{
+       struct desc_ptr idt;
+       __asm__ __volatile__("sidt %[idt]"
+                            : /* output */ [idt]"=m"(idt));
+       return idt;
+}
+
+static inline void outl(uint16_t port, uint32_t value)
+{
+       __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
+}
+
+static inline void __cpuid(uint32_t function, uint32_t index,
+                          uint32_t *eax, uint32_t *ebx,
+                          uint32_t *ecx, uint32_t *edx)
+{
+       *eax = function;
+       *ecx = index;
+
+       asm volatile("cpuid"
+           : "=a" (*eax),
+             "=b" (*ebx),
+             "=c" (*ecx),
+             "=d" (*edx)
+           : "0" (*eax), "2" (*ecx)
+           : "memory");
+}
+
+static inline void cpuid(uint32_t function,
+                        uint32_t *eax, uint32_t *ebx,
+                        uint32_t *ecx, uint32_t *edx)
+{
+       return __cpuid(function, 0, eax, ebx, ecx, edx);
+}
+
+static inline uint32_t this_cpu_fms(void)
+{
+       uint32_t eax, ebx, ecx, edx;
+
+       cpuid(1, &eax, &ebx, &ecx, &edx);
+       return eax;
+}
+
+static inline uint32_t this_cpu_family(void)
+{
+       return x86_family(this_cpu_fms());
+}
+
+static inline uint32_t this_cpu_model(void)
+{
+       return x86_model(this_cpu_fms());
+}
+
+static inline bool this_cpu_vendor_string_is(const char *vendor)
+{
+       const uint32_t *chunk = (const uint32_t *)vendor;
+       uint32_t eax, ebx, ecx, edx;
+
+       cpuid(0, &eax, &ebx, &ecx, &edx);
+       return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
+}
+
+static inline bool this_cpu_is_intel(void)
+{
+       return this_cpu_vendor_string_is("GenuineIntel");
+}
+
+/*
+ * Exclude early K5 samples with a vendor string of "AMDisbetter!"
+ */
+static inline bool this_cpu_is_amd(void)
+{
+       return this_cpu_vendor_string_is("AuthenticAMD");
+}
+
+static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index,
+                                     uint8_t reg, uint8_t lo, uint8_t hi)
+{
+       uint32_t gprs[4];
+
+       __cpuid(function, index,
+               &gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX],
+               &gprs[KVM_CPUID_ECX], &gprs[KVM_CPUID_EDX]);
+
+       return (gprs[reg] & GENMASK(hi, lo)) >> lo;
+}
+
+static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature)
+{
+       return __this_cpu_has(feature.function, feature.index,
+                             feature.reg, feature.bit, feature.bit);
+}
+
+static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property)
+{
+       return __this_cpu_has(property.function, property.index,
+                             property.reg, property.lo_bit, property.hi_bit);
+}
+
+static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property)
+{
+       uint32_t max_leaf;
+
+       switch (property.function & 0xc0000000) {
+       case 0:
+               max_leaf = this_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
+               break;
+       case 0x40000000:
+               max_leaf = this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
+               break;
+       case 0x80000000:
+               max_leaf = this_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
+               break;
+       case 0xc0000000:
+               max_leaf = this_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
+       }
+       return max_leaf >= property.function;
+}
+
+static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature)
+{
+       uint32_t nr_bits;
+
+       if (feature.f.reg == KVM_CPUID_EBX) {
+               nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+               return nr_bits > feature.f.bit && !this_cpu_has(feature.f);
+       }
+
+       GUEST_ASSERT(feature.f.reg == KVM_CPUID_ECX);
+       nr_bits = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       return nr_bits > feature.f.bit || this_cpu_has(feature.f);
+}
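+
+/*
+ * Illustrative guest-side usage of this_pmu_has() (a sketch, not from the
+ * original header): skip a test case if the branch-instructions-retired
+ * architectural event isn't supported.
+ *
+ *      if (!this_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED))
+ *              return;
+ */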
+
+static __always_inline uint64_t this_cpu_supported_xcr0(void)
+{
+       if (!this_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
+               return 0;
+
+       return this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
+              ((uint64_t)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
+}
+
+typedef u32            __attribute__((vector_size(16))) sse128_t;
+#define __sse128_u     union { sse128_t vec; u64 as_u64[2]; u32 as_u32[4]; }
+#define sse128_lo(x)   ({ __sse128_u t; t.vec = x; t.as_u64[0]; })
+#define sse128_hi(x)   ({ __sse128_u t; t.vec = x; t.as_u64[1]; })
+
+static inline void read_sse_reg(int reg, sse128_t *data)
+{
+       switch (reg) {
+       case 0:
+               asm("movdqa %%xmm0, %0" : "=m"(*data));
+               break;
+       case 1:
+               asm("movdqa %%xmm1, %0" : "=m"(*data));
+               break;
+       case 2:
+               asm("movdqa %%xmm2, %0" : "=m"(*data));
+               break;
+       case 3:
+               asm("movdqa %%xmm3, %0" : "=m"(*data));
+               break;
+       case 4:
+               asm("movdqa %%xmm4, %0" : "=m"(*data));
+               break;
+       case 5:
+               asm("movdqa %%xmm5, %0" : "=m"(*data));
+               break;
+       case 6:
+               asm("movdqa %%xmm6, %0" : "=m"(*data));
+               break;
+       case 7:
+               asm("movdqa %%xmm7, %0" : "=m"(*data));
+               break;
+       default:
+               BUG();
+       }
+}
+
+static inline void write_sse_reg(int reg, const sse128_t *data)
+{
+       switch (reg) {
+       case 0:
+               asm("movdqa %0, %%xmm0" : : "m"(*data));
+               break;
+       case 1:
+               asm("movdqa %0, %%xmm1" : : "m"(*data));
+               break;
+       case 2:
+               asm("movdqa %0, %%xmm2" : : "m"(*data));
+               break;
+       case 3:
+               asm("movdqa %0, %%xmm3" : : "m"(*data));
+               break;
+       case 4:
+               asm("movdqa %0, %%xmm4" : : "m"(*data));
+               break;
+       case 5:
+               asm("movdqa %0, %%xmm5" : : "m"(*data));
+               break;
+       case 6:
+               asm("movdqa %0, %%xmm6" : : "m"(*data));
+               break;
+       case 7:
+               asm("movdqa %0, %%xmm7" : : "m"(*data));
+               break;
+       default:
+               BUG();
+       }
+}
+
+static inline void cpu_relax(void)
+{
+       asm volatile("rep; nop" ::: "memory");
+}
+
+static inline void udelay(unsigned long usec)
+{
+       uint64_t start, now, cycles;
+
+       GUEST_ASSERT(guest_tsc_khz);
+       cycles = guest_tsc_khz / 1000 * usec;
+
+       /*
+        * Deliberately don't PAUSE, a.k.a. cpu_relax(), so that the delay is
+        * as accurate as possible, e.g. doesn't trigger PAUSE-Loop VM-Exits.
+        */
+       start = rdtsc();
+       do {
+               now = rdtsc();
+       } while (now - start < cycles);
+}
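+
+/*
+ * For reference: the delay is converted to TSC cycles, e.g. with a 2 GHz
+ * guest TSC (guest_tsc_khz == 2000000), udelay(10) busy-waits for roughly
+ * 20000 cycles.
+ */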
+
+#define ud2()                  \
+       __asm__ __volatile__(   \
+               "ud2\n" \
+               )
+
+#define hlt()                  \
+       __asm__ __volatile__(   \
+               "hlt\n" \
+               )
+
+struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu);
+void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state);
+void kvm_x86_state_cleanup(struct kvm_x86_state *state);
+
+const struct kvm_msr_list *kvm_get_msr_index_list(void);
+const struct kvm_msr_list *kvm_get_feature_msr_index_list(void);
+bool kvm_msr_is_in_save_restore_list(uint32_t msr_index);
+uint64_t kvm_get_feature_msr(uint64_t msr_index);
+
+static inline void vcpu_msrs_get(struct kvm_vcpu *vcpu,
+                                struct kvm_msrs *msrs)
+{
+       int r = __vcpu_ioctl(vcpu, KVM_GET_MSRS, msrs);
+
+       TEST_ASSERT(r == msrs->nmsrs,
+                   "KVM_GET_MSRS failed, r: %i (failed on MSR %x)",
+                   r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
+}
+static inline void vcpu_msrs_set(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs)
+{
+       int r = __vcpu_ioctl(vcpu, KVM_SET_MSRS, msrs);
+
+       TEST_ASSERT(r == msrs->nmsrs,
+                   "KVM_SET_MSRS failed, r: %i (failed on MSR %x)",
+                   r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
+}
+static inline void vcpu_debugregs_get(struct kvm_vcpu *vcpu,
+                                     struct kvm_debugregs *debugregs)
+{
+       vcpu_ioctl(vcpu, KVM_GET_DEBUGREGS, debugregs);
+}
+static inline void vcpu_debugregs_set(struct kvm_vcpu *vcpu,
+                                     struct kvm_debugregs *debugregs)
+{
+       vcpu_ioctl(vcpu, KVM_SET_DEBUGREGS, debugregs);
+}
+static inline void vcpu_xsave_get(struct kvm_vcpu *vcpu,
+                                 struct kvm_xsave *xsave)
+{
+       vcpu_ioctl(vcpu, KVM_GET_XSAVE, xsave);
+}
+static inline void vcpu_xsave2_get(struct kvm_vcpu *vcpu,
+                                  struct kvm_xsave *xsave)
+{
+       vcpu_ioctl(vcpu, KVM_GET_XSAVE2, xsave);
+}
+static inline void vcpu_xsave_set(struct kvm_vcpu *vcpu,
+                                 struct kvm_xsave *xsave)
+{
+       vcpu_ioctl(vcpu, KVM_SET_XSAVE, xsave);
+}
+static inline void vcpu_xcrs_get(struct kvm_vcpu *vcpu,
+                                struct kvm_xcrs *xcrs)
+{
+       vcpu_ioctl(vcpu, KVM_GET_XCRS, xcrs);
+}
+static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs)
+{
+       vcpu_ioctl(vcpu, KVM_SET_XCRS, xcrs);
+}
+
+const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
+                                              uint32_t function, uint32_t index);
+const struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
+
+static inline uint32_t kvm_cpu_fms(void)
+{
+       return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax;
+}
+
+static inline uint32_t kvm_cpu_family(void)
+{
+       return x86_family(kvm_cpu_fms());
+}
+
+static inline uint32_t kvm_cpu_model(void)
+{
+       return x86_model(kvm_cpu_fms());
+}
+
+bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
+                  struct kvm_x86_cpu_feature feature);
+
+static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature)
+{
+       return kvm_cpuid_has(kvm_get_supported_cpuid(), feature);
+}
+
+uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
+                           struct kvm_x86_cpu_property property);
+
+static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property)
+{
+       return kvm_cpuid_property(kvm_get_supported_cpuid(), property);
+}
+
+static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property)
+{
+       uint32_t max_leaf;
+
+       switch (property.function & 0xc0000000) {
+       case 0:
+               max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
+               break;
+       case 0x40000000:
+               max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
+               break;
+       case 0x80000000:
+               max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
+               break;
+       case 0xc0000000:
+               max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
+       }
+       return max_leaf >= property.function;
+}
+
+static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature)
+{
+       uint32_t nr_bits;
+
+       if (feature.f.reg == KVM_CPUID_EBX) {
+               nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+               return nr_bits > feature.f.bit && !kvm_cpu_has(feature.f);
+       }
+
+       TEST_ASSERT_EQ(feature.f.reg, KVM_CPUID_ECX);
+       nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       return nr_bits > feature.f.bit || kvm_cpu_has(feature.f);
+}
+
+static __always_inline uint64_t kvm_cpu_supported_xcr0(void)
+{
+       if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
+               return 0;
+
+       return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
+              ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
+}
+
+static inline size_t kvm_cpuid2_size(int nr_entries)
+{
+       return sizeof(struct kvm_cpuid2) +
+              sizeof(struct kvm_cpuid_entry2) * nr_entries;
+}
+
+/*
+ * Allocate a "struct kvm_cpuid2" instance, with the 0-length array of
+ * entries sized to hold @nr_entries.  The caller is responsible for freeing
+ * the struct.
+ */
+static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries)
+{
+       struct kvm_cpuid2 *cpuid;
+
+       cpuid = malloc(kvm_cpuid2_size(nr_entries));
+       TEST_ASSERT(cpuid, "-ENOMEM when allocating kvm_cpuid2");
+
+       cpuid->nent = nr_entries;
+
+       return cpuid;
+}
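+
+/*
+ * Illustrative usage of allocate_kvm_cpuid2() (a sketch, not from the
+ * original header):
+ *
+ *      struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(nent);
+ *
+ *      ... fill cpuid->entries[0..nent-1] ...
+ *      vcpu_init_cpuid(vcpu, cpuid);
+ *      free(cpuid);
+ */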
+
+void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid);
+
+static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
+                                                             uint32_t function,
+                                                             uint32_t index)
+{
+       return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid,
+                                                         function, index);
+}
+
+static inline struct kvm_cpuid_entry2 *vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
+                                                           uint32_t function)
+{
+       return __vcpu_get_cpuid_entry(vcpu, function, 0);
+}
+
+static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu)
+{
+       int r;
+
+       TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
+       r = __vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
+       if (r)
+               return r;
+
+       /* On success, refresh the cache to pick up adjustments made by KVM. */
+       vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+       return 0;
+}
+
+static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
+{
+       TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
+       vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
+
+       /* Refresh the cache to pick up adjustments made by KVM. */
+       vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+}
+
+static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu)
+{
+       vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+}
+
+void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
+                            struct kvm_x86_cpu_property property,
+                            uint32_t value);
+void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr);
+
+void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);
+
+static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu,
+                                 struct kvm_x86_cpu_feature feature)
+{
+       struct kvm_cpuid_entry2 *entry;
+
+       entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
+       return *((&entry->eax) + feature.reg) & BIT(feature.bit);
+}
+
+void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
+                                    struct kvm_x86_cpu_feature feature,
+                                    bool set);
+
+static inline void vcpu_set_cpuid_feature(struct kvm_vcpu *vcpu,
+                                         struct kvm_x86_cpu_feature feature)
+{
+       vcpu_set_or_clear_cpuid_feature(vcpu, feature, true);
+}
+
+static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu,
+                                           struct kvm_x86_cpu_feature feature)
+{
+       vcpu_set_or_clear_cpuid_feature(vcpu, feature, false);
+}
+
+uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index);
+int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value);
+
+/*
+ * Assert on an MSR access (or accesses) and pretty print the MSR name when
+ * possible.  Note, the caller provides the stringified name so that the name
+ * of the macro is printed, not the value the macro resolves to (due to macro
+ * expansion).
+ */
+#define TEST_ASSERT_MSR(cond, fmt, msr, str, args...)                          \
+do {                                                                           \
+       if (__builtin_constant_p(msr)) {                                        \
+               TEST_ASSERT(cond, fmt, str, args);                              \
+       } else if (!(cond)) {                                                   \
+               char buf[16];                                                   \
+                                                                               \
+               snprintf(buf, sizeof(buf), "MSR 0x%x", msr);                    \
+               TEST_ASSERT(cond, fmt, buf, args);                              \
+       }                                                                       \
+} while (0)
+
+/*
+ * Returns true if KVM should return the last written value when reading an MSR
+ * from userspace, e.g. the MSR isn't a command MSR, doesn't emulate state that
+ * is changing, etc.  This is NOT an exhaustive list!  The intent is to filter
+ * out MSRs that are not durable _and_ that a selftest wants to write.
+ */
+static inline bool is_durable_msr(uint32_t msr)
+{
+       return msr != MSR_IA32_TSC;
+}
+
+#define vcpu_set_msr(vcpu, msr, val)                                                   \
+do {                                                                                   \
+       uint64_t r, v = val;                                                            \
+                                                                                       \
+       TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1,                               \
+                       "KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v);      \
+       if (!is_durable_msr(msr))                                                       \
+               break;                                                                  \
+       r = vcpu_get_msr(vcpu, msr);                                                    \
+       TEST_ASSERT_MSR(r == v, "Set %s to '0x%lx', got back '0x%lx'", msr, #msr, v, r);\
+} while (0)
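+
+/*
+ * Illustrative usage (a sketch, not from the original header), where
+ * "msr_index" and "msr_value" are placeholders for a test's actual values:
+ *
+ *      vcpu_set_msr(vcpu, msr_index, msr_value);
+ *
+ * For a durable MSR the macro also reads the value back and asserts that it
+ * matches; for a non-durable MSR, e.g. MSR_IA32_TSC, only the write itself is
+ * asserted on.
+ */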
+
+void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
+void kvm_init_vm_address_properties(struct kvm_vm *vm);
+bool vm_is_unrestricted_guest(struct kvm_vm *vm);
+
+struct ex_regs {
+       uint64_t rax, rcx, rdx, rbx;
+       uint64_t rbp, rsi, rdi;
+       uint64_t r8, r9, r10, r11;
+       uint64_t r12, r13, r14, r15;
+       uint64_t vector;
+       uint64_t error_code;
+       uint64_t rip;
+       uint64_t cs;
+       uint64_t rflags;
+};
+
+struct idt_entry {
+       uint16_t offset0;
+       uint16_t selector;
+       uint16_t ist : 3;
+       uint16_t : 5;
+       uint16_t type : 4;
+       uint16_t : 1;
+       uint16_t dpl : 2;
+       uint16_t p : 1;
+       uint16_t offset1;
+       uint32_t offset2;
+       uint32_t reserved;
+};
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+                       void (*handler)(struct ex_regs *));
+
+/* If a toddler were to say "abracadabra". */
+#define KVM_EXCEPTION_MAGIC 0xabacadabaULL
+
+/*
+ * KVM selftest exception fixup uses registers to coordinate with the exception
+ * handler, versus the kernel's in-memory tables and KVM-Unit-Tests's in-memory
+ * per-CPU data.  Using only registers avoids having to map memory into the
+ * guest, doesn't require a valid, stable GS.base, and reduces the risk of
+ * recursive faults when accessing memory in the handler.  The downside to
+ * using registers is that it restricts what registers can be used by the actual
+ * instruction.  But, selftests are 64-bit only, making register pressure a
+ * minor concern.  Use r9-r11 as they are volatile, i.e. don't need to be saved
+ * by the callee, and except for r11 are not implicit parameters to any
+ * instructions.  Ideally, fixup would use r8-r10 and thus avoid implicit
+ * parameters entirely, but Hyper-V's hypercall ABI uses r8 and testing Hyper-V
+ * is higher priority than testing non-faulting SYSCALL/SYSRET.
+ *
+ * Note, the fixup handler deliberately does not handle #DE, i.e. the vector
+ * is guaranteed to be non-zero on fault.
+ *
+ * REGISTER INPUTS:
+ * r9  = MAGIC
+ * r10 = RIP
+ * r11 = new RIP on fault
+ *
+ * REGISTER OUTPUTS:
+ * r9  = exception vector (non-zero)
+ * r10 = error code
+ */
+#define __KVM_ASM_SAFE(insn, fep)                              \
+       "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t"   \
+       "lea 1f(%%rip), %%r10\n\t"                              \
+       "lea 2f(%%rip), %%r11\n\t"                              \
+       fep "1: " insn "\n\t"                                   \
+       "xor %%r9, %%r9\n\t"                                    \
+       "2:\n\t"                                                \
+       "mov  %%r9b, %[vector]\n\t"                             \
+       "mov  %%r10, %[error_code]\n\t"
+
+#define KVM_ASM_SAFE(insn) __KVM_ASM_SAFE(insn, "")
+#define KVM_ASM_SAFE_FEP(insn) __KVM_ASM_SAFE(insn, KVM_FEP)
+
+#define KVM_ASM_SAFE_OUTPUTS(v, ec)    [vector] "=qm"(v), [error_code] "=rm"(ec)
+#define KVM_ASM_SAFE_CLOBBERS  "r9", "r10", "r11"
+
+#define kvm_asm_safe(insn, inputs...)                                  \
+({                                                                     \
+       uint64_t ign_error_code;                                        \
+       uint8_t vector;                                                 \
+                                                                       \
+       asm volatile(KVM_ASM_SAFE(insn)                                 \
+                    : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code)     \
+                    : inputs                                           \
+                    : KVM_ASM_SAFE_CLOBBERS);                          \
+       vector;                                                         \
+})
+
+#define kvm_asm_safe_ec(insn, error_code, inputs...)                   \
+({                                                                     \
+       uint8_t vector;                                                 \
+                                                                       \
+       asm volatile(KVM_ASM_SAFE(insn)                                 \
+                    : KVM_ASM_SAFE_OUTPUTS(vector, error_code)         \
+                    : inputs                                           \
+                    : KVM_ASM_SAFE_CLOBBERS);                          \
+       vector;                                                         \
+})
+
+#define kvm_asm_safe_fep(insn, inputs...)                              \
+({                                                                     \
+       uint64_t ign_error_code;                                        \
+       uint8_t vector;                                                 \
+                                                                       \
+       asm volatile(KVM_ASM_SAFE(insn)                                 \
+                    : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code)     \
+                    : inputs                                           \
+                    : KVM_ASM_SAFE_CLOBBERS);                          \
+       vector;                                                         \
+})
+
+#define kvm_asm_safe_ec_fep(insn, error_code, inputs...)               \
+({                                                                     \
+       uint8_t vector;                                                 \
+                                                                       \
+       asm volatile(KVM_ASM_SAFE_FEP(insn)                             \
+                    : KVM_ASM_SAFE_OUTPUTS(vector, error_code)         \
+                    : inputs                                           \
+                    : KVM_ASM_SAFE_CLOBBERS);                          \
+       vector;                                                         \
+})
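+
+/*
+ * Illustrative guest-side usage of kvm_asm_safe() (a sketch, not from the
+ * original header): execute WRMSR against an MSR that is expected to fault,
+ * and assert that the reported vector is #GP (13).  "msr" is a placeholder.
+ *
+ *      uint8_t vector = kvm_asm_safe("wrmsr", "a"(0), "d"(0), "c"(msr));
+ *
+ *      GUEST_ASSERT(vector == 13);
+ */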
+
+#define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP)                   \
+static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val)  \
+{                                                                      \
+       uint64_t error_code;                                            \
+       uint8_t vector;                                                 \
+       uint32_t a, d;                                                  \
+                                                                       \
+       asm volatile(KVM_ASM_SAFE##_FEP(#insn)                          \
+                    : "=a"(a), "=d"(d),                                \
+                      KVM_ASM_SAFE_OUTPUTS(vector, error_code)         \
+                    : "c"(idx)                                         \
+                    : KVM_ASM_SAFE_CLOBBERS);                          \
+                                                                       \
+       *val = (uint64_t)a | ((uint64_t)d << 32);                       \
+       return vector;                                                  \
+}
+
+/*
+ * Generate {insn}_safe() and {insn}_safe_fep() helpers for instructions that
+ * use ECX as an input index, and EDX:EAX as a 64-bit output.
+ */
+#define BUILD_READ_U64_SAFE_HELPERS(insn)                              \
+       BUILD_READ_U64_SAFE_HELPER(insn, , )                            \
+       BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP)                    \
+
+BUILD_READ_U64_SAFE_HELPERS(rdmsr)
+BUILD_READ_U64_SAFE_HELPERS(rdpmc)
+BUILD_READ_U64_SAFE_HELPERS(xgetbv)
+
+static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
+{
+       return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr));
+}
+
+static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value)
+{
+       u32 eax = value;
+       u32 edx = value >> 32;
+
+       return kvm_asm_safe("xsetbv", "a" (eax), "d" (edx), "c" (index));
+}
+
+bool kvm_is_tdp_enabled(void);
+
+static inline bool kvm_is_pmu_enabled(void)
+{
+       return get_kvm_param_bool("enable_pmu");
+}
+
+static inline bool kvm_is_forced_emulation_enabled(void)
+{
+       return !!get_kvm_param_integer("force_emulation_prefix");
+}
+
+uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
+                                   int *level);
+uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);
+
+uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
+                      uint64_t a3);
+uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
+void xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
+
+static inline uint64_t __kvm_hypercall_map_gpa_range(uint64_t gpa,
+                                                    uint64_t size, uint64_t flags)
+{
+       return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0);
+}
+
+static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size,
+                                              uint64_t flags)
+{
+       uint64_t ret = __kvm_hypercall_map_gpa_range(gpa, size, flags);
+
+       GUEST_ASSERT(!ret);
+}
+
+void __vm_xsave_require_permission(uint64_t xfeature, const char *name);
+
+#define vm_xsave_require_permission(xfeature)  \
+       __vm_xsave_require_permission(xfeature, #xfeature)
+
+enum pg_level {
+       PG_LEVEL_NONE,
+       PG_LEVEL_4K,
+       PG_LEVEL_2M,
+       PG_LEVEL_1G,
+       PG_LEVEL_512G,
+       PG_LEVEL_NUM
+};
+
+#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
+#define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level))
+
+#define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K)
+#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M)
+#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)
+
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
+void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+                   uint64_t nr_bytes, int level);
+
+/*
+ * Basic CPU control in CR0
+ */
+#define X86_CR0_PE          (1UL<<0) /* Protection Enable */
+#define X86_CR0_MP          (1UL<<1) /* Monitor Coprocessor */
+#define X86_CR0_EM          (1UL<<2) /* Emulation */
+#define X86_CR0_TS          (1UL<<3) /* Task Switched */
+#define X86_CR0_ET          (1UL<<4) /* Extension Type */
+#define X86_CR0_NE          (1UL<<5) /* Numeric Error */
+#define X86_CR0_WP          (1UL<<16) /* Write Protect */
+#define X86_CR0_AM          (1UL<<18) /* Alignment Mask */
+#define X86_CR0_NW          (1UL<<29) /* Not Write-through */
+#define X86_CR0_CD          (1UL<<30) /* Cache Disable */
+#define X86_CR0_PG          (1UL<<31) /* Paging */
+
+#define PFERR_PRESENT_BIT 0
+#define PFERR_WRITE_BIT 1
+#define PFERR_USER_BIT 2
+#define PFERR_RSVD_BIT 3
+#define PFERR_FETCH_BIT 4
+#define PFERR_PK_BIT 5
+#define PFERR_SGX_BIT 15
+#define PFERR_GUEST_FINAL_BIT 32
+#define PFERR_GUEST_PAGE_BIT 33
+#define PFERR_IMPLICIT_ACCESS_BIT 48
+
+#define PFERR_PRESENT_MASK     BIT(PFERR_PRESENT_BIT)
+#define PFERR_WRITE_MASK       BIT(PFERR_WRITE_BIT)
+#define PFERR_USER_MASK                BIT(PFERR_USER_BIT)
+#define PFERR_RSVD_MASK                BIT(PFERR_RSVD_BIT)
+#define PFERR_FETCH_MASK       BIT(PFERR_FETCH_BIT)
+#define PFERR_PK_MASK          BIT(PFERR_PK_BIT)
+#define PFERR_SGX_MASK         BIT(PFERR_SGX_BIT)
+#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT)
+#define PFERR_GUEST_PAGE_MASK  BIT_ULL(PFERR_GUEST_PAGE_BIT)
+#define PFERR_IMPLICIT_ACCESS  BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
+
+bool sys_clocksource_is_based_on_tsc(void);
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/x86/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h
new file mode 100644 (file)
index 0000000..82c11c8
--- /dev/null
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Helpers used for SEV guests
+ */
+#ifndef SELFTEST_KVM_SEV_H
+#define SELFTEST_KVM_SEV_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "linux/psp-sev.h"
+
+#include "kvm_util.h"
+#include "svm_util.h"
+#include "processor.h"
+
+enum sev_guest_state {
+       SEV_GUEST_STATE_UNINITIALIZED = 0,
+       SEV_GUEST_STATE_LAUNCH_UPDATE,
+       SEV_GUEST_STATE_LAUNCH_SECRET,
+       SEV_GUEST_STATE_RUNNING,
+};
+
+#define SEV_POLICY_NO_DBG      (1UL << 0)
+#define SEV_POLICY_ES          (1UL << 2)
+
+#define GHCB_MSR_TERM_REQ      0x100
+
+void sev_vm_launch(struct kvm_vm *vm, uint32_t policy);
+void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement);
+void sev_vm_launch_finish(struct kvm_vm *vm);
+
+struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
+                                          struct kvm_vcpu **cpu);
+void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement);
+
+kvm_static_assert(SEV_RET_SUCCESS == 0);
+
+/*
+ * The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long"
+ * instead of a proper struct.  The size of the parameter is embedded in the
+ * ioctl number, i.e. is ABI and thus immutable.  Hack around the mess by
+ * creating an overlay to pass in an "unsigned long" without a cast (casting
+ * will make the compiler unhappy due to dereferencing an aliased pointer).
+ */
+#define __vm_sev_ioctl(vm, cmd, arg)                                   \
+({                                                                     \
+       int r;                                                          \
+                                                                       \
+       union {                                                         \
+               struct kvm_sev_cmd c;                                   \
+               unsigned long raw;                                      \
+       } sev_cmd = { .c = {                                            \
+               .id = (cmd),                                            \
+               .data = (uint64_t)(arg),                                \
+               .sev_fd = (vm)->arch.sev_fd,                            \
+       } };                                                            \
+                                                                       \
+       r = __vm_ioctl(vm, KVM_MEMORY_ENCRYPT_OP, &sev_cmd.raw);        \
+       r ?: sev_cmd.c.error;                                           \
+})
+
+#define vm_sev_ioctl(vm, cmd, arg)                                     \
+({                                                                     \
+       int ret = __vm_sev_ioctl(vm, cmd, arg);                         \
+                                                                       \
+       __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm);               \
+})
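+
+/*
+ * Illustrative usage (a sketch, not from the original header): retrieve the
+ * launch measurement via the overlay above.  "measurement" and "len" are
+ * caller-provided placeholders.
+ *
+ *      struct kvm_sev_launch_measure msmt = {
+ *              .uaddr = (uint64_t)measurement,
+ *              .len = len,
+ *      };
+ *
+ *      vm_sev_ioctl(vm, KVM_SEV_LAUNCH_MEASURE, &msmt);
+ */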
+
+void sev_vm_init(struct kvm_vm *vm);
+void sev_es_vm_init(struct kvm_vm *vm);
+
+static inline void sev_register_encrypted_memory(struct kvm_vm *vm,
+                                                struct userspace_mem_region *region)
+{
+       struct kvm_enc_region range = {
+               .addr = region->region.userspace_addr,
+               .size = region->region.memory_size,
+       };
+
+       vm_ioctl(vm, KVM_MEMORY_ENCRYPT_REG_REGION, &range);
+}
+
+static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
+                                         uint64_t size)
+{
+       struct kvm_sev_launch_update_data update_data = {
+               .uaddr = (unsigned long)addr_gpa2hva(vm, gpa),
+               .len = size,
+       };
+
+       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data);
+}
+
+#endif /* SELFTEST_KVM_SEV_H */
diff --git a/tools/testing/selftests/kvm/include/x86/svm.h b/tools/testing/selftests/kvm/include/x86/svm.h
new file mode 100644 (file)
index 0000000..29cffd0
--- /dev/null
@@ -0,0 +1,320 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef SELFTEST_KVM_SVM_H
+#define SELFTEST_KVM_SVM_H
+
+enum {
+       INTERCEPT_INTR,
+       INTERCEPT_NMI,
+       INTERCEPT_SMI,
+       INTERCEPT_INIT,
+       INTERCEPT_VINTR,
+       INTERCEPT_SELECTIVE_CR0,
+       INTERCEPT_STORE_IDTR,
+       INTERCEPT_STORE_GDTR,
+       INTERCEPT_STORE_LDTR,
+       INTERCEPT_STORE_TR,
+       INTERCEPT_LOAD_IDTR,
+       INTERCEPT_LOAD_GDTR,
+       INTERCEPT_LOAD_LDTR,
+       INTERCEPT_LOAD_TR,
+       INTERCEPT_RDTSC,
+       INTERCEPT_RDPMC,
+       INTERCEPT_PUSHF,
+       INTERCEPT_POPF,
+       INTERCEPT_CPUID,
+       INTERCEPT_RSM,
+       INTERCEPT_IRET,
+       INTERCEPT_INTn,
+       INTERCEPT_INVD,
+       INTERCEPT_PAUSE,
+       INTERCEPT_HLT,
+       INTERCEPT_INVLPG,
+       INTERCEPT_INVLPGA,
+       INTERCEPT_IOIO_PROT,
+       INTERCEPT_MSR_PROT,
+       INTERCEPT_TASK_SWITCH,
+       INTERCEPT_FERR_FREEZE,
+       INTERCEPT_SHUTDOWN,
+       INTERCEPT_VMRUN,
+       INTERCEPT_VMMCALL,
+       INTERCEPT_VMLOAD,
+       INTERCEPT_VMSAVE,
+       INTERCEPT_STGI,
+       INTERCEPT_CLGI,
+       INTERCEPT_SKINIT,
+       INTERCEPT_RDTSCP,
+       INTERCEPT_ICEBP,
+       INTERCEPT_WBINVD,
+       INTERCEPT_MONITOR,
+       INTERCEPT_MWAIT,
+       INTERCEPT_MWAIT_COND,
+       INTERCEPT_XSETBV,
+       INTERCEPT_RDPRU,
+};
+
+struct hv_vmcb_enlightenments {
+       struct __packed hv_enlightenments_control {
+               u32 nested_flush_hypercall:1;
+               u32 msr_bitmap:1;
+               u32 enlightened_npt_tlb: 1;
+               u32 reserved:29;
+       } __packed hv_enlightenments_control;
+       u32 hv_vp_id;
+       u64 hv_vm_id;
+       u64 partition_assist_page;
+       u64 reserved;
+} __packed;
+
+/*
+ * Hyper-V uses the software reserved clean bit in VMCB
+ */
+#define HV_VMCB_NESTED_ENLIGHTENMENTS (1U << 31)
+
+/* Synthetic VM-Exit */
+#define HV_SVM_EXITCODE_ENL                    0xf0000000
+#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH   (1)
+
+struct __attribute__ ((__packed__)) vmcb_control_area {
+       u32 intercept_cr;
+       u32 intercept_dr;
+       u32 intercept_exceptions;
+       u64 intercept;
+       u8 reserved_1[40];
+       u16 pause_filter_thresh;
+       u16 pause_filter_count;
+       u64 iopm_base_pa;
+       u64 msrpm_base_pa;
+       u64 tsc_offset;
+       u32 asid;
+       u8 tlb_ctl;
+       u8 reserved_2[3];
+       u32 int_ctl;
+       u32 int_vector;
+       u32 int_state;
+       u8 reserved_3[4];
+       u32 exit_code;
+       u32 exit_code_hi;
+       u64 exit_info_1;
+       u64 exit_info_2;
+       u32 exit_int_info;
+       u32 exit_int_info_err;
+       u64 nested_ctl;
+       u64 avic_vapic_bar;
+       u8 reserved_4[8];
+       u32 event_inj;
+       u32 event_inj_err;
+       u64 nested_cr3;
+       u64 virt_ext;
+       u32 clean;
+       u32 reserved_5;
+       u64 next_rip;
+       u8 insn_len;
+       u8 insn_bytes[15];
+       u64 avic_backing_page;  /* Offset 0xe0 */
+       u8 reserved_6[8];       /* Offset 0xe8 */
+       u64 avic_logical_id;    /* Offset 0xf0 */
+       u64 avic_physical_id;   /* Offset 0xf8 */
+       u8 reserved_7[8];
+       u64 vmsa_pa;            /* Used for an SEV-ES guest */
+       u8 reserved_8[720];
+       /*
+        * Offset 0x3e0, 32 bytes reserved
+        * for use by hypervisor/software.
+        */
+       union {
+               struct hv_vmcb_enlightenments hv_enlightenments;
+               u8 reserved_sw[32];
+       };
+};
+
+
+#define TLB_CONTROL_DO_NOTHING 0
+#define TLB_CONTROL_FLUSH_ALL_ASID 1
+#define TLB_CONTROL_FLUSH_ASID 3
+#define TLB_CONTROL_FLUSH_ASID_LOCAL 7
+
+#define V_TPR_MASK 0x0f
+
+#define V_IRQ_SHIFT 8
+#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
+
+#define V_GIF_SHIFT 9
+#define V_GIF_MASK (1 << V_GIF_SHIFT)
+
+#define V_INTR_PRIO_SHIFT 16
+#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
+
+#define V_IGN_TPR_SHIFT 20
+#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
+
+#define V_INTR_MASKING_SHIFT 24
+#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
+
+#define V_GIF_ENABLE_SHIFT 25
+#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT)
+
+#define AVIC_ENABLE_SHIFT 31
+#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
+
+#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
+#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
+
+#define SVM_INTERRUPT_SHADOW_MASK 1
+
+#define SVM_IOIO_STR_SHIFT 2
+#define SVM_IOIO_REP_SHIFT 3
+#define SVM_IOIO_SIZE_SHIFT 4
+#define SVM_IOIO_ASIZE_SHIFT 7
+
+#define SVM_IOIO_TYPE_MASK 1
+#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
+#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
+#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
+#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
+
+#define SVM_VM_CR_VALID_MASK   0x001fULL
+#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
+#define SVM_VM_CR_SVM_DIS_MASK  0x0010ULL
+
+#define SVM_NESTED_CTL_NP_ENABLE       BIT(0)
+#define SVM_NESTED_CTL_SEV_ENABLE      BIT(1)
+
+struct __attribute__ ((__packed__)) vmcb_seg {
+       u16 selector;
+       u16 attrib;
+       u32 limit;
+       u64 base;
+};
+
+struct __attribute__ ((__packed__)) vmcb_save_area {
+       struct vmcb_seg es;
+       struct vmcb_seg cs;
+       struct vmcb_seg ss;
+       struct vmcb_seg ds;
+       struct vmcb_seg fs;
+       struct vmcb_seg gs;
+       struct vmcb_seg gdtr;
+       struct vmcb_seg ldtr;
+       struct vmcb_seg idtr;
+       struct vmcb_seg tr;
+       u8 reserved_1[43];
+       u8 cpl;
+       u8 reserved_2[4];
+       u64 efer;
+       u8 reserved_3[112];
+       u64 cr4;
+       u64 cr3;
+       u64 cr0;
+       u64 dr7;
+       u64 dr6;
+       u64 rflags;
+       u64 rip;
+       u8 reserved_4[88];
+       u64 rsp;
+       u8 reserved_5[24];
+       u64 rax;
+       u64 star;
+       u64 lstar;
+       u64 cstar;
+       u64 sfmask;
+       u64 kernel_gs_base;
+       u64 sysenter_cs;
+       u64 sysenter_esp;
+       u64 sysenter_eip;
+       u64 cr2;
+       u8 reserved_6[32];
+       u64 g_pat;
+       u64 dbgctl;
+       u64 br_from;
+       u64 br_to;
+       u64 last_excp_from;
+       u64 last_excp_to;
+};
+
+struct __attribute__ ((__packed__)) vmcb {
+       struct vmcb_control_area control;
+       struct vmcb_save_area save;
+};
+
+#define SVM_VM_CR_SVM_DISABLE 4
+
+#define SVM_SELECTOR_S_SHIFT 4
+#define SVM_SELECTOR_DPL_SHIFT 5
+#define SVM_SELECTOR_P_SHIFT 7
+#define SVM_SELECTOR_AVL_SHIFT 8
+#define SVM_SELECTOR_L_SHIFT 9
+#define SVM_SELECTOR_DB_SHIFT 10
+#define SVM_SELECTOR_G_SHIFT 11
+
+#define SVM_SELECTOR_TYPE_MASK (0xf)
+#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT)
+#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT)
+#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT)
+#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT)
+#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT)
+#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT)
+#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT)
+
+#define SVM_SELECTOR_WRITE_MASK (1 << 1)
+#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
+#define SVM_SELECTOR_CODE_MASK (1 << 3)
+
+#define INTERCEPT_CR0_READ     0
+#define INTERCEPT_CR3_READ     3
+#define INTERCEPT_CR4_READ     4
+#define INTERCEPT_CR8_READ     8
+#define INTERCEPT_CR0_WRITE    (16 + 0)
+#define INTERCEPT_CR3_WRITE    (16 + 3)
+#define INTERCEPT_CR4_WRITE    (16 + 4)
+#define INTERCEPT_CR8_WRITE    (16 + 8)
+
+#define INTERCEPT_DR0_READ     0
+#define INTERCEPT_DR1_READ     1
+#define INTERCEPT_DR2_READ     2
+#define INTERCEPT_DR3_READ     3
+#define INTERCEPT_DR4_READ     4
+#define INTERCEPT_DR5_READ     5
+#define INTERCEPT_DR6_READ     6
+#define INTERCEPT_DR7_READ     7
+#define INTERCEPT_DR0_WRITE    (16 + 0)
+#define INTERCEPT_DR1_WRITE    (16 + 1)
+#define INTERCEPT_DR2_WRITE    (16 + 2)
+#define INTERCEPT_DR3_WRITE    (16 + 3)
+#define INTERCEPT_DR4_WRITE    (16 + 4)
+#define INTERCEPT_DR5_WRITE    (16 + 5)
+#define INTERCEPT_DR6_WRITE    (16 + 6)
+#define INTERCEPT_DR7_WRITE    (16 + 7)
+
+#define SVM_EVTINJ_VEC_MASK 0xff
+
+#define SVM_EVTINJ_TYPE_SHIFT 8
+#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_VALID (1 << 31)
+#define SVM_EVTINJ_VALID_ERR (1 << 11)
+
+#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
+#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
+
+#define        SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
+#define        SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
+#define        SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
+#define        SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
+
+#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
+#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
+
+#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
+#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
+#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44
+
+#define SVM_EXITINFO_REG_MASK 0x0F
+
+#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
+
+#endif /* SELFTEST_KVM_SVM_H */
diff --git a/tools/testing/selftests/kvm/include/x86/svm_util.h b/tools/testing/selftests/kvm/include/x86/svm_util.h
new file mode 100644 (file)
index 0000000..b74c6dc
--- /dev/null
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+
+#ifndef SELFTEST_KVM_SVM_UTILS_H
+#define SELFTEST_KVM_SVM_UTILS_H
+
+#include <asm/svm.h>
+
+#include <stdint.h>
+#include "svm.h"
+#include "processor.h"
+
+struct svm_test_data {
+       /* VMCB */
+       struct vmcb *vmcb; /* gva */
+       void *vmcb_hva;
+       uint64_t vmcb_gpa;
+
+       /* host state-save area */
+       struct vmcb_save_area *save_area; /* gva */
+       void *save_area_hva;
+       uint64_t save_area_gpa;
+
+       /* MSR-Bitmap */
+       void *msr; /* gva */
+       void *msr_hva;
+       uint64_t msr_gpa;
+};
+
+static inline void vmmcall(void)
+{
+       /*
+        * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle
+        * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended
+        * use of this function is to exit to L1 from L2.  Clobber all other
+        * GPRs as L1 doesn't correctly preserve them during vmexits.
+        */
+       __asm__ __volatile__("push %%rbp; vmmcall; pop %%rbp"
+                            : : "a"(0xdeadbeef), "c"(0xbeefdead)
+                            : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+                              "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
+#define stgi()                 \
+       __asm__ __volatile__(   \
+               "stgi\n"        \
+               )
+
+#define clgi()                 \
+       __asm__ __volatile__(   \
+               "clgi\n"        \
+               )
+
+struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva);
+void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp);
+void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
+
+int open_sev_dev_path_or_exit(void);
+
+#endif /* SELFTEST_KVM_SVM_UTILS_H */
diff --git a/tools/testing/selftests/kvm/include/x86/ucall.h b/tools/testing/selftests/kvm/include/x86/ucall.h
new file mode 100644 (file)
index 0000000..d3825dc
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util.h"
+
+#define UCALL_EXIT_REASON       KVM_EXIT_IO
+
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/x86/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h
new file mode 100644 (file)
index 0000000..edb3c39
--- /dev/null
@@ -0,0 +1,575 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_VMX_H
+#define SELFTEST_KVM_VMX_H
+
+#include <asm/vmx.h>
+
+#include <stdint.h>
+#include "processor.h"
+#include "apic.h"
+
+/*
+ * Definitions of Primary Processor-Based VM-Execution Controls.
+ */
+#define CPU_BASED_INTR_WINDOW_EXITING          0x00000004
+#define CPU_BASED_USE_TSC_OFFSETTING           0x00000008
+#define CPU_BASED_HLT_EXITING                  0x00000080
+#define CPU_BASED_INVLPG_EXITING               0x00000200
+#define CPU_BASED_MWAIT_EXITING                        0x00000400
+#define CPU_BASED_RDPMC_EXITING                        0x00000800
+#define CPU_BASED_RDTSC_EXITING                        0x00001000
+#define CPU_BASED_CR3_LOAD_EXITING             0x00008000
+#define CPU_BASED_CR3_STORE_EXITING            0x00010000
+#define CPU_BASED_CR8_LOAD_EXITING             0x00080000
+#define CPU_BASED_CR8_STORE_EXITING            0x00100000
+#define CPU_BASED_TPR_SHADOW                   0x00200000
+#define CPU_BASED_NMI_WINDOW_EXITING           0x00400000
+#define CPU_BASED_MOV_DR_EXITING               0x00800000
+#define CPU_BASED_UNCOND_IO_EXITING            0x01000000
+#define CPU_BASED_USE_IO_BITMAPS               0x02000000
+#define CPU_BASED_MONITOR_TRAP                 0x08000000
+#define CPU_BASED_USE_MSR_BITMAPS              0x10000000
+#define CPU_BASED_MONITOR_EXITING              0x20000000
+#define CPU_BASED_PAUSE_EXITING                        0x40000000
+#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS  0x80000000
+
+#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR    0x0401e172
+
+/*
+ * Definitions of Secondary Processor-Based VM-Execution Controls.
+ */
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_ENABLE_EPT              0x00000002
+#define SECONDARY_EXEC_DESC                    0x00000004
+#define SECONDARY_EXEC_ENABLE_RDTSCP           0x00000008
+#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE  0x00000010
+#define SECONDARY_EXEC_ENABLE_VPID             0x00000020
+#define SECONDARY_EXEC_WBINVD_EXITING          0x00000040
+#define SECONDARY_EXEC_UNRESTRICTED_GUEST      0x00000080
+#define SECONDARY_EXEC_APIC_REGISTER_VIRT      0x00000100
+#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY   0x00000200
+#define SECONDARY_EXEC_PAUSE_LOOP_EXITING      0x00000400
+#define SECONDARY_EXEC_RDRAND_EXITING          0x00000800
+#define SECONDARY_EXEC_ENABLE_INVPCID          0x00001000
+#define SECONDARY_EXEC_ENABLE_VMFUNC           0x00002000
+#define SECONDARY_EXEC_SHADOW_VMCS             0x00004000
+#define SECONDARY_EXEC_RDSEED_EXITING          0x00010000
+#define SECONDARY_EXEC_ENABLE_PML              0x00020000
+#define SECONDARY_EPT_VE                       0x00040000
+#define SECONDARY_ENABLE_XSAV_RESTORE          0x00100000
+#define SECONDARY_EXEC_TSC_SCALING             0x02000000
+
+#define PIN_BASED_EXT_INTR_MASK                        0x00000001
+#define PIN_BASED_NMI_EXITING                  0x00000008
+#define PIN_BASED_VIRTUAL_NMIS                 0x00000020
+#define PIN_BASED_VMX_PREEMPTION_TIMER         0x00000040
+#define PIN_BASED_POSTED_INTR                  0x00000080
+
+#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR    0x00000016
+
+#define VM_EXIT_SAVE_DEBUG_CONTROLS            0x00000004
+#define VM_EXIT_HOST_ADDR_SPACE_SIZE           0x00000200
+#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL     0x00001000
+#define VM_EXIT_ACK_INTR_ON_EXIT               0x00008000
+#define VM_EXIT_SAVE_IA32_PAT                  0x00040000
+#define VM_EXIT_LOAD_IA32_PAT                  0x00080000
+#define VM_EXIT_SAVE_IA32_EFER                 0x00100000
+#define VM_EXIT_LOAD_IA32_EFER                 0x00200000
+#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER      0x00400000
+
+#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR      0x00036dff
+
+#define VM_ENTRY_LOAD_DEBUG_CONTROLS           0x00000004
+#define VM_ENTRY_IA32E_MODE                    0x00000200
+#define VM_ENTRY_SMM                           0x00000400
+#define VM_ENTRY_DEACT_DUAL_MONITOR            0x00000800
+#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL    0x00002000
+#define VM_ENTRY_LOAD_IA32_PAT                 0x00004000
+#define VM_ENTRY_LOAD_IA32_EFER                        0x00008000
+
+#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR     0x000011ff
+
+#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK    0x0000001f
+#define VMX_MISC_SAVE_EFER_LMA                 0x00000020
+
+#define VMX_EPT_VPID_CAP_1G_PAGES              0x00020000
+#define VMX_EPT_VPID_CAP_AD_BITS               0x00200000
+
+#define EXIT_REASON_FAILED_VMENTRY     0x80000000
+
+enum vmcs_field {
+       VIRTUAL_PROCESSOR_ID            = 0x00000000,
+       POSTED_INTR_NV                  = 0x00000002,
+       GUEST_ES_SELECTOR               = 0x00000800,
+       GUEST_CS_SELECTOR               = 0x00000802,
+       GUEST_SS_SELECTOR               = 0x00000804,
+       GUEST_DS_SELECTOR               = 0x00000806,
+       GUEST_FS_SELECTOR               = 0x00000808,
+       GUEST_GS_SELECTOR               = 0x0000080a,
+       GUEST_LDTR_SELECTOR             = 0x0000080c,
+       GUEST_TR_SELECTOR               = 0x0000080e,
+       GUEST_INTR_STATUS               = 0x00000810,
+       GUEST_PML_INDEX                 = 0x00000812,
+       HOST_ES_SELECTOR                = 0x00000c00,
+       HOST_CS_SELECTOR                = 0x00000c02,
+       HOST_SS_SELECTOR                = 0x00000c04,
+       HOST_DS_SELECTOR                = 0x00000c06,
+       HOST_FS_SELECTOR                = 0x00000c08,
+       HOST_GS_SELECTOR                = 0x00000c0a,
+       HOST_TR_SELECTOR                = 0x00000c0c,
+       IO_BITMAP_A                     = 0x00002000,
+       IO_BITMAP_A_HIGH                = 0x00002001,
+       IO_BITMAP_B                     = 0x00002002,
+       IO_BITMAP_B_HIGH                = 0x00002003,
+       MSR_BITMAP                      = 0x00002004,
+       MSR_BITMAP_HIGH                 = 0x00002005,
+       VM_EXIT_MSR_STORE_ADDR          = 0x00002006,
+       VM_EXIT_MSR_STORE_ADDR_HIGH     = 0x00002007,
+       VM_EXIT_MSR_LOAD_ADDR           = 0x00002008,
+       VM_EXIT_MSR_LOAD_ADDR_HIGH      = 0x00002009,
+       VM_ENTRY_MSR_LOAD_ADDR          = 0x0000200a,
+       VM_ENTRY_MSR_LOAD_ADDR_HIGH     = 0x0000200b,
+       PML_ADDRESS                     = 0x0000200e,
+       PML_ADDRESS_HIGH                = 0x0000200f,
+       TSC_OFFSET                      = 0x00002010,
+       TSC_OFFSET_HIGH                 = 0x00002011,
+       VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
+       VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
+       APIC_ACCESS_ADDR                = 0x00002014,
+       APIC_ACCESS_ADDR_HIGH           = 0x00002015,
+       POSTED_INTR_DESC_ADDR           = 0x00002016,
+       POSTED_INTR_DESC_ADDR_HIGH      = 0x00002017,
+       EPT_POINTER                     = 0x0000201a,
+       EPT_POINTER_HIGH                = 0x0000201b,
+       EOI_EXIT_BITMAP0                = 0x0000201c,
+       EOI_EXIT_BITMAP0_HIGH           = 0x0000201d,
+       EOI_EXIT_BITMAP1                = 0x0000201e,
+       EOI_EXIT_BITMAP1_HIGH           = 0x0000201f,
+       EOI_EXIT_BITMAP2                = 0x00002020,
+       EOI_EXIT_BITMAP2_HIGH           = 0x00002021,
+       EOI_EXIT_BITMAP3                = 0x00002022,
+       EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
+       VMREAD_BITMAP                   = 0x00002026,
+       VMREAD_BITMAP_HIGH              = 0x00002027,
+       VMWRITE_BITMAP                  = 0x00002028,
+       VMWRITE_BITMAP_HIGH             = 0x00002029,
+       XSS_EXIT_BITMAP                 = 0x0000202C,
+       XSS_EXIT_BITMAP_HIGH            = 0x0000202D,
+       ENCLS_EXITING_BITMAP            = 0x0000202E,
+       ENCLS_EXITING_BITMAP_HIGH       = 0x0000202F,
+       TSC_MULTIPLIER                  = 0x00002032,
+       TSC_MULTIPLIER_HIGH             = 0x00002033,
+       GUEST_PHYSICAL_ADDRESS          = 0x00002400,
+       GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
+       VMCS_LINK_POINTER               = 0x00002800,
+       VMCS_LINK_POINTER_HIGH          = 0x00002801,
+       GUEST_IA32_DEBUGCTL             = 0x00002802,
+       GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
+       GUEST_IA32_PAT                  = 0x00002804,
+       GUEST_IA32_PAT_HIGH             = 0x00002805,
+       GUEST_IA32_EFER                 = 0x00002806,
+       GUEST_IA32_EFER_HIGH            = 0x00002807,
+       GUEST_IA32_PERF_GLOBAL_CTRL     = 0x00002808,
+       GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809,
+       GUEST_PDPTR0                    = 0x0000280a,
+       GUEST_PDPTR0_HIGH               = 0x0000280b,
+       GUEST_PDPTR1                    = 0x0000280c,
+       GUEST_PDPTR1_HIGH               = 0x0000280d,
+       GUEST_PDPTR2                    = 0x0000280e,
+       GUEST_PDPTR2_HIGH               = 0x0000280f,
+       GUEST_PDPTR3                    = 0x00002810,
+       GUEST_PDPTR3_HIGH               = 0x00002811,
+       GUEST_BNDCFGS                   = 0x00002812,
+       GUEST_BNDCFGS_HIGH              = 0x00002813,
+       HOST_IA32_PAT                   = 0x00002c00,
+       HOST_IA32_PAT_HIGH              = 0x00002c01,
+       HOST_IA32_EFER                  = 0x00002c02,
+       HOST_IA32_EFER_HIGH             = 0x00002c03,
+       HOST_IA32_PERF_GLOBAL_CTRL      = 0x00002c04,
+       HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05,
+       PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
+       CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
+       EXCEPTION_BITMAP                = 0x00004004,
+       PAGE_FAULT_ERROR_CODE_MASK      = 0x00004006,
+       PAGE_FAULT_ERROR_CODE_MATCH     = 0x00004008,
+       CR3_TARGET_COUNT                = 0x0000400a,
+       VM_EXIT_CONTROLS                = 0x0000400c,
+       VM_EXIT_MSR_STORE_COUNT         = 0x0000400e,
+       VM_EXIT_MSR_LOAD_COUNT          = 0x00004010,
+       VM_ENTRY_CONTROLS               = 0x00004012,
+       VM_ENTRY_MSR_LOAD_COUNT         = 0x00004014,
+       VM_ENTRY_INTR_INFO_FIELD        = 0x00004016,
+       VM_ENTRY_EXCEPTION_ERROR_CODE   = 0x00004018,
+       VM_ENTRY_INSTRUCTION_LEN        = 0x0000401a,
+       TPR_THRESHOLD                   = 0x0000401c,
+       SECONDARY_VM_EXEC_CONTROL       = 0x0000401e,
+       PLE_GAP                         = 0x00004020,
+       PLE_WINDOW                      = 0x00004022,
+       VM_INSTRUCTION_ERROR            = 0x00004400,
+       VM_EXIT_REASON                  = 0x00004402,
+       VM_EXIT_INTR_INFO               = 0x00004404,
+       VM_EXIT_INTR_ERROR_CODE         = 0x00004406,
+       IDT_VECTORING_INFO_FIELD        = 0x00004408,
+       IDT_VECTORING_ERROR_CODE        = 0x0000440a,
+       VM_EXIT_INSTRUCTION_LEN         = 0x0000440c,
+       VMX_INSTRUCTION_INFO            = 0x0000440e,
+       GUEST_ES_LIMIT                  = 0x00004800,
+       GUEST_CS_LIMIT                  = 0x00004802,
+       GUEST_SS_LIMIT                  = 0x00004804,
+       GUEST_DS_LIMIT                  = 0x00004806,
+       GUEST_FS_LIMIT                  = 0x00004808,
+       GUEST_GS_LIMIT                  = 0x0000480a,
+       GUEST_LDTR_LIMIT                = 0x0000480c,
+       GUEST_TR_LIMIT                  = 0x0000480e,
+       GUEST_GDTR_LIMIT                = 0x00004810,
+       GUEST_IDTR_LIMIT                = 0x00004812,
+       GUEST_ES_AR_BYTES               = 0x00004814,
+       GUEST_CS_AR_BYTES               = 0x00004816,
+       GUEST_SS_AR_BYTES               = 0x00004818,
+       GUEST_DS_AR_BYTES               = 0x0000481a,
+       GUEST_FS_AR_BYTES               = 0x0000481c,
+       GUEST_GS_AR_BYTES               = 0x0000481e,
+       GUEST_LDTR_AR_BYTES             = 0x00004820,
+       GUEST_TR_AR_BYTES               = 0x00004822,
+       GUEST_INTERRUPTIBILITY_INFO     = 0x00004824,
+       GUEST_ACTIVITY_STATE            = 0X00004826,
+       GUEST_SYSENTER_CS               = 0x0000482A,
+       VMX_PREEMPTION_TIMER_VALUE      = 0x0000482E,
+       HOST_IA32_SYSENTER_CS           = 0x00004c00,
+       CR0_GUEST_HOST_MASK             = 0x00006000,
+       CR4_GUEST_HOST_MASK             = 0x00006002,
+       CR0_READ_SHADOW                 = 0x00006004,
+       CR4_READ_SHADOW                 = 0x00006006,
+       CR3_TARGET_VALUE0               = 0x00006008,
+       CR3_TARGET_VALUE1               = 0x0000600a,
+       CR3_TARGET_VALUE2               = 0x0000600c,
+       CR3_TARGET_VALUE3               = 0x0000600e,
+       EXIT_QUALIFICATION              = 0x00006400,
+       GUEST_LINEAR_ADDRESS            = 0x0000640a,
+       GUEST_CR0                       = 0x00006800,
+       GUEST_CR3                       = 0x00006802,
+       GUEST_CR4                       = 0x00006804,
+       GUEST_ES_BASE                   = 0x00006806,
+       GUEST_CS_BASE                   = 0x00006808,
+       GUEST_SS_BASE                   = 0x0000680a,
+       GUEST_DS_BASE                   = 0x0000680c,
+       GUEST_FS_BASE                   = 0x0000680e,
+       GUEST_GS_BASE                   = 0x00006810,
+       GUEST_LDTR_BASE                 = 0x00006812,
+       GUEST_TR_BASE                   = 0x00006814,
+       GUEST_GDTR_BASE                 = 0x00006816,
+       GUEST_IDTR_BASE                 = 0x00006818,
+       GUEST_DR7                       = 0x0000681a,
+       GUEST_RSP                       = 0x0000681c,
+       GUEST_RIP                       = 0x0000681e,
+       GUEST_RFLAGS                    = 0x00006820,
+       GUEST_PENDING_DBG_EXCEPTIONS    = 0x00006822,
+       GUEST_SYSENTER_ESP              = 0x00006824,
+       GUEST_SYSENTER_EIP              = 0x00006826,
+       HOST_CR0                        = 0x00006c00,
+       HOST_CR3                        = 0x00006c02,
+       HOST_CR4                        = 0x00006c04,
+       HOST_FS_BASE                    = 0x00006c06,
+       HOST_GS_BASE                    = 0x00006c08,
+       HOST_TR_BASE                    = 0x00006c0a,
+       HOST_GDTR_BASE                  = 0x00006c0c,
+       HOST_IDTR_BASE                  = 0x00006c0e,
+       HOST_IA32_SYSENTER_ESP          = 0x00006c10,
+       HOST_IA32_SYSENTER_EIP          = 0x00006c12,
+       HOST_RSP                        = 0x00006c14,
+       HOST_RIP                        = 0x00006c16,
+};
+
+struct vmx_msr_entry {
+       uint32_t index;
+       uint32_t reserved;
+       uint64_t value;
+} __attribute__ ((aligned(16)));
+
+#include "evmcs.h"
+
+static inline int vmxon(uint64_t phys)
+{
+       uint8_t ret;
+
+       __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]"
+               : [ret]"=rm"(ret)
+               : [pa]"m"(phys)
+               : "cc", "memory");
+
+       return ret;
+}
+
+static inline void vmxoff(void)
+{
+       __asm__ __volatile__("vmxoff");
+}
+
+static inline int vmclear(uint64_t vmcs_pa)
+{
+       uint8_t ret;
+
+       __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]"
+               : [ret]"=rm"(ret)
+               : [pa]"m"(vmcs_pa)
+               : "cc", "memory");
+
+       return ret;
+}
+
+static inline int vmptrld(uint64_t vmcs_pa)
+{
+       uint8_t ret;
+
+       if (enable_evmcs)
+               return -1;
+
+       __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]"
+               : [ret]"=rm"(ret)
+               : [pa]"m"(vmcs_pa)
+               : "cc", "memory");
+
+       return ret;
+}
+
+static inline int vmptrst(uint64_t *value)
+{
+       uint64_t tmp;
+       uint8_t ret;
+
+       if (enable_evmcs)
+               return evmcs_vmptrst(value);
+
+       __asm__ __volatile__("vmptrst %[value]; setna %[ret]"
+               : [value]"=m"(tmp), [ret]"=rm"(ret)
+               : : "cc", "memory");
+
+       *value = tmp;
+       return ret;
+}
+
+/*
+ * A wrapper around vmptrst that ignores errors and returns zero if the
+ * vmptrst instruction fails.
+ */
+static inline uint64_t vmptrstz(void)
+{
+       uint64_t value = 0;
+       vmptrst(&value);
+       return value;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmlaunch.
+ */
+static inline int vmlaunch(void)
+{
+       int ret;
+
+       if (enable_evmcs)
+               return evmcs_vmlaunch();
+
+       __asm__ __volatile__("push %%rbp;"
+                            "push %%rcx;"
+                            "push %%rdx;"
+                            "push %%rsi;"
+                            "push %%rdi;"
+                            "push $0;"
+                            "vmwrite %%rsp, %[host_rsp];"
+                            "lea 1f(%%rip), %%rax;"
+                            "vmwrite %%rax, %[host_rip];"
+                            "vmlaunch;"
+                            "incq (%%rsp);"
+                            "1: pop %%rax;"
+                            "pop %%rdi;"
+                            "pop %%rsi;"
+                            "pop %%rdx;"
+                            "pop %%rcx;"
+                            "pop %%rbp;"
+                            : [ret]"=&a"(ret)
+                            : [host_rsp]"r"((uint64_t)HOST_RSP),
+                              [host_rip]"r"((uint64_t)HOST_RIP)
+                            : "memory", "cc", "rbx", "r8", "r9", "r10",
+                              "r11", "r12", "r13", "r14", "r15");
+       return ret;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmresume.
+ */
+static inline int vmresume(void)
+{
+       int ret;
+
+       if (enable_evmcs)
+               return evmcs_vmresume();
+
+       __asm__ __volatile__("push %%rbp;"
+                            "push %%rcx;"
+                            "push %%rdx;"
+                            "push %%rsi;"
+                            "push %%rdi;"
+                            "push $0;"
+                            "vmwrite %%rsp, %[host_rsp];"
+                            "lea 1f(%%rip), %%rax;"
+                            "vmwrite %%rax, %[host_rip];"
+                            "vmresume;"
+                            "incq (%%rsp);"
+                            "1: pop %%rax;"
+                            "pop %%rdi;"
+                            "pop %%rsi;"
+                            "pop %%rdx;"
+                            "pop %%rcx;"
+                            "pop %%rbp;"
+                            : [ret]"=&a"(ret)
+                            : [host_rsp]"r"((uint64_t)HOST_RSP),
+                              [host_rip]"r"((uint64_t)HOST_RIP)
+                            : "memory", "cc", "rbx", "r8", "r9", "r10",
+                              "r11", "r12", "r13", "r14", "r15");
+       return ret;
+}
+
+static inline void vmcall(void)
+{
+       /*
+        * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle
+        * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended
+        * use of this function is to exit to L1 from L2.  Clobber all other
+        * GPRs as L1 doesn't correctly preserve them during vmexits.
+        */
+       __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp"
+                            : : "a"(0xdeadbeef), "c"(0xbeefdead)
+                            : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+                              "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
+static inline int vmread(uint64_t encoding, uint64_t *value)
+{
+       uint64_t tmp;
+       uint8_t ret;
+
+       if (enable_evmcs)
+               return evmcs_vmread(encoding, value);
+
+       __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]"
+               : [value]"=rm"(tmp), [ret]"=rm"(ret)
+               : [encoding]"r"(encoding)
+               : "cc", "memory");
+
+       *value = tmp;
+       return ret;
+}
+
+/*
+ * A wrapper around vmread that ignores errors and returns zero if the
+ * vmread instruction fails.
+ */
+static inline uint64_t vmreadz(uint64_t encoding)
+{
+       uint64_t value = 0;
+       vmread(encoding, &value);
+       return value;
+}
+
+static inline int vmwrite(uint64_t encoding, uint64_t value)
+{
+       uint8_t ret;
+
+       if (enable_evmcs)
+               return evmcs_vmwrite(encoding, value);
+
+       __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]"
+               : [ret]"=rm"(ret)
+               : [value]"rm"(value), [encoding]"r"(encoding)
+               : "cc", "memory");
+
+       return ret;
+}
+
+static inline uint32_t vmcs_revision(void)
+{
+       return rdmsr(MSR_IA32_VMX_BASIC);
+}
+
+struct vmx_pages {
+       void *vmxon_hva;
+       uint64_t vmxon_gpa;
+       void *vmxon;
+
+       void *vmcs_hva;
+       uint64_t vmcs_gpa;
+       void *vmcs;
+
+       void *msr_hva;
+       uint64_t msr_gpa;
+       void *msr;
+
+       void *shadow_vmcs_hva;
+       uint64_t shadow_vmcs_gpa;
+       void *shadow_vmcs;
+
+       void *vmread_hva;
+       uint64_t vmread_gpa;
+       void *vmread;
+
+       void *vmwrite_hva;
+       uint64_t vmwrite_gpa;
+       void *vmwrite;
+
+       void *eptp_hva;
+       uint64_t eptp_gpa;
+       void *eptp;
+
+       void *apic_access_hva;
+       uint64_t apic_access_gpa;
+       void *apic_access;
+};
+
+union vmx_basic {
+       u64 val;
+       struct {
+               u32 revision;
+               u32     size:13,
+                       reserved1:3,
+                       width:1,
+                       dual:1,
+                       type:4,
+                       insouts:1,
+                       ctrl:1,
+                       vm_entry_exception_ctrl:1,
+                       reserved2:7;
+       };
+};
+
+union vmx_ctrl_msr {
+       u64 val;
+       struct {
+               u32 set, clr;
+       };
+};
+
+struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
+bool prepare_for_vmx_operation(struct vmx_pages *vmx);
+void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
+bool load_vmcs(struct vmx_pages *vmx);
+
+bool ept_1g_pages_supported(void);
+
+void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+                  uint64_t nested_paddr, uint64_t paddr);
+void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+                uint64_t nested_paddr, uint64_t paddr, uint64_t size);
+void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
+                       uint32_t memslot);
+void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
+                           uint64_t addr, uint64_t size);
+bool kvm_cpu_has_ept(void);
+void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
+                 uint32_t eptp_memslot);
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
+
+#endif /* SELFTEST_KVM_VMX_H */
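
Note (not part of the patch): a minimal sketch of how nested tests typically drive this header from L1 guest code, assuming GUEST_ASSERT()/GUEST_DONE() from ucall_common.h and EXIT_REASON_VMCALL from processor.h; the vmx_pages argument is the structure allocated host-side via vcpu_alloc_vmx():

/*
 * Sketch only: enter VMX operation, load the VMCS, launch an L2 that
 * immediately exits with VMCALL, and confirm the exit reason via vmreadz().
 */
#define L2_GUEST_STACK_SIZE 64

static void l2_guest_code(void)
{
	vmcall();	/* exit back to L1 */
}

static void l1_guest_code(struct vmx_pages *vmx)
{
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];

	GUEST_ASSERT(prepare_for_vmx_operation(vmx));
	GUEST_ASSERT(load_vmcs(vmx));
	prepare_vmcs(vmx, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);

	GUEST_ASSERT(!vmlaunch());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
	GUEST_DONE();
}
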
diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h
deleted file mode 100644 (file)
index 5199009..0000000
+++ /dev/null
@@ -1,120 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/x86_64/apic.h
- *
- * Copyright (C) 2021, Google LLC.
- */
-
-#ifndef SELFTEST_KVM_APIC_H
-#define SELFTEST_KVM_APIC_H
-
-#include <stdint.h>
-
-#include "processor.h"
-#include "ucall_common.h"
-
-#define APIC_DEFAULT_GPA               0xfee00000ULL
-
-/* APIC base address MSR and fields */
-#define MSR_IA32_APICBASE              0x0000001b
-#define MSR_IA32_APICBASE_BSP          (1<<8)
-#define MSR_IA32_APICBASE_EXTD         (1<<10)
-#define MSR_IA32_APICBASE_ENABLE       (1<<11)
-#define MSR_IA32_APICBASE_BASE         (0xfffff<<12)
-#define                GET_APIC_BASE(x)        (((x) >> 12) << 12)
-
-#define APIC_BASE_MSR  0x800
-#define X2APIC_ENABLE  (1UL << 10)
-#define        APIC_ID         0x20
-#define        APIC_LVR        0x30
-#define                GET_APIC_ID_FIELD(x)    (((x) >> 24) & 0xFF)
-#define        APIC_TASKPRI    0x80
-#define        APIC_PROCPRI    0xA0
-#define        APIC_EOI        0xB0
-#define        APIC_SPIV       0xF0
-#define                APIC_SPIV_FOCUS_DISABLED        (1 << 9)
-#define                APIC_SPIV_APIC_ENABLED          (1 << 8)
-#define APIC_IRR       0x200
-#define        APIC_ICR        0x300
-#define        APIC_LVTCMCI    0x2f0
-#define                APIC_DEST_SELF          0x40000
-#define                APIC_DEST_ALLINC        0x80000
-#define                APIC_DEST_ALLBUT        0xC0000
-#define                APIC_ICR_RR_MASK        0x30000
-#define                APIC_ICR_RR_INVALID     0x00000
-#define                APIC_ICR_RR_INPROG      0x10000
-#define                APIC_ICR_RR_VALID       0x20000
-#define                APIC_INT_LEVELTRIG      0x08000
-#define                APIC_INT_ASSERT         0x04000
-#define                APIC_ICR_BUSY           0x01000
-#define                APIC_DEST_LOGICAL       0x00800
-#define                APIC_DEST_PHYSICAL      0x00000
-#define                APIC_DM_FIXED           0x00000
-#define                APIC_DM_FIXED_MASK      0x00700
-#define                APIC_DM_LOWEST          0x00100
-#define                APIC_DM_SMI             0x00200
-#define                APIC_DM_REMRD           0x00300
-#define                APIC_DM_NMI             0x00400
-#define                APIC_DM_INIT            0x00500
-#define                APIC_DM_STARTUP         0x00600
-#define                APIC_DM_EXTINT          0x00700
-#define                APIC_VECTOR_MASK        0x000FF
-#define        APIC_ICR2       0x310
-#define                SET_APIC_DEST_FIELD(x)  ((x) << 24)
-#define APIC_LVTT      0x320
-#define                APIC_LVT_TIMER_ONESHOT          (0 << 17)
-#define                APIC_LVT_TIMER_PERIODIC         (1 << 17)
-#define                APIC_LVT_TIMER_TSCDEADLINE      (2 << 17)
-#define                APIC_LVT_MASKED                 (1 << 16)
-#define        APIC_TMICT      0x380
-#define        APIC_TMCCT      0x390
-#define        APIC_TDCR       0x3E0
-
-void apic_disable(void);
-void xapic_enable(void);
-void x2apic_enable(void);
-
-static inline uint32_t get_bsp_flag(void)
-{
-       return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
-}
-
-static inline uint32_t xapic_read_reg(unsigned int reg)
-{
-       return ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2];
-}
-
-static inline void xapic_write_reg(unsigned int reg, uint32_t val)
-{
-       ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2] = val;
-}
-
-static inline uint64_t x2apic_read_reg(unsigned int reg)
-{
-       return rdmsr(APIC_BASE_MSR + (reg >> 4));
-}
-
-static inline uint8_t x2apic_write_reg_safe(unsigned int reg, uint64_t value)
-{
-       return wrmsr_safe(APIC_BASE_MSR + (reg >> 4), value);
-}
-
-static inline void x2apic_write_reg(unsigned int reg, uint64_t value)
-{
-       uint8_t fault = x2apic_write_reg_safe(reg, value);
-
-       __GUEST_ASSERT(!fault, "Unexpected fault 0x%x on WRMSR(%x) = %lx\n",
-                      fault, APIC_BASE_MSR + (reg >> 4), value);
-}
-
-static inline void x2apic_write_reg_fault(unsigned int reg, uint64_t value)
-{
-       uint8_t fault = x2apic_write_reg_safe(reg, value);
-
-       __GUEST_ASSERT(fault == GP_VECTOR,
-                      "Wanted #GP on WRMSR(%x) = %lx, got 0x%x\n",
-                      APIC_BASE_MSR + (reg >> 4), value, fault);
-}
-
-
-#endif /* SELFTEST_KVM_APIC_H */
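
Note (not part of the patch): the accessors declared above are used unchanged from guest code; a hedged sketch of the usual idiom, assuming GUEST_ASSERT() from ucall_common.h and a vCPU ID that fits in the xAPIC's 8-bit ID field:

/*
 * Sketch only: read the local APIC ID through the xAPIC MMIO window
 * (ID lives in bits 31:24 of the APIC_ID register) and again through the
 * x2APIC MSR interface (full 32-bit ID), and check both against the
 * expected value.
 */
static void guest_check_apic_id(uint32_t expected_id)
{
	uint32_t xapic_id, x2apic_id;

	xapic_enable();
	xapic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));

	x2apic_enable();
	x2apic_id = (uint32_t)x2apic_read_reg(APIC_ID);

	GUEST_ASSERT(xapic_id == expected_id);
	GUEST_ASSERT(x2apic_id == expected_id);
}
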
diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
deleted file mode 100644 (file)
index 901caf0..0000000
+++ /dev/null
@@ -1,1279 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * tools/testing/selftests/kvm/include/x86_64/evmcs.h
- *
- * Copyright (C) 2018, Red Hat, Inc.
- *
- */
-
-#ifndef SELFTEST_KVM_EVMCS_H
-#define SELFTEST_KVM_EVMCS_H
-
-#include <stdint.h>
-#include "hyperv.h"
-#include "vmx.h"
-
-#define u16 uint16_t
-#define u32 uint32_t
-#define u64 uint64_t
-
-#define EVMCS_VERSION 1
-
-extern bool enable_evmcs;
-
-struct hv_enlightened_vmcs {
-       u32 revision_id;
-       u32 abort;
-
-       u16 host_es_selector;
-       u16 host_cs_selector;
-       u16 host_ss_selector;
-       u16 host_ds_selector;
-       u16 host_fs_selector;
-       u16 host_gs_selector;
-       u16 host_tr_selector;
-
-       u16 padding16_1;
-
-       u64 host_ia32_pat;
-       u64 host_ia32_efer;
-
-       u64 host_cr0;
-       u64 host_cr3;
-       u64 host_cr4;
-
-       u64 host_ia32_sysenter_esp;
-       u64 host_ia32_sysenter_eip;
-       u64 host_rip;
-       u32 host_ia32_sysenter_cs;
-
-       u32 pin_based_vm_exec_control;
-       u32 vm_exit_controls;
-       u32 secondary_vm_exec_control;
-
-       u64 io_bitmap_a;
-       u64 io_bitmap_b;
-       u64 msr_bitmap;
-
-       u16 guest_es_selector;
-       u16 guest_cs_selector;
-       u16 guest_ss_selector;
-       u16 guest_ds_selector;
-       u16 guest_fs_selector;
-       u16 guest_gs_selector;
-       u16 guest_ldtr_selector;
-       u16 guest_tr_selector;
-
-       u32 guest_es_limit;
-       u32 guest_cs_limit;
-       u32 guest_ss_limit;
-       u32 guest_ds_limit;
-       u32 guest_fs_limit;
-       u32 guest_gs_limit;
-       u32 guest_ldtr_limit;
-       u32 guest_tr_limit;
-       u32 guest_gdtr_limit;
-       u32 guest_idtr_limit;
-
-       u32 guest_es_ar_bytes;
-       u32 guest_cs_ar_bytes;
-       u32 guest_ss_ar_bytes;
-       u32 guest_ds_ar_bytes;
-       u32 guest_fs_ar_bytes;
-       u32 guest_gs_ar_bytes;
-       u32 guest_ldtr_ar_bytes;
-       u32 guest_tr_ar_bytes;
-
-       u64 guest_es_base;
-       u64 guest_cs_base;
-       u64 guest_ss_base;
-       u64 guest_ds_base;
-       u64 guest_fs_base;
-       u64 guest_gs_base;
-       u64 guest_ldtr_base;
-       u64 guest_tr_base;
-       u64 guest_gdtr_base;
-       u64 guest_idtr_base;
-
-       u64 padding64_1[3];
-
-       u64 vm_exit_msr_store_addr;
-       u64 vm_exit_msr_load_addr;
-       u64 vm_entry_msr_load_addr;
-
-       u64 cr3_target_value0;
-       u64 cr3_target_value1;
-       u64 cr3_target_value2;
-       u64 cr3_target_value3;
-
-       u32 page_fault_error_code_mask;
-       u32 page_fault_error_code_match;
-
-       u32 cr3_target_count;
-       u32 vm_exit_msr_store_count;
-       u32 vm_exit_msr_load_count;
-       u32 vm_entry_msr_load_count;
-
-       u64 tsc_offset;
-       u64 virtual_apic_page_addr;
-       u64 vmcs_link_pointer;
-
-       u64 guest_ia32_debugctl;
-       u64 guest_ia32_pat;
-       u64 guest_ia32_efer;
-
-       u64 guest_pdptr0;
-       u64 guest_pdptr1;
-       u64 guest_pdptr2;
-       u64 guest_pdptr3;
-
-       u64 guest_pending_dbg_exceptions;
-       u64 guest_sysenter_esp;
-       u64 guest_sysenter_eip;
-
-       u32 guest_activity_state;
-       u32 guest_sysenter_cs;
-
-       u64 cr0_guest_host_mask;
-       u64 cr4_guest_host_mask;
-       u64 cr0_read_shadow;
-       u64 cr4_read_shadow;
-       u64 guest_cr0;
-       u64 guest_cr3;
-       u64 guest_cr4;
-       u64 guest_dr7;
-
-       u64 host_fs_base;
-       u64 host_gs_base;
-       u64 host_tr_base;
-       u64 host_gdtr_base;
-       u64 host_idtr_base;
-       u64 host_rsp;
-
-       u64 ept_pointer;
-
-       u16 virtual_processor_id;
-       u16 padding16_2[3];
-
-       u64 padding64_2[5];
-       u64 guest_physical_address;
-
-       u32 vm_instruction_error;
-       u32 vm_exit_reason;
-       u32 vm_exit_intr_info;
-       u32 vm_exit_intr_error_code;
-       u32 idt_vectoring_info_field;
-       u32 idt_vectoring_error_code;
-       u32 vm_exit_instruction_len;
-       u32 vmx_instruction_info;
-
-       u64 exit_qualification;
-       u64 exit_io_instruction_ecx;
-       u64 exit_io_instruction_esi;
-       u64 exit_io_instruction_edi;
-       u64 exit_io_instruction_eip;
-
-       u64 guest_linear_address;
-       u64 guest_rsp;
-       u64 guest_rflags;
-
-       u32 guest_interruptibility_info;
-       u32 cpu_based_vm_exec_control;
-       u32 exception_bitmap;
-       u32 vm_entry_controls;
-       u32 vm_entry_intr_info_field;
-       u32 vm_entry_exception_error_code;
-       u32 vm_entry_instruction_len;
-       u32 tpr_threshold;
-
-       u64 guest_rip;
-
-       u32 hv_clean_fields;
-       u32 padding32_1;
-       u32 hv_synthetic_controls;
-       struct {
-               u32 nested_flush_hypercall:1;
-               u32 msr_bitmap:1;
-               u32 reserved:30;
-       }  __packed hv_enlightenments_control;
-       u32 hv_vp_id;
-       u32 padding32_2;
-       u64 hv_vm_id;
-       u64 partition_assist_page;
-       u64 padding64_4[4];
-       u64 guest_bndcfgs;
-       u64 guest_ia32_perf_global_ctrl;
-       u64 guest_ia32_s_cet;
-       u64 guest_ssp;
-       u64 guest_ia32_int_ssp_table_addr;
-       u64 guest_ia32_lbr_ctl;
-       u64 padding64_5[2];
-       u64 xss_exit_bitmap;
-       u64 encls_exiting_bitmap;
-       u64 host_ia32_perf_global_ctrl;
-       u64 tsc_multiplier;
-       u64 host_ia32_s_cet;
-       u64 host_ssp;
-       u64 host_ia32_int_ssp_table_addr;
-       u64 padding64_6;
-} __packed;
-
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE                     0
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP                BIT(0)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP               BIT(1)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2             BIT(2)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1             BIT(3)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC             BIT(4)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT            BIT(5)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY            BIT(6)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN            BIT(7)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR                     BIT(8)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT             BIT(9)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC              BIT(10)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1               BIT(11)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2               BIT(12)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER             BIT(13)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1                BIT(14)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL    BIT(15)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL                      0xFFFF
-
-#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031
-
-extern struct hv_enlightened_vmcs *current_evmcs;
-
-int vcpu_enable_evmcs(struct kvm_vcpu *vcpu);
-
-static inline void evmcs_enable(void)
-{
-       enable_evmcs = true;
-}
-
-static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs)
-{
-       current_vp_assist->current_nested_vmcs = vmcs_pa;
-       current_vp_assist->enlighten_vmentry = 1;
-
-       current_evmcs = vmcs;
-
-       return 0;
-}
-
-static inline bool load_evmcs(struct hyperv_test_pages *hv)
-{
-       if (evmcs_vmptrld(hv->enlightened_vmcs_gpa, hv->enlightened_vmcs))
-               return false;
-
-       current_evmcs->revision_id = EVMCS_VERSION;
-
-       return true;
-}
-
-static inline int evmcs_vmptrst(uint64_t *value)
-{
-       *value = current_vp_assist->current_nested_vmcs &
-               ~HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
-
-       return 0;
-}
-
-static inline int evmcs_vmread(uint64_t encoding, uint64_t *value)
-{
-       switch (encoding) {
-       case GUEST_RIP:
-               *value = current_evmcs->guest_rip;
-               break;
-       case GUEST_RSP:
-               *value = current_evmcs->guest_rsp;
-               break;
-       case GUEST_RFLAGS:
-               *value = current_evmcs->guest_rflags;
-               break;
-       case HOST_IA32_PAT:
-               *value = current_evmcs->host_ia32_pat;
-               break;
-       case HOST_IA32_EFER:
-               *value = current_evmcs->host_ia32_efer;
-               break;
-       case HOST_CR0:
-               *value = current_evmcs->host_cr0;
-               break;
-       case HOST_CR3:
-               *value = current_evmcs->host_cr3;
-               break;
-       case HOST_CR4:
-               *value = current_evmcs->host_cr4;
-               break;
-       case HOST_IA32_SYSENTER_ESP:
-               *value = current_evmcs->host_ia32_sysenter_esp;
-               break;
-       case HOST_IA32_SYSENTER_EIP:
-               *value = current_evmcs->host_ia32_sysenter_eip;
-               break;
-       case HOST_RIP:
-               *value = current_evmcs->host_rip;
-               break;
-       case IO_BITMAP_A:
-               *value = current_evmcs->io_bitmap_a;
-               break;
-       case IO_BITMAP_B:
-               *value = current_evmcs->io_bitmap_b;
-               break;
-       case MSR_BITMAP:
-               *value = current_evmcs->msr_bitmap;
-               break;
-       case GUEST_ES_BASE:
-               *value = current_evmcs->guest_es_base;
-               break;
-       case GUEST_CS_BASE:
-               *value = current_evmcs->guest_cs_base;
-               break;
-       case GUEST_SS_BASE:
-               *value = current_evmcs->guest_ss_base;
-               break;
-       case GUEST_DS_BASE:
-               *value = current_evmcs->guest_ds_base;
-               break;
-       case GUEST_FS_BASE:
-               *value = current_evmcs->guest_fs_base;
-               break;
-       case GUEST_GS_BASE:
-               *value = current_evmcs->guest_gs_base;
-               break;
-       case GUEST_LDTR_BASE:
-               *value = current_evmcs->guest_ldtr_base;
-               break;
-       case GUEST_TR_BASE:
-               *value = current_evmcs->guest_tr_base;
-               break;
-       case GUEST_GDTR_BASE:
-               *value = current_evmcs->guest_gdtr_base;
-               break;
-       case GUEST_IDTR_BASE:
-               *value = current_evmcs->guest_idtr_base;
-               break;
-       case TSC_OFFSET:
-               *value = current_evmcs->tsc_offset;
-               break;
-       case VIRTUAL_APIC_PAGE_ADDR:
-               *value = current_evmcs->virtual_apic_page_addr;
-               break;
-       case VMCS_LINK_POINTER:
-               *value = current_evmcs->vmcs_link_pointer;
-               break;
-       case GUEST_IA32_DEBUGCTL:
-               *value = current_evmcs->guest_ia32_debugctl;
-               break;
-       case GUEST_IA32_PAT:
-               *value = current_evmcs->guest_ia32_pat;
-               break;
-       case GUEST_IA32_EFER:
-               *value = current_evmcs->guest_ia32_efer;
-               break;
-       case GUEST_PDPTR0:
-               *value = current_evmcs->guest_pdptr0;
-               break;
-       case GUEST_PDPTR1:
-               *value = current_evmcs->guest_pdptr1;
-               break;
-       case GUEST_PDPTR2:
-               *value = current_evmcs->guest_pdptr2;
-               break;
-       case GUEST_PDPTR3:
-               *value = current_evmcs->guest_pdptr3;
-               break;
-       case GUEST_PENDING_DBG_EXCEPTIONS:
-               *value = current_evmcs->guest_pending_dbg_exceptions;
-               break;
-       case GUEST_SYSENTER_ESP:
-               *value = current_evmcs->guest_sysenter_esp;
-               break;
-       case GUEST_SYSENTER_EIP:
-               *value = current_evmcs->guest_sysenter_eip;
-               break;
-       case CR0_GUEST_HOST_MASK:
-               *value = current_evmcs->cr0_guest_host_mask;
-               break;
-       case CR4_GUEST_HOST_MASK:
-               *value = current_evmcs->cr4_guest_host_mask;
-               break;
-       case CR0_READ_SHADOW:
-               *value = current_evmcs->cr0_read_shadow;
-               break;
-       case CR4_READ_SHADOW:
-               *value = current_evmcs->cr4_read_shadow;
-               break;
-       case GUEST_CR0:
-               *value = current_evmcs->guest_cr0;
-               break;
-       case GUEST_CR3:
-               *value = current_evmcs->guest_cr3;
-               break;
-       case GUEST_CR4:
-               *value = current_evmcs->guest_cr4;
-               break;
-       case GUEST_DR7:
-               *value = current_evmcs->guest_dr7;
-               break;
-       case HOST_FS_BASE:
-               *value = current_evmcs->host_fs_base;
-               break;
-       case HOST_GS_BASE:
-               *value = current_evmcs->host_gs_base;
-               break;
-       case HOST_TR_BASE:
-               *value = current_evmcs->host_tr_base;
-               break;
-       case HOST_GDTR_BASE:
-               *value = current_evmcs->host_gdtr_base;
-               break;
-       case HOST_IDTR_BASE:
-               *value = current_evmcs->host_idtr_base;
-               break;
-       case HOST_RSP:
-               *value = current_evmcs->host_rsp;
-               break;
-       case EPT_POINTER:
-               *value = current_evmcs->ept_pointer;
-               break;
-       case GUEST_BNDCFGS:
-               *value = current_evmcs->guest_bndcfgs;
-               break;
-       case XSS_EXIT_BITMAP:
-               *value = current_evmcs->xss_exit_bitmap;
-               break;
-       case GUEST_PHYSICAL_ADDRESS:
-               *value = current_evmcs->guest_physical_address;
-               break;
-       case EXIT_QUALIFICATION:
-               *value = current_evmcs->exit_qualification;
-               break;
-       case GUEST_LINEAR_ADDRESS:
-               *value = current_evmcs->guest_linear_address;
-               break;
-       case VM_EXIT_MSR_STORE_ADDR:
-               *value = current_evmcs->vm_exit_msr_store_addr;
-               break;
-       case VM_EXIT_MSR_LOAD_ADDR:
-               *value = current_evmcs->vm_exit_msr_load_addr;
-               break;
-       case VM_ENTRY_MSR_LOAD_ADDR:
-               *value = current_evmcs->vm_entry_msr_load_addr;
-               break;
-       case CR3_TARGET_VALUE0:
-               *value = current_evmcs->cr3_target_value0;
-               break;
-       case CR3_TARGET_VALUE1:
-               *value = current_evmcs->cr3_target_value1;
-               break;
-       case CR3_TARGET_VALUE2:
-               *value = current_evmcs->cr3_target_value2;
-               break;
-       case CR3_TARGET_VALUE3:
-               *value = current_evmcs->cr3_target_value3;
-               break;
-       case TPR_THRESHOLD:
-               *value = current_evmcs->tpr_threshold;
-               break;
-       case GUEST_INTERRUPTIBILITY_INFO:
-               *value = current_evmcs->guest_interruptibility_info;
-               break;
-       case CPU_BASED_VM_EXEC_CONTROL:
-               *value = current_evmcs->cpu_based_vm_exec_control;
-               break;
-       case EXCEPTION_BITMAP:
-               *value = current_evmcs->exception_bitmap;
-               break;
-       case VM_ENTRY_CONTROLS:
-               *value = current_evmcs->vm_entry_controls;
-               break;
-       case VM_ENTRY_INTR_INFO_FIELD:
-               *value = current_evmcs->vm_entry_intr_info_field;
-               break;
-       case VM_ENTRY_EXCEPTION_ERROR_CODE:
-               *value = current_evmcs->vm_entry_exception_error_code;
-               break;
-       case VM_ENTRY_INSTRUCTION_LEN:
-               *value = current_evmcs->vm_entry_instruction_len;
-               break;
-       case HOST_IA32_SYSENTER_CS:
-               *value = current_evmcs->host_ia32_sysenter_cs;
-               break;
-       case PIN_BASED_VM_EXEC_CONTROL:
-               *value = current_evmcs->pin_based_vm_exec_control;
-               break;
-       case VM_EXIT_CONTROLS:
-               *value = current_evmcs->vm_exit_controls;
-               break;
-       case SECONDARY_VM_EXEC_CONTROL:
-               *value = current_evmcs->secondary_vm_exec_control;
-               break;
-       case GUEST_ES_LIMIT:
-               *value = current_evmcs->guest_es_limit;
-               break;
-       case GUEST_CS_LIMIT:
-               *value = current_evmcs->guest_cs_limit;
-               break;
-       case GUEST_SS_LIMIT:
-               *value = current_evmcs->guest_ss_limit;
-               break;
-       case GUEST_DS_LIMIT:
-               *value = current_evmcs->guest_ds_limit;
-               break;
-       case GUEST_FS_LIMIT:
-               *value = current_evmcs->guest_fs_limit;
-               break;
-       case GUEST_GS_LIMIT:
-               *value = current_evmcs->guest_gs_limit;
-               break;
-       case GUEST_LDTR_LIMIT:
-               *value = current_evmcs->guest_ldtr_limit;
-               break;
-       case GUEST_TR_LIMIT:
-               *value = current_evmcs->guest_tr_limit;
-               break;
-       case GUEST_GDTR_LIMIT:
-               *value = current_evmcs->guest_gdtr_limit;
-               break;
-       case GUEST_IDTR_LIMIT:
-               *value = current_evmcs->guest_idtr_limit;
-               break;
-       case GUEST_ES_AR_BYTES:
-               *value = current_evmcs->guest_es_ar_bytes;
-               break;
-       case GUEST_CS_AR_BYTES:
-               *value = current_evmcs->guest_cs_ar_bytes;
-               break;
-       case GUEST_SS_AR_BYTES:
-               *value = current_evmcs->guest_ss_ar_bytes;
-               break;
-       case GUEST_DS_AR_BYTES:
-               *value = current_evmcs->guest_ds_ar_bytes;
-               break;
-       case GUEST_FS_AR_BYTES:
-               *value = current_evmcs->guest_fs_ar_bytes;
-               break;
-       case GUEST_GS_AR_BYTES:
-               *value = current_evmcs->guest_gs_ar_bytes;
-               break;
-       case GUEST_LDTR_AR_BYTES:
-               *value = current_evmcs->guest_ldtr_ar_bytes;
-               break;
-       case GUEST_TR_AR_BYTES:
-               *value = current_evmcs->guest_tr_ar_bytes;
-               break;
-       case GUEST_ACTIVITY_STATE:
-               *value = current_evmcs->guest_activity_state;
-               break;
-       case GUEST_SYSENTER_CS:
-               *value = current_evmcs->guest_sysenter_cs;
-               break;
-       case VM_INSTRUCTION_ERROR:
-               *value = current_evmcs->vm_instruction_error;
-               break;
-       case VM_EXIT_REASON:
-               *value = current_evmcs->vm_exit_reason;
-               break;
-       case VM_EXIT_INTR_INFO:
-               *value = current_evmcs->vm_exit_intr_info;
-               break;
-       case VM_EXIT_INTR_ERROR_CODE:
-               *value = current_evmcs->vm_exit_intr_error_code;
-               break;
-       case IDT_VECTORING_INFO_FIELD:
-               *value = current_evmcs->idt_vectoring_info_field;
-               break;
-       case IDT_VECTORING_ERROR_CODE:
-               *value = current_evmcs->idt_vectoring_error_code;
-               break;
-       case VM_EXIT_INSTRUCTION_LEN:
-               *value = current_evmcs->vm_exit_instruction_len;
-               break;
-       case VMX_INSTRUCTION_INFO:
-               *value = current_evmcs->vmx_instruction_info;
-               break;
-       case PAGE_FAULT_ERROR_CODE_MASK:
-               *value = current_evmcs->page_fault_error_code_mask;
-               break;
-       case PAGE_FAULT_ERROR_CODE_MATCH:
-               *value = current_evmcs->page_fault_error_code_match;
-               break;
-       case CR3_TARGET_COUNT:
-               *value = current_evmcs->cr3_target_count;
-               break;
-       case VM_EXIT_MSR_STORE_COUNT:
-               *value = current_evmcs->vm_exit_msr_store_count;
-               break;
-       case VM_EXIT_MSR_LOAD_COUNT:
-               *value = current_evmcs->vm_exit_msr_load_count;
-               break;
-       case VM_ENTRY_MSR_LOAD_COUNT:
-               *value = current_evmcs->vm_entry_msr_load_count;
-               break;
-       case HOST_ES_SELECTOR:
-               *value = current_evmcs->host_es_selector;
-               break;
-       case HOST_CS_SELECTOR:
-               *value = current_evmcs->host_cs_selector;
-               break;
-       case HOST_SS_SELECTOR:
-               *value = current_evmcs->host_ss_selector;
-               break;
-       case HOST_DS_SELECTOR:
-               *value = current_evmcs->host_ds_selector;
-               break;
-       case HOST_FS_SELECTOR:
-               *value = current_evmcs->host_fs_selector;
-               break;
-       case HOST_GS_SELECTOR:
-               *value = current_evmcs->host_gs_selector;
-               break;
-       case HOST_TR_SELECTOR:
-               *value = current_evmcs->host_tr_selector;
-               break;
-       case GUEST_ES_SELECTOR:
-               *value = current_evmcs->guest_es_selector;
-               break;
-       case GUEST_CS_SELECTOR:
-               *value = current_evmcs->guest_cs_selector;
-               break;
-       case GUEST_SS_SELECTOR:
-               *value = current_evmcs->guest_ss_selector;
-               break;
-       case GUEST_DS_SELECTOR:
-               *value = current_evmcs->guest_ds_selector;
-               break;
-       case GUEST_FS_SELECTOR:
-               *value = current_evmcs->guest_fs_selector;
-               break;
-       case GUEST_GS_SELECTOR:
-               *value = current_evmcs->guest_gs_selector;
-               break;
-       case GUEST_LDTR_SELECTOR:
-               *value = current_evmcs->guest_ldtr_selector;
-               break;
-       case GUEST_TR_SELECTOR:
-               *value = current_evmcs->guest_tr_selector;
-               break;
-       case VIRTUAL_PROCESSOR_ID:
-               *value = current_evmcs->virtual_processor_id;
-               break;
-       case HOST_IA32_PERF_GLOBAL_CTRL:
-               *value = current_evmcs->host_ia32_perf_global_ctrl;
-               break;
-       case GUEST_IA32_PERF_GLOBAL_CTRL:
-               *value = current_evmcs->guest_ia32_perf_global_ctrl;
-               break;
-       case ENCLS_EXITING_BITMAP:
-               *value = current_evmcs->encls_exiting_bitmap;
-               break;
-       case TSC_MULTIPLIER:
-               *value = current_evmcs->tsc_multiplier;
-               break;
-       default: return 1;
-       }
-
-       return 0;
-}
-
-static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value)
-{
-       switch (encoding) {
-       case GUEST_RIP:
-               current_evmcs->guest_rip = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case GUEST_RSP:
-               current_evmcs->guest_rsp = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
-               break;
-       case GUEST_RFLAGS:
-               current_evmcs->guest_rflags = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
-               break;
-       case HOST_IA32_PAT:
-               current_evmcs->host_ia32_pat = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_IA32_EFER:
-               current_evmcs->host_ia32_efer = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_CR0:
-               current_evmcs->host_cr0 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_CR3:
-               current_evmcs->host_cr3 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_CR4:
-               current_evmcs->host_cr4 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_IA32_SYSENTER_ESP:
-               current_evmcs->host_ia32_sysenter_esp = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_IA32_SYSENTER_EIP:
-               current_evmcs->host_ia32_sysenter_eip = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_RIP:
-               current_evmcs->host_rip = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case IO_BITMAP_A:
-               current_evmcs->io_bitmap_a = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP;
-               break;
-       case IO_BITMAP_B:
-               current_evmcs->io_bitmap_b = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP;
-               break;
-       case MSR_BITMAP:
-               current_evmcs->msr_bitmap = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
-               break;
-       case GUEST_ES_BASE:
-               current_evmcs->guest_es_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_CS_BASE:
-               current_evmcs->guest_cs_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_SS_BASE:
-               current_evmcs->guest_ss_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_DS_BASE:
-               current_evmcs->guest_ds_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_FS_BASE:
-               current_evmcs->guest_fs_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_GS_BASE:
-               current_evmcs->guest_gs_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_LDTR_BASE:
-               current_evmcs->guest_ldtr_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_TR_BASE:
-               current_evmcs->guest_tr_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_GDTR_BASE:
-               current_evmcs->guest_gdtr_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_IDTR_BASE:
-               current_evmcs->guest_idtr_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case TSC_OFFSET:
-               current_evmcs->tsc_offset = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
-               break;
-       case VIRTUAL_APIC_PAGE_ADDR:
-               current_evmcs->virtual_apic_page_addr = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
-               break;
-       case VMCS_LINK_POINTER:
-               current_evmcs->vmcs_link_pointer = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_IA32_DEBUGCTL:
-               current_evmcs->guest_ia32_debugctl = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_IA32_PAT:
-               current_evmcs->guest_ia32_pat = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_IA32_EFER:
-               current_evmcs->guest_ia32_efer = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_PDPTR0:
-               current_evmcs->guest_pdptr0 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_PDPTR1:
-               current_evmcs->guest_pdptr1 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_PDPTR2:
-               current_evmcs->guest_pdptr2 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_PDPTR3:
-               current_evmcs->guest_pdptr3 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_PENDING_DBG_EXCEPTIONS:
-               current_evmcs->guest_pending_dbg_exceptions = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_SYSENTER_ESP:
-               current_evmcs->guest_sysenter_esp = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_SYSENTER_EIP:
-               current_evmcs->guest_sysenter_eip = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case CR0_GUEST_HOST_MASK:
-               current_evmcs->cr0_guest_host_mask = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
-               break;
-       case CR4_GUEST_HOST_MASK:
-               current_evmcs->cr4_guest_host_mask = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
-               break;
-       case CR0_READ_SHADOW:
-               current_evmcs->cr0_read_shadow = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
-               break;
-       case CR4_READ_SHADOW:
-               current_evmcs->cr4_read_shadow = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
-               break;
-       case GUEST_CR0:
-               current_evmcs->guest_cr0 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
-               break;
-       case GUEST_CR3:
-               current_evmcs->guest_cr3 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
-               break;
-       case GUEST_CR4:
-               current_evmcs->guest_cr4 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
-               break;
-       case GUEST_DR7:
-               current_evmcs->guest_dr7 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
-               break;
-       case HOST_FS_BASE:
-               current_evmcs->host_fs_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
-               break;
-       case HOST_GS_BASE:
-               current_evmcs->host_gs_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
-               break;
-       case HOST_TR_BASE:
-               current_evmcs->host_tr_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
-               break;
-       case HOST_GDTR_BASE:
-               current_evmcs->host_gdtr_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
-               break;
-       case HOST_IDTR_BASE:
-               current_evmcs->host_idtr_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
-               break;
-       case HOST_RSP:
-               current_evmcs->host_rsp = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
-               break;
-       case EPT_POINTER:
-               current_evmcs->ept_pointer = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT;
-               break;
-       case GUEST_BNDCFGS:
-               current_evmcs->guest_bndcfgs = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case XSS_EXIT_BITMAP:
-               current_evmcs->xss_exit_bitmap = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
-               break;
-       case GUEST_PHYSICAL_ADDRESS:
-               current_evmcs->guest_physical_address = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case EXIT_QUALIFICATION:
-               current_evmcs->exit_qualification = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case GUEST_LINEAR_ADDRESS:
-               current_evmcs->guest_linear_address = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case VM_EXIT_MSR_STORE_ADDR:
-               current_evmcs->vm_exit_msr_store_addr = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case VM_EXIT_MSR_LOAD_ADDR:
-               current_evmcs->vm_exit_msr_load_addr = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case VM_ENTRY_MSR_LOAD_ADDR:
-               current_evmcs->vm_entry_msr_load_addr = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case CR3_TARGET_VALUE0:
-               current_evmcs->cr3_target_value0 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case CR3_TARGET_VALUE1:
-               current_evmcs->cr3_target_value1 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case CR3_TARGET_VALUE2:
-               current_evmcs->cr3_target_value2 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case CR3_TARGET_VALUE3:
-               current_evmcs->cr3_target_value3 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case TPR_THRESHOLD:
-               current_evmcs->tpr_threshold = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case GUEST_INTERRUPTIBILITY_INFO:
-               current_evmcs->guest_interruptibility_info = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
-               break;
-       case CPU_BASED_VM_EXEC_CONTROL:
-               current_evmcs->cpu_based_vm_exec_control = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC;
-               break;
-       case EXCEPTION_BITMAP:
-               current_evmcs->exception_bitmap = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN;
-               break;
-       case VM_ENTRY_CONTROLS:
-               current_evmcs->vm_entry_controls = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY;
-               break;
-       case VM_ENTRY_INTR_INFO_FIELD:
-               current_evmcs->vm_entry_intr_info_field = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
-               break;
-       case VM_ENTRY_EXCEPTION_ERROR_CODE:
-               current_evmcs->vm_entry_exception_error_code = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
-               break;
-       case VM_ENTRY_INSTRUCTION_LEN:
-               current_evmcs->vm_entry_instruction_len = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
-               break;
-       case HOST_IA32_SYSENTER_CS:
-               current_evmcs->host_ia32_sysenter_cs = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case PIN_BASED_VM_EXEC_CONTROL:
-               current_evmcs->pin_based_vm_exec_control = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
-               break;
-       case VM_EXIT_CONTROLS:
-               current_evmcs->vm_exit_controls = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
-               break;
-       case SECONDARY_VM_EXEC_CONTROL:
-               current_evmcs->secondary_vm_exec_control = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
-               break;
-       case GUEST_ES_LIMIT:
-               current_evmcs->guest_es_limit = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_CS_LIMIT:
-               current_evmcs->guest_cs_limit = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_SS_LIMIT:
-               current_evmcs->guest_ss_limit = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_DS_LIMIT:
-               current_evmcs->guest_ds_limit = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_FS_LIMIT:
-               current_evmcs->guest_fs_limit = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_GS_LIMIT:
-               current_evmcs->guest_gs_limit = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_LDTR_LIMIT:
-               current_evmcs->guest_ldtr_limit = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_TR_LIMIT:
-               current_evmcs->guest_tr_limit = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_GDTR_LIMIT:
-               current_evmcs->guest_gdtr_limit = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_IDTR_LIMIT:
-               current_evmcs->guest_idtr_limit = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_ES_AR_BYTES:
-               current_evmcs->guest_es_ar_bytes = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_CS_AR_BYTES:
-               current_evmcs->guest_cs_ar_bytes = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_SS_AR_BYTES:
-               current_evmcs->guest_ss_ar_bytes = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_DS_AR_BYTES:
-               current_evmcs->guest_ds_ar_bytes = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_FS_AR_BYTES:
-               current_evmcs->guest_fs_ar_bytes = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_GS_AR_BYTES:
-               current_evmcs->guest_gs_ar_bytes = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_LDTR_AR_BYTES:
-               current_evmcs->guest_ldtr_ar_bytes = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_TR_AR_BYTES:
-               current_evmcs->guest_tr_ar_bytes = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_ACTIVITY_STATE:
-               current_evmcs->guest_activity_state = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_SYSENTER_CS:
-               current_evmcs->guest_sysenter_cs = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case VM_INSTRUCTION_ERROR:
-               current_evmcs->vm_instruction_error = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case VM_EXIT_REASON:
-               current_evmcs->vm_exit_reason = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case VM_EXIT_INTR_INFO:
-               current_evmcs->vm_exit_intr_info = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case VM_EXIT_INTR_ERROR_CODE:
-               current_evmcs->vm_exit_intr_error_code = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case IDT_VECTORING_INFO_FIELD:
-               current_evmcs->idt_vectoring_info_field = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case IDT_VECTORING_ERROR_CODE:
-               current_evmcs->idt_vectoring_error_code = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case VM_EXIT_INSTRUCTION_LEN:
-               current_evmcs->vm_exit_instruction_len = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case VMX_INSTRUCTION_INFO:
-               current_evmcs->vmx_instruction_info = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case PAGE_FAULT_ERROR_CODE_MASK:
-               current_evmcs->page_fault_error_code_mask = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case PAGE_FAULT_ERROR_CODE_MATCH:
-               current_evmcs->page_fault_error_code_match = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case CR3_TARGET_COUNT:
-               current_evmcs->cr3_target_count = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case VM_EXIT_MSR_STORE_COUNT:
-               current_evmcs->vm_exit_msr_store_count = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case VM_EXIT_MSR_LOAD_COUNT:
-               current_evmcs->vm_exit_msr_load_count = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case VM_ENTRY_MSR_LOAD_COUNT:
-               current_evmcs->vm_entry_msr_load_count = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case HOST_ES_SELECTOR:
-               current_evmcs->host_es_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_CS_SELECTOR:
-               current_evmcs->host_cs_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_SS_SELECTOR:
-               current_evmcs->host_ss_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_DS_SELECTOR:
-               current_evmcs->host_ds_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_FS_SELECTOR:
-               current_evmcs->host_fs_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_GS_SELECTOR:
-               current_evmcs->host_gs_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_TR_SELECTOR:
-               current_evmcs->host_tr_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case GUEST_ES_SELECTOR:
-               current_evmcs->guest_es_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_CS_SELECTOR:
-               current_evmcs->guest_cs_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_SS_SELECTOR:
-               current_evmcs->guest_ss_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_DS_SELECTOR:
-               current_evmcs->guest_ds_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_FS_SELECTOR:
-               current_evmcs->guest_fs_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_GS_SELECTOR:
-               current_evmcs->guest_gs_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_LDTR_SELECTOR:
-               current_evmcs->guest_ldtr_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_TR_SELECTOR:
-               current_evmcs->guest_tr_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case VIRTUAL_PROCESSOR_ID:
-               current_evmcs->virtual_processor_id = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT;
-               break;
-       case HOST_IA32_PERF_GLOBAL_CTRL:
-               current_evmcs->host_ia32_perf_global_ctrl = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case GUEST_IA32_PERF_GLOBAL_CTRL:
-               current_evmcs->guest_ia32_perf_global_ctrl = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case ENCLS_EXITING_BITMAP:
-               current_evmcs->encls_exiting_bitmap = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
-               break;
-       case TSC_MULTIPLIER:
-               current_evmcs->tsc_multiplier = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
-               break;
-       default: return 1;
-       }
-
-       return 0;
-}
-
-static inline int evmcs_vmlaunch(void)
-{
-       int ret;
-
-       current_evmcs->hv_clean_fields = 0;
-
-       __asm__ __volatile__("push %%rbp;"
-                            "push %%rcx;"
-                            "push %%rdx;"
-                            "push %%rsi;"
-                            "push %%rdi;"
-                            "push $0;"
-                            "mov %%rsp, (%[host_rsp]);"
-                            "lea 1f(%%rip), %%rax;"
-                            "mov %%rax, (%[host_rip]);"
-                            "vmlaunch;"
-                            "incq (%%rsp);"
-                            "1: pop %%rax;"
-                            "pop %%rdi;"
-                            "pop %%rsi;"
-                            "pop %%rdx;"
-                            "pop %%rcx;"
-                            "pop %%rbp;"
-                            : [ret]"=&a"(ret)
-                            : [host_rsp]"r"
-                              ((uint64_t)&current_evmcs->host_rsp),
-                              [host_rip]"r"
-                              ((uint64_t)&current_evmcs->host_rip)
-                            : "memory", "cc", "rbx", "r8", "r9", "r10",
-                              "r11", "r12", "r13", "r14", "r15");
-       return ret;
-}
-
-/*
- * No guest state (e.g. GPRs) is established by this vmresume.
- */
-static inline int evmcs_vmresume(void)
-{
-       int ret;
-
-       /* HOST_RIP */
-       current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-       /* HOST_RSP */
-       current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
-
-       __asm__ __volatile__("push %%rbp;"
-                            "push %%rcx;"
-                            "push %%rdx;"
-                            "push %%rsi;"
-                            "push %%rdi;"
-                            "push $0;"
-                            "mov %%rsp, (%[host_rsp]);"
-                            "lea 1f(%%rip), %%rax;"
-                            "mov %%rax, (%[host_rip]);"
-                            "vmresume;"
-                            "incq (%%rsp);"
-                            "1: pop %%rax;"
-                            "pop %%rdi;"
-                            "pop %%rsi;"
-                            "pop %%rdx;"
-                            "pop %%rcx;"
-                            "pop %%rbp;"
-                            : [ret]"=&a"(ret)
-                            : [host_rsp]"r"
-                              ((uint64_t)&current_evmcs->host_rsp),
-                              [host_rip]"r"
-                              ((uint64_t)&current_evmcs->host_rip)
-                            : "memory", "cc", "rbx", "r8", "r9", "r10",
-                              "r11", "r12", "r13", "r14", "r15");
-       return ret;
-}
-
-#endif /* !SELFTEST_KVM_EVMCS_H */
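For context on how the launch/resume helpers above are exercised, here is a minimal, hypothetical guest-side sketch. Only evmcs_vmlaunch(), evmcs_vmresume() and current_evmcs come from this header; GUEST_ASSERT() and EXIT_REASON_VMCALL are assumed to be provided by the selftests' ucall and vmx headers, and the sketch assumes L2's first action is a VMCALL.

static void l1_evmcs_sketch(void)
{
	/*
	 * evmcs_vmlaunch() zeroes hv_clean_fields and returns the 0 it pushed
	 * on the stack once the first VM-exit lands at the "1:" label via
	 * host_rip; a failed VMLAUNCH falls through to the incq and returns 1.
	 */
	GUEST_ASSERT(!evmcs_vmlaunch());

	/* Exit information is read straight out of the enlightened VMCS. */
	GUEST_ASSERT(current_evmcs->vm_exit_reason == EXIT_REASON_VMCALL);

	/*
	 * Re-entries use evmcs_vmresume(), which re-dirties the HOST_RIP and
	 * HOST_RSP clean-field groups before resuming.
	 */
	GUEST_ASSERT(!evmcs_vmresume());
}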
diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
deleted file mode 100644 (file)
index 6849e25..0000000
+++ /dev/null
@@ -1,364 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * tools/testing/selftests/kvm/include/x86_64/hyperv.h
- *
- * Copyright (C) 2021, Red Hat, Inc.
- *
- */
-
-#ifndef SELFTEST_KVM_HYPERV_H
-#define SELFTEST_KVM_HYPERV_H
-
-#include "processor.h"
-
-#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS  0x40000000
-#define HYPERV_CPUID_INTERFACE                 0x40000001
-#define HYPERV_CPUID_VERSION                   0x40000002
-#define HYPERV_CPUID_FEATURES                  0x40000003
-#define HYPERV_CPUID_ENLIGHTMENT_INFO          0x40000004
-#define HYPERV_CPUID_IMPLEMENT_LIMITS          0x40000005
-#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES   0x40000007
-#define HYPERV_CPUID_NESTED_FEATURES           0x4000000A
-#define HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS   0x40000080
-#define HYPERV_CPUID_SYNDBG_INTERFACE                  0x40000081
-#define HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES      0x40000082
-
-#define HV_X64_MSR_GUEST_OS_ID                 0x40000000
-#define HV_X64_MSR_HYPERCALL                   0x40000001
-#define HV_X64_MSR_VP_INDEX                    0x40000002
-#define HV_X64_MSR_RESET                       0x40000003
-#define HV_X64_MSR_VP_RUNTIME                  0x40000010
-#define HV_X64_MSR_TIME_REF_COUNT              0x40000020
-#define HV_X64_MSR_REFERENCE_TSC               0x40000021
-#define HV_X64_MSR_TSC_FREQUENCY               0x40000022
-#define HV_X64_MSR_APIC_FREQUENCY              0x40000023
-#define HV_X64_MSR_EOI                         0x40000070
-#define HV_X64_MSR_ICR                         0x40000071
-#define HV_X64_MSR_TPR                         0x40000072
-#define HV_X64_MSR_VP_ASSIST_PAGE              0x40000073
-#define HV_X64_MSR_SCONTROL                    0x40000080
-#define HV_X64_MSR_SVERSION                    0x40000081
-#define HV_X64_MSR_SIEFP                       0x40000082
-#define HV_X64_MSR_SIMP                                0x40000083
-#define HV_X64_MSR_EOM                         0x40000084
-#define HV_X64_MSR_SINT0                       0x40000090
-#define HV_X64_MSR_SINT1                       0x40000091
-#define HV_X64_MSR_SINT2                       0x40000092
-#define HV_X64_MSR_SINT3                       0x40000093
-#define HV_X64_MSR_SINT4                       0x40000094
-#define HV_X64_MSR_SINT5                       0x40000095
-#define HV_X64_MSR_SINT6                       0x40000096
-#define HV_X64_MSR_SINT7                       0x40000097
-#define HV_X64_MSR_SINT8                       0x40000098
-#define HV_X64_MSR_SINT9                       0x40000099
-#define HV_X64_MSR_SINT10                      0x4000009A
-#define HV_X64_MSR_SINT11                      0x4000009B
-#define HV_X64_MSR_SINT12                      0x4000009C
-#define HV_X64_MSR_SINT13                      0x4000009D
-#define HV_X64_MSR_SINT14                      0x4000009E
-#define HV_X64_MSR_SINT15                      0x4000009F
-#define HV_X64_MSR_STIMER0_CONFIG              0x400000B0
-#define HV_X64_MSR_STIMER0_COUNT               0x400000B1
-#define HV_X64_MSR_STIMER1_CONFIG              0x400000B2
-#define HV_X64_MSR_STIMER1_COUNT               0x400000B3
-#define HV_X64_MSR_STIMER2_CONFIG              0x400000B4
-#define HV_X64_MSR_STIMER2_COUNT               0x400000B5
-#define HV_X64_MSR_STIMER3_CONFIG              0x400000B6
-#define HV_X64_MSR_STIMER3_COUNT               0x400000B7
-#define HV_X64_MSR_GUEST_IDLE                  0x400000F0
-#define HV_X64_MSR_CRASH_P0                    0x40000100
-#define HV_X64_MSR_CRASH_P1                    0x40000101
-#define HV_X64_MSR_CRASH_P2                    0x40000102
-#define HV_X64_MSR_CRASH_P3                    0x40000103
-#define HV_X64_MSR_CRASH_P4                    0x40000104
-#define HV_X64_MSR_CRASH_CTL                   0x40000105
-#define HV_X64_MSR_REENLIGHTENMENT_CONTROL     0x40000106
-#define HV_X64_MSR_TSC_EMULATION_CONTROL       0x40000107
-#define HV_X64_MSR_TSC_EMULATION_STATUS                0x40000108
-#define HV_X64_MSR_TSC_INVARIANT_CONTROL       0x40000118
-
-#define HV_X64_MSR_SYNDBG_CONTROL              0x400000F1
-#define HV_X64_MSR_SYNDBG_STATUS               0x400000F2
-#define HV_X64_MSR_SYNDBG_SEND_BUFFER          0x400000F3
-#define HV_X64_MSR_SYNDBG_RECV_BUFFER          0x400000F4
-#define HV_X64_MSR_SYNDBG_PENDING_BUFFER       0x400000F5
-#define HV_X64_MSR_SYNDBG_OPTIONS              0x400000FF
-
-/* HYPERV_CPUID_FEATURES.EAX */
-#define HV_MSR_VP_RUNTIME_AVAILABLE            \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 0)
-#define HV_MSR_TIME_REF_COUNT_AVAILABLE                \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 1)
-#define HV_MSR_SYNIC_AVAILABLE                 \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 2)
-#define HV_MSR_SYNTIMER_AVAILABLE              \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 3)
-#define HV_MSR_APIC_ACCESS_AVAILABLE           \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 4)
-#define HV_MSR_HYPERCALL_AVAILABLE             \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 5)
-#define HV_MSR_VP_INDEX_AVAILABLE              \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 6)
-#define HV_MSR_RESET_AVAILABLE                 \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 7)
-#define HV_MSR_STAT_PAGES_AVAILABLE            \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 8)
-#define HV_MSR_REFERENCE_TSC_AVAILABLE         \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 9)
-#define HV_MSR_GUEST_IDLE_AVAILABLE            \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 10)
-#define HV_ACCESS_FREQUENCY_MSRS               \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 11)
-#define HV_ACCESS_REENLIGHTENMENT              \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 13)
-#define HV_ACCESS_TSC_INVARIANT                        \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 15)
-
-/* HYPERV_CPUID_FEATURES.EBX */
-#define HV_CREATE_PARTITIONS                   \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 0)
-#define HV_ACCESS_PARTITION_ID                 \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 1)
-#define HV_ACCESS_MEMORY_POOL                  \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 2)
-#define HV_ADJUST_MESSAGE_BUFFERS              \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 3)
-#define HV_POST_MESSAGES                       \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 4)
-#define HV_SIGNAL_EVENTS                       \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 5)
-#define HV_CREATE_PORT                         \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 6)
-#define HV_CONNECT_PORT                                \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 7)
-#define HV_ACCESS_STATS                                \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 8)
-#define HV_DEBUGGING                           \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 11)
-#define HV_CPU_MANAGEMENT                      \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 12)
-#define HV_ENABLE_EXTENDED_HYPERCALLS          \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 20)
-#define HV_ISOLATION                           \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 22)
-
-/* HYPERV_CPUID_FEATURES.EDX */
-#define HV_X64_MWAIT_AVAILABLE                         \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 0)
-#define HV_X64_GUEST_DEBUGGING_AVAILABLE               \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 1)
-#define HV_X64_PERF_MONITOR_AVAILABLE                  \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 2)
-#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE      \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 3)
-#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE           \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 4)
-#define HV_X64_GUEST_IDLE_STATE_AVAILABLE              \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 5)
-#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE            \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 8)
-#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE           \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 10)
-#define HV_FEATURE_DEBUG_MSRS_AVAILABLE                        \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 11)
-#define HV_STIMER_DIRECT_MODE_AVAILABLE                        \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 19)
-
-/* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
-#define HV_X64_AS_SWITCH_RECOMMENDED                   \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 0)
-#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED             \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 1)
-#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED            \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 2)
-#define HV_X64_APIC_ACCESS_RECOMMENDED                 \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 3)
-#define HV_X64_SYSTEM_RESET_RECOMMENDED                        \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 4)
-#define HV_X64_RELAXED_TIMING_RECOMMENDED              \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 5)
-#define HV_DEPRECATING_AEOI_RECOMMENDED                        \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 9)
-#define HV_X64_CLUSTER_IPI_RECOMMENDED                 \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 10)
-#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED          \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 11)
-#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED            \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 14)
-
-/* HYPERV_CPUID_NESTED_FEATURES.EAX */
-#define HV_X64_NESTED_DIRECT_FLUSH                     \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 17)
-#define HV_X64_NESTED_GUEST_MAPPING_FLUSH              \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 18)
-#define HV_X64_NESTED_MSR_BITMAP                       \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 19)
-
-/* HYPERV_CPUID_NESTED_FEATURES.EBX */
-#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL          \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EBX, 0)
-
-/* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
-#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING       \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0, EAX, 1)
-
-/* Hypercalls */
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE     0x0002
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST      0x0003
-#define HVCALL_NOTIFY_LONG_SPIN_WAIT           0x0008
-#define HVCALL_SEND_IPI                                0x000b
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX  0x0013
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX   0x0014
-#define HVCALL_SEND_IPI_EX                     0x0015
-#define HVCALL_GET_PARTITION_ID                        0x0046
-#define HVCALL_DEPOSIT_MEMORY                  0x0048
-#define HVCALL_CREATE_VP                       0x004e
-#define HVCALL_GET_VP_REGISTERS                        0x0050
-#define HVCALL_SET_VP_REGISTERS                        0x0051
-#define HVCALL_POST_MESSAGE                    0x005c
-#define HVCALL_SIGNAL_EVENT                    0x005d
-#define HVCALL_POST_DEBUG_DATA                 0x0069
-#define HVCALL_RETRIEVE_DEBUG_DATA             0x006a
-#define HVCALL_RESET_DEBUG_SESSION             0x006b
-#define HVCALL_ADD_LOGICAL_PROCESSOR           0x0076
-#define HVCALL_MAP_DEVICE_INTERRUPT            0x007c
-#define HVCALL_UNMAP_DEVICE_INTERRUPT          0x007d
-#define HVCALL_RETARGET_INTERRUPT              0x007e
-#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
-#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
-
-/* Extended hypercalls */
-#define HV_EXT_CALL_QUERY_CAPABILITIES         0x8001
-
-#define HV_FLUSH_ALL_PROCESSORS                        BIT(0)
-#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES    BIT(1)
-#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY      BIT(2)
-#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT     BIT(3)
-
-/* hypercall status code */
-#define HV_STATUS_SUCCESS                      0
-#define HV_STATUS_INVALID_HYPERCALL_CODE       2
-#define HV_STATUS_INVALID_HYPERCALL_INPUT      3
-#define HV_STATUS_INVALID_ALIGNMENT            4
-#define HV_STATUS_INVALID_PARAMETER            5
-#define HV_STATUS_ACCESS_DENIED                        6
-#define HV_STATUS_OPERATION_DENIED             8
-#define HV_STATUS_INSUFFICIENT_MEMORY          11
-#define HV_STATUS_INVALID_PORT_ID              17
-#define HV_STATUS_INVALID_CONNECTION_ID                18
-#define HV_STATUS_INSUFFICIENT_BUFFERS         19
-
-/* hypercall options */
-#define HV_HYPERCALL_FAST_BIT          BIT(16)
-#define HV_HYPERCALL_VARHEAD_OFFSET    17
-#define HV_HYPERCALL_REP_COMP_OFFSET   32
-
-/*
- * Issue a Hyper-V hypercall. Returns exception vector raised or 0, 'hv_status'
- * is set to the hypercall status (if no exception occurred).
- */
-static inline uint8_t __hyperv_hypercall(u64 control, vm_vaddr_t input_address,
-                                        vm_vaddr_t output_address,
-                                        uint64_t *hv_status)
-{
-       uint64_t error_code;
-       uint8_t vector;
-
-       /* Note both the hypercall and the "asm safe" clobber r9-r11. */
-       asm volatile("mov %[output_address], %%r8\n\t"
-                    KVM_ASM_SAFE("vmcall")
-                    : "=a" (*hv_status),
-                      "+c" (control), "+d" (input_address),
-                      KVM_ASM_SAFE_OUTPUTS(vector, error_code)
-                    : [output_address] "r"(output_address),
-                      "a" (-EFAULT)
-                    : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS);
-       return vector;
-}
-
-/* Issue a Hyper-V hypercall and assert that it succeeded. */
-static inline void hyperv_hypercall(u64 control, vm_vaddr_t input_address,
-                                   vm_vaddr_t output_address)
-{
-       uint64_t hv_status;
-       uint8_t vector;
-
-       vector = __hyperv_hypercall(control, input_address, output_address, &hv_status);
-
-       GUEST_ASSERT(!vector);
-       GUEST_ASSERT((hv_status & 0xffff) == 0);
-}
-
-/* Write 'Fast' hypercall input 'data' to the first 'n_sse_regs' SSE regs */
-static inline void hyperv_write_xmm_input(void *data, int n_sse_regs)
-{
-       int i;
-
-       for (i = 0; i < n_sse_regs; i++)
-               write_sse_reg(i, (sse128_t *)(data + sizeof(sse128_t) * i));
-}
-
-/* Proper HV_X64_MSR_GUEST_OS_ID value */
-#define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48)
-
-#define HV_X64_MSR_VP_ASSIST_PAGE              0x40000073
-#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE       0x00000001
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT        12
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
-               (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
-
-struct hv_nested_enlightenments_control {
-       struct {
-               __u32 directhypercall:1;
-               __u32 reserved:31;
-       } features;
-       struct {
-               __u32 reserved;
-       } hypercallControls;
-} __packed;
-
-/* Define virtual processor assist page structure. */
-struct hv_vp_assist_page {
-       __u32 apic_assist;
-       __u32 reserved1;
-       __u64 vtl_control[3];
-       struct hv_nested_enlightenments_control nested_control;
-       __u8 enlighten_vmentry;
-       __u8 reserved2[7];
-       __u64 current_nested_vmcs;
-} __packed;
-
-extern struct hv_vp_assist_page *current_vp_assist;
-
-int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist);
-
-struct hyperv_test_pages {
-       /* VP assist page */
-       void *vp_assist_hva;
-       uint64_t vp_assist_gpa;
-       void *vp_assist;
-
-       /* Partition assist page */
-       void *partition_assist_hva;
-       uint64_t partition_assist_gpa;
-       void *partition_assist;
-
-       /* Enlightened VMCS */
-       void *enlightened_vmcs_hva;
-       uint64_t enlightened_vmcs_gpa;
-       void *enlightened_vmcs;
-};
-
-struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
-                                                      vm_vaddr_t *p_hv_pages_gva);
-
-/* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */
-#define HV_INVARIANT_TSC_EXPOSED               BIT_ULL(0)
-
-const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
-const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu);
-void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu);
-
-bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature);
-
-#endif /* !SELFTEST_KVM_HYPERV_H */
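As a rough illustration of how the hypercall helpers and the HVCALL_*/HV_X64_MSR_* definitions above fit together, a hypothetical guest-side sketch follows. The flush_input_gpa argument is an assumption (a guest page the test pre-populated with the hypercall input); wrmsr() is assumed to come from the selftests' processor.h.

static void guest_hv_sketch(vm_vaddr_t flush_input_gpa)
{
	/*
	 * Setting the guest OS ID is the usual first step before a VMCALL is
	 * treated as a Hyper-V hypercall.
	 */
	wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);

	/*
	 * Slow (memory-based) hypercall: the control word carries only the
	 * call code, while flags such as HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES
	 * live in the input page at flush_input_gpa.  hyperv_hypercall()
	 * asserts that no exception was raised and that hv_status reports
	 * HV_STATUS_SUCCESS.
	 */
	hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, flush_input_gpa, 0);
}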
diff --git a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h
deleted file mode 100644 (file)
index 972bb1c..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_UTIL_ARCH_H
-#define SELFTEST_KVM_UTIL_ARCH_H
-
-#include <stdbool.h>
-#include <stdint.h>
-
-#include "kvm_util_types.h"
-#include "test_util.h"
-
-extern bool is_forced_emulation_enabled;
-
-struct kvm_vm_arch {
-       vm_vaddr_t gdt;
-       vm_vaddr_t tss;
-       vm_vaddr_t idt;
-
-       uint64_t c_bit;
-       uint64_t s_bit;
-       int sev_fd;
-       bool is_pt_protected;
-};
-
-static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch)
-{
-       return arch->c_bit || arch->s_bit;
-}
-
-#define vm_arch_has_protected_memory(vm) \
-       __vm_arch_has_protected_memory(&(vm)->arch)
-
-#define vcpu_arch_put_guest(mem, __val)                                                        \
-do {                                                                                   \
-       const typeof(mem) val = (__val);                                                \
-                                                                                       \
-       if (!is_forced_emulation_enabled || guest_random_bool(&guest_rng)) {            \
-               (mem) = val;                                                            \
-       } else if (guest_random_bool(&guest_rng)) {                                     \
-               __asm__ __volatile__(KVM_FEP "mov %1, %0"                               \
-                                    : "+m" (mem)                                       \
-                                    : "r" (val) : "memory");                           \
-       } else {                                                                        \
-               uint64_t __old = READ_ONCE(mem);                                        \
-                                                                                       \
-               __asm__ __volatile__(KVM_FEP LOCK_PREFIX "cmpxchg %[new], %[ptr]"       \
-                                    : [ptr] "+m" (mem), [old] "+a" (__old)             \
-                                    : [new]"r" (val) : "memory", "cc");                \
-       }                                                                               \
-} while (0)
-
-#endif  // SELFTEST_KVM_UTIL_ARCH_H
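To illustrate vcpu_arch_put_guest() above, a hypothetical guest-side store might look like the sketch below; guest_data and guest_write_data() are illustrative names, not part of the header.

/*
 * Hypothetical sketch: stores issued this way go through a plain assignment,
 * a KVM_FEP-prefixed MOV, or a KVM_FEP-prefixed CMPXCHG, chosen at random
 * when forced emulation is enabled.
 */
static uint64_t guest_data;

static void guest_write_data(uint64_t value)
{
	vcpu_arch_put_guest(guest_data, value);
}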
diff --git a/tools/testing/selftests/kvm/include/x86_64/mce.h b/tools/testing/selftests/kvm/include/x86_64/mce.h
deleted file mode 100644 (file)
index 6119321..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/x86_64/mce.h
- *
- * Copyright (C) 2022, Google LLC.
- */
-
-#ifndef SELFTEST_KVM_MCE_H
-#define SELFTEST_KVM_MCE_H
-
-#define MCG_CTL_P              BIT_ULL(8)   /* MCG_CTL register available */
-#define MCG_SER_P              BIT_ULL(24)  /* MCA recovery/new status bits */
-#define MCG_LMCE_P             BIT_ULL(27)  /* Local machine check supported */
-#define MCG_CMCI_P             BIT_ULL(10)  /* CMCI supported */
-#define KVM_MAX_MCE_BANKS 32
-#define MCG_CAP_BANKS_MASK 0xff       /* Bit 0-7 of the MCG_CAP register are #banks */
-#define MCI_STATUS_VAL (1ULL << 63)   /* valid error */
-#define MCI_STATUS_UC (1ULL << 61)    /* uncorrected error */
-#define MCI_STATUS_EN (1ULL << 60)    /* error enabled */
-#define MCI_STATUS_MISCV (1ULL << 59) /* misc error reg. valid */
-#define MCI_STATUS_ADDRV (1ULL << 58) /* addr reg. valid */
-#define MCM_ADDR_PHYS 2    /* physical address */
-#define MCI_CTL2_CMCI_EN               BIT_ULL(30)
-
-#endif /* SELFTEST_KVM_MCE_H */
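A small, hypothetical sketch of how these bits are typically combined; MSR_IA32_MCG_CAP, rdmsr() and GUEST_ASSERT() are assumed to come from msr-index.h and the selftests' common headers.

static void guest_mce_sketch(void)
{
	/* Bits 0-7 of IA32_MCG_CAP give the number of reporting banks. */
	uint64_t mcg_cap = rdmsr(MSR_IA32_MCG_CAP);
	int nr_banks = mcg_cap & MCG_CAP_BANKS_MASK;

	/* A synthetic "valid, enabled, uncorrected" status for injection tests. */
	uint64_t status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_UC;

	GUEST_ASSERT(nr_banks <= KVM_MAX_MCE_BANKS);
	(void)status;
}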
diff --git a/tools/testing/selftests/kvm/include/x86_64/pmu.h b/tools/testing/selftests/kvm/include/x86_64/pmu.h
deleted file mode 100644 (file)
index 3c10c4d..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2023, Tencent, Inc.
- */
-#ifndef SELFTEST_KVM_PMU_H
-#define SELFTEST_KVM_PMU_H
-
-#include <stdint.h>
-
-#define KVM_PMU_EVENT_FILTER_MAX_EVENTS                        300
-
-/*
- * Encode an eventsel+umask pair into event-select MSR format.  Note, this is
- * technically AMD's format, as Intel's format only supports 8 bits for the
- * event selector, i.e. doesn't use bits 24:16 for the selector.  But, OR-ing
- * in '0' is a nop and won't clobber the CMASK.
- */
-#define RAW_EVENT(eventsel, umask) (((eventsel & 0xf00UL) << 24) |     \
-                                   ((eventsel) & 0xff) |               \
-                                   ((umask) & 0xff) << 8)
-
-/*
- * These are technically Intel's definitions, but except for CMASK (see above),
- * AMD's layout is compatible with Intel's.
- */
-#define ARCH_PERFMON_EVENTSEL_EVENT            GENMASK_ULL(7, 0)
-#define ARCH_PERFMON_EVENTSEL_UMASK            GENMASK_ULL(15, 8)
-#define ARCH_PERFMON_EVENTSEL_USR              BIT_ULL(16)
-#define ARCH_PERFMON_EVENTSEL_OS               BIT_ULL(17)
-#define ARCH_PERFMON_EVENTSEL_EDGE             BIT_ULL(18)
-#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL      BIT_ULL(19)
-#define ARCH_PERFMON_EVENTSEL_INT              BIT_ULL(20)
-#define ARCH_PERFMON_EVENTSEL_ANY              BIT_ULL(21)
-#define ARCH_PERFMON_EVENTSEL_ENABLE           BIT_ULL(22)
-#define ARCH_PERFMON_EVENTSEL_INV              BIT_ULL(23)
-#define ARCH_PERFMON_EVENTSEL_CMASK            GENMASK_ULL(31, 24)
-
-/* RDPMC control flags, Intel only. */
-#define INTEL_RDPMC_METRICS                    BIT_ULL(29)
-#define INTEL_RDPMC_FIXED                      BIT_ULL(30)
-#define INTEL_RDPMC_FAST                       BIT_ULL(31)
-
-/* Fixed PMC controls, Intel only. */
-#define FIXED_PMC_GLOBAL_CTRL_ENABLE(_idx)     BIT_ULL((32 + (_idx)))
-
-#define FIXED_PMC_KERNEL                       BIT_ULL(0)
-#define FIXED_PMC_USER                         BIT_ULL(1)
-#define FIXED_PMC_ANYTHREAD                    BIT_ULL(2)
-#define FIXED_PMC_ENABLE_PMI                   BIT_ULL(3)
-#define FIXED_PMC_NR_BITS                      4
-#define FIXED_PMC_CTRL(_idx, _val)             ((_val) << ((_idx) * FIXED_PMC_NR_BITS))
-
-#define PMU_CAP_FW_WRITES                      BIT_ULL(13)
-#define PMU_CAP_LBR_FMT                                0x3f
-
-#define        INTEL_ARCH_CPU_CYCLES                   RAW_EVENT(0x3c, 0x00)
-#define        INTEL_ARCH_INSTRUCTIONS_RETIRED         RAW_EVENT(0xc0, 0x00)
-#define        INTEL_ARCH_REFERENCE_CYCLES             RAW_EVENT(0x3c, 0x01)
-#define        INTEL_ARCH_LLC_REFERENCES               RAW_EVENT(0x2e, 0x4f)
-#define        INTEL_ARCH_LLC_MISSES                   RAW_EVENT(0x2e, 0x41)
-#define        INTEL_ARCH_BRANCHES_RETIRED             RAW_EVENT(0xc4, 0x00)
-#define        INTEL_ARCH_BRANCHES_MISPREDICTED        RAW_EVENT(0xc5, 0x00)
-#define        INTEL_ARCH_TOPDOWN_SLOTS                RAW_EVENT(0xa4, 0x01)
-
-#define        AMD_ZEN_CORE_CYCLES                     RAW_EVENT(0x76, 0x00)
-#define        AMD_ZEN_INSTRUCTIONS_RETIRED            RAW_EVENT(0xc0, 0x00)
-#define        AMD_ZEN_BRANCHES_RETIRED                RAW_EVENT(0xc2, 0x00)
-#define        AMD_ZEN_BRANCHES_MISPREDICTED           RAW_EVENT(0xc3, 0x00)
-
-/*
- * Note!  The order and thus the index of the architectural events matters as
- * support for each event is enumerated via CPUID using the index of the event.
- */
-enum intel_pmu_architectural_events {
-       INTEL_ARCH_CPU_CYCLES_INDEX,
-       INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX,
-       INTEL_ARCH_REFERENCE_CYCLES_INDEX,
-       INTEL_ARCH_LLC_REFERENCES_INDEX,
-       INTEL_ARCH_LLC_MISSES_INDEX,
-       INTEL_ARCH_BRANCHES_RETIRED_INDEX,
-       INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX,
-       INTEL_ARCH_TOPDOWN_SLOTS_INDEX,
-       NR_INTEL_ARCH_EVENTS,
-};
-
-enum amd_pmu_zen_events {
-       AMD_ZEN_CORE_CYCLES_INDEX,
-       AMD_ZEN_INSTRUCTIONS_INDEX,
-       AMD_ZEN_BRANCHES_INDEX,
-       AMD_ZEN_BRANCH_MISSES_INDEX,
-       NR_AMD_ZEN_EVENTS,
-};
-
-extern const uint64_t intel_pmu_arch_events[];
-extern const uint64_t amd_pmu_zen_events[];
-
-#endif /* SELFTEST_KVM_PMU_H */
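The RAW_EVENT() comment above describes the event-select encoding; below is a hypothetical guest-side sketch of programming a general-purpose counter with it. MSR_P6_EVNTSEL0/MSR_P6_PERFCTR0 and wrmsr()/rdmsr() are assumed to come from msr-index.h and the selftests' processor.h, and on PMU version 2+ the test would also need to enable the counter in MSR_CORE_PERF_GLOBAL_CTRL.

static uint64_t count_retired_instructions(void)
{
	/* Count retired instructions in user and kernel mode on GP counter 0. */
	wrmsr(MSR_P6_PERFCTR0, 0);
	wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
			       ARCH_PERFMON_EVENTSEL_OS |
			       ARCH_PERFMON_EVENTSEL_USR |
			       INTEL_ARCH_INSTRUCTIONS_RETIRED);

	return rdmsr(MSR_P6_PERFCTR0);
}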
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
deleted file mode 100644 (file)
index 645200e..0000000
+++ /dev/null
@@ -1,1397 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/x86_64/processor.h
- *
- * Copyright (C) 2018, Google LLC.
- */
-
-#ifndef SELFTEST_KVM_PROCESSOR_H
-#define SELFTEST_KVM_PROCESSOR_H
-
-#include <assert.h>
-#include <stdint.h>
-#include <syscall.h>
-
-#include <asm/msr-index.h>
-#include <asm/prctl.h>
-
-#include <linux/kvm_para.h>
-#include <linux/stringify.h>
-
-#include "kvm_util.h"
-#include "ucall_common.h"
-
-extern bool host_cpu_is_intel;
-extern bool host_cpu_is_amd;
-extern uint64_t guest_tsc_khz;
-
-#ifndef MAX_NR_CPUID_ENTRIES
-#define MAX_NR_CPUID_ENTRIES 100
-#endif
-
-/* Forced emulation prefix, used to invoke the emulator unconditionally. */
-#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
-
-#define NMI_VECTOR             0x02
-
-#define X86_EFLAGS_FIXED        (1u << 1)
-
-#define X86_CR4_VME            (1ul << 0)
-#define X86_CR4_PVI            (1ul << 1)
-#define X86_CR4_TSD            (1ul << 2)
-#define X86_CR4_DE             (1ul << 3)
-#define X86_CR4_PSE            (1ul << 4)
-#define X86_CR4_PAE            (1ul << 5)
-#define X86_CR4_MCE            (1ul << 6)
-#define X86_CR4_PGE            (1ul << 7)
-#define X86_CR4_PCE            (1ul << 8)
-#define X86_CR4_OSFXSR         (1ul << 9)
-#define X86_CR4_OSXMMEXCPT     (1ul << 10)
-#define X86_CR4_UMIP           (1ul << 11)
-#define X86_CR4_LA57           (1ul << 12)
-#define X86_CR4_VMXE           (1ul << 13)
-#define X86_CR4_SMXE           (1ul << 14)
-#define X86_CR4_FSGSBASE       (1ul << 16)
-#define X86_CR4_PCIDE          (1ul << 17)
-#define X86_CR4_OSXSAVE                (1ul << 18)
-#define X86_CR4_SMEP           (1ul << 20)
-#define X86_CR4_SMAP           (1ul << 21)
-#define X86_CR4_PKE            (1ul << 22)
-
-struct xstate_header {
-       u64                             xstate_bv;
-       u64                             xcomp_bv;
-       u64                             reserved[6];
-} __attribute__((packed));
-
-struct xstate {
-       u8                              i387[512];
-       struct xstate_header            header;
-       u8                              extended_state_area[0];
-} __attribute__ ((packed, aligned (64)));
-
-#define XFEATURE_MASK_FP               BIT_ULL(0)
-#define XFEATURE_MASK_SSE              BIT_ULL(1)
-#define XFEATURE_MASK_YMM              BIT_ULL(2)
-#define XFEATURE_MASK_BNDREGS          BIT_ULL(3)
-#define XFEATURE_MASK_BNDCSR           BIT_ULL(4)
-#define XFEATURE_MASK_OPMASK           BIT_ULL(5)
-#define XFEATURE_MASK_ZMM_Hi256                BIT_ULL(6)
-#define XFEATURE_MASK_Hi16_ZMM         BIT_ULL(7)
-#define XFEATURE_MASK_PT               BIT_ULL(8)
-#define XFEATURE_MASK_PKRU             BIT_ULL(9)
-#define XFEATURE_MASK_PASID            BIT_ULL(10)
-#define XFEATURE_MASK_CET_USER         BIT_ULL(11)
-#define XFEATURE_MASK_CET_KERNEL       BIT_ULL(12)
-#define XFEATURE_MASK_LBR              BIT_ULL(15)
-#define XFEATURE_MASK_XTILE_CFG                BIT_ULL(17)
-#define XFEATURE_MASK_XTILE_DATA       BIT_ULL(18)
-
-#define XFEATURE_MASK_AVX512           (XFEATURE_MASK_OPMASK | \
-                                        XFEATURE_MASK_ZMM_Hi256 | \
-                                        XFEATURE_MASK_Hi16_ZMM)
-#define XFEATURE_MASK_XTILE            (XFEATURE_MASK_XTILE_DATA | \
-                                        XFEATURE_MASK_XTILE_CFG)
-
-/* Note, these are ordered alphabetically to match kvm_cpuid_entry2.  Eww. */
-enum cpuid_output_regs {
-       KVM_CPUID_EAX,
-       KVM_CPUID_EBX,
-       KVM_CPUID_ECX,
-       KVM_CPUID_EDX
-};
-
-/*
- * Pack the information into a 64-bit value so that each X86_FEATURE_XXX can be
- * passed by value with no overhead.
- */
-struct kvm_x86_cpu_feature {
-       u32     function;
-       u16     index;
-       u8      reg;
-       u8      bit;
-};
-#define        KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit)                                \
-({                                                                             \
-       struct kvm_x86_cpu_feature feature = {                                  \
-               .function = fn,                                                 \
-               .index = idx,                                                   \
-               .reg = KVM_CPUID_##gpr,                                         \
-               .bit = __bit,                                                   \
-       };                                                                      \
-                                                                               \
-       kvm_static_assert((fn & 0xc0000000) == 0 ||                             \
-                         (fn & 0xc0000000) == 0x40000000 ||                    \
-                         (fn & 0xc0000000) == 0x80000000 ||                    \
-                         (fn & 0xc0000000) == 0xc0000000);                     \
-       kvm_static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE));    \
-       feature;                                                                \
-})
-
-/*
- * Basic Leafs, a.k.a. Intel defined
- */
-#define        X86_FEATURE_MWAIT               KVM_X86_CPU_FEATURE(0x1, 0, ECX, 3)
-#define        X86_FEATURE_VMX                 KVM_X86_CPU_FEATURE(0x1, 0, ECX, 5)
-#define        X86_FEATURE_SMX                 KVM_X86_CPU_FEATURE(0x1, 0, ECX, 6)
-#define        X86_FEATURE_PDCM                KVM_X86_CPU_FEATURE(0x1, 0, ECX, 15)
-#define        X86_FEATURE_PCID                KVM_X86_CPU_FEATURE(0x1, 0, ECX, 17)
-#define X86_FEATURE_X2APIC             KVM_X86_CPU_FEATURE(0x1, 0, ECX, 21)
-#define        X86_FEATURE_MOVBE               KVM_X86_CPU_FEATURE(0x1, 0, ECX, 22)
-#define        X86_FEATURE_TSC_DEADLINE_TIMER  KVM_X86_CPU_FEATURE(0x1, 0, ECX, 24)
-#define        X86_FEATURE_XSAVE               KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26)
-#define        X86_FEATURE_OSXSAVE             KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27)
-#define        X86_FEATURE_RDRAND              KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30)
-#define        X86_FEATURE_HYPERVISOR          KVM_X86_CPU_FEATURE(0x1, 0, ECX, 31)
-#define X86_FEATURE_PAE                        KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6)
-#define        X86_FEATURE_MCE                 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7)
-#define        X86_FEATURE_APIC                KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9)
-#define        X86_FEATURE_CLFLUSH             KVM_X86_CPU_FEATURE(0x1, 0, EDX, 19)
-#define        X86_FEATURE_XMM                 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 25)
-#define        X86_FEATURE_XMM2                KVM_X86_CPU_FEATURE(0x1, 0, EDX, 26)
-#define        X86_FEATURE_FSGSBASE            KVM_X86_CPU_FEATURE(0x7, 0, EBX, 0)
-#define        X86_FEATURE_TSC_ADJUST          KVM_X86_CPU_FEATURE(0x7, 0, EBX, 1)
-#define        X86_FEATURE_SGX                 KVM_X86_CPU_FEATURE(0x7, 0, EBX, 2)
-#define        X86_FEATURE_HLE                 KVM_X86_CPU_FEATURE(0x7, 0, EBX, 4)
-#define        X86_FEATURE_SMEP                KVM_X86_CPU_FEATURE(0x7, 0, EBX, 7)
-#define        X86_FEATURE_INVPCID             KVM_X86_CPU_FEATURE(0x7, 0, EBX, 10)
-#define        X86_FEATURE_RTM                 KVM_X86_CPU_FEATURE(0x7, 0, EBX, 11)
-#define        X86_FEATURE_MPX                 KVM_X86_CPU_FEATURE(0x7, 0, EBX, 14)
-#define        X86_FEATURE_SMAP                KVM_X86_CPU_FEATURE(0x7, 0, EBX, 20)
-#define        X86_FEATURE_PCOMMIT             KVM_X86_CPU_FEATURE(0x7, 0, EBX, 22)
-#define        X86_FEATURE_CLFLUSHOPT          KVM_X86_CPU_FEATURE(0x7, 0, EBX, 23)
-#define        X86_FEATURE_CLWB                KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24)
-#define        X86_FEATURE_UMIP                KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2)
-#define        X86_FEATURE_PKU                 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3)
-#define        X86_FEATURE_OSPKE               KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4)
-#define        X86_FEATURE_LA57                KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16)
-#define        X86_FEATURE_RDPID               KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22)
-#define        X86_FEATURE_SGX_LC              KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30)
-#define        X86_FEATURE_SHSTK               KVM_X86_CPU_FEATURE(0x7, 0, ECX, 7)
-#define        X86_FEATURE_IBT                 KVM_X86_CPU_FEATURE(0x7, 0, EDX, 20)
-#define        X86_FEATURE_AMX_TILE            KVM_X86_CPU_FEATURE(0x7, 0, EDX, 24)
-#define        X86_FEATURE_SPEC_CTRL           KVM_X86_CPU_FEATURE(0x7, 0, EDX, 26)
-#define        X86_FEATURE_ARCH_CAPABILITIES   KVM_X86_CPU_FEATURE(0x7, 0, EDX, 29)
-#define        X86_FEATURE_PKS                 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 31)
-#define        X86_FEATURE_XTILECFG            KVM_X86_CPU_FEATURE(0xD, 0, EAX, 17)
-#define        X86_FEATURE_XTILEDATA           KVM_X86_CPU_FEATURE(0xD, 0, EAX, 18)
-#define        X86_FEATURE_XSAVES              KVM_X86_CPU_FEATURE(0xD, 1, EAX, 3)
-#define        X86_FEATURE_XFD                 KVM_X86_CPU_FEATURE(0xD, 1, EAX, 4)
-#define X86_FEATURE_XTILEDATA_XFD      KVM_X86_CPU_FEATURE(0xD, 18, ECX, 2)
-
-/*
- * Extended Leafs, a.k.a. AMD defined
- */
-#define        X86_FEATURE_SVM                 KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2)
-#define        X86_FEATURE_NX                  KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20)
-#define        X86_FEATURE_GBPAGES             KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26)
-#define        X86_FEATURE_RDTSCP              KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27)
-#define        X86_FEATURE_LM                  KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 29)
-#define        X86_FEATURE_INVTSC              KVM_X86_CPU_FEATURE(0x80000007, 0, EDX, 8)
-#define        X86_FEATURE_RDPRU               KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 4)
-#define        X86_FEATURE_AMD_IBPB            KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 12)
-#define        X86_FEATURE_NPT                 KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 0)
-#define        X86_FEATURE_LBRV                KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 1)
-#define        X86_FEATURE_NRIPS               KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 3)
-#define X86_FEATURE_TSCRATEMSR          KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4)
-#define X86_FEATURE_PAUSEFILTER         KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
-#define X86_FEATURE_PFTHRESHOLD         KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
-#define        X86_FEATURE_VGIF                KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
-#define X86_FEATURE_SEV                        KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
-#define X86_FEATURE_SEV_ES             KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
-
-/*
- * KVM defined paravirt features.
- */
-#define X86_FEATURE_KVM_CLOCKSOURCE    KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 0)
-#define X86_FEATURE_KVM_NOP_IO_DELAY   KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 1)
-#define X86_FEATURE_KVM_MMU_OP         KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 2)
-#define X86_FEATURE_KVM_CLOCKSOURCE2   KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 3)
-#define X86_FEATURE_KVM_ASYNC_PF       KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 4)
-#define X86_FEATURE_KVM_STEAL_TIME     KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 5)
-#define X86_FEATURE_KVM_PV_EOI         KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 6)
-#define X86_FEATURE_KVM_PV_UNHALT      KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 7)
-/* Bit 8 apparently isn't used?!?! */
-#define X86_FEATURE_KVM_PV_TLB_FLUSH   KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 9)
-#define X86_FEATURE_KVM_ASYNC_PF_VMEXIT        KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 10)
-#define X86_FEATURE_KVM_PV_SEND_IPI    KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 11)
-#define X86_FEATURE_KVM_POLL_CONTROL   KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 12)
-#define X86_FEATURE_KVM_PV_SCHED_YIELD KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 13)
-#define X86_FEATURE_KVM_ASYNC_PF_INT   KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 14)
-#define X86_FEATURE_KVM_MSI_EXT_DEST_ID        KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 15)
-#define X86_FEATURE_KVM_HC_MAP_GPA_RANGE       KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 16)
-#define X86_FEATURE_KVM_MIGRATION_CONTROL      KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 17)
-
-/*
- * Same idea as X86_FEATURE_XXX, but X86_PROPERTY_XXX retrieves a multi-bit
- * value/property as opposed to a single-bit feature.  Again, pack the info
- * into a 64-bit value to pass by value with no overhead.
- */
-struct kvm_x86_cpu_property {
-       u32     function;
-       u8      index;
-       u8      reg;
-       u8      lo_bit;
-       u8      hi_bit;
-};
-#define        KVM_X86_CPU_PROPERTY(fn, idx, gpr, low_bit, high_bit)                   \
-({                                                                             \
-       struct kvm_x86_cpu_property property = {                                \
-               .function = fn,                                                 \
-               .index = idx,                                                   \
-               .reg = KVM_CPUID_##gpr,                                         \
-               .lo_bit = low_bit,                                              \
-               .hi_bit = high_bit,                                             \
-       };                                                                      \
-                                                                               \
-       kvm_static_assert(low_bit < high_bit);                                  \
-       kvm_static_assert((fn & 0xc0000000) == 0 ||                             \
-                         (fn & 0xc0000000) == 0x40000000 ||                    \
-                         (fn & 0xc0000000) == 0x80000000 ||                    \
-                         (fn & 0xc0000000) == 0xc0000000);                     \
-       kvm_static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE));   \
-       property;                                                               \
-})
-
-#define X86_PROPERTY_MAX_BASIC_LEAF            KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31)
-#define X86_PROPERTY_PMU_VERSION               KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7)
-#define X86_PROPERTY_PMU_NR_GP_COUNTERS                KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
-#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23)
-#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
-#define X86_PROPERTY_PMU_EVENTS_MASK           KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7)
-#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK        KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31)
-#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS     KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4)
-#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH      KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12)
-
-#define X86_PROPERTY_SUPPORTED_XCR0_LO         KVM_X86_CPU_PROPERTY(0xd,  0, EAX,  0, 31)
-#define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0      KVM_X86_CPU_PROPERTY(0xd,  0, EBX,  0, 31)
-#define X86_PROPERTY_XSTATE_MAX_SIZE           KVM_X86_CPU_PROPERTY(0xd,  0, ECX,  0, 31)
-#define X86_PROPERTY_SUPPORTED_XCR0_HI         KVM_X86_CPU_PROPERTY(0xd,  0, EDX,  0, 31)
-
-#define X86_PROPERTY_XSTATE_TILE_SIZE          KVM_X86_CPU_PROPERTY(0xd, 18, EAX,  0, 31)
-#define X86_PROPERTY_XSTATE_TILE_OFFSET                KVM_X86_CPU_PROPERTY(0xd, 18, EBX,  0, 31)
-#define X86_PROPERTY_AMX_MAX_PALETTE_TABLES    KVM_X86_CPU_PROPERTY(0x1d, 0, EAX,  0, 31)
-#define X86_PROPERTY_AMX_TOTAL_TILE_BYTES      KVM_X86_CPU_PROPERTY(0x1d, 1, EAX,  0, 15)
-#define X86_PROPERTY_AMX_BYTES_PER_TILE                KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 16, 31)
-#define X86_PROPERTY_AMX_BYTES_PER_ROW         KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 0,  15)
-#define X86_PROPERTY_AMX_NR_TILE_REGS          KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 16, 31)
-#define X86_PROPERTY_AMX_MAX_ROWS              KVM_X86_CPU_PROPERTY(0x1d, 1, ECX, 0,  15)
-
-#define X86_PROPERTY_MAX_KVM_LEAF              KVM_X86_CPU_PROPERTY(0x40000000, 0, EAX, 0, 31)
-
-#define X86_PROPERTY_MAX_EXT_LEAF              KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31)
-#define X86_PROPERTY_MAX_PHY_ADDR              KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7)
-#define X86_PROPERTY_MAX_VIRT_ADDR             KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15)
-#define X86_PROPERTY_GUEST_MAX_PHY_ADDR                KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23)
-#define X86_PROPERTY_SEV_C_BIT                 KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
-#define X86_PROPERTY_PHYS_ADDR_REDUCTION       KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
-
-#define X86_PROPERTY_MAX_CENTAUR_LEAF          KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)
-
-/*
- * Intel's architectural PMU events are bizarre.  They have a "feature" bit
- * that indicates the feature is _not_ supported, and a property that states
- * the length of the bit mask of unsupported features.  A feature is supported
- * if the size of the bit mask is larger than the "unavailable" bit, and said
- * bit is not set.  Fixed counters also have bizarre enumeration, but inverted
- * from the arch events for general purpose counters.  Fixed counters are
- * supported if a feature flag is set **OR** the total number of fixed counters
- * is greater than the index of the counter.
- *
- * Wrap the events for general purpose and fixed counters to simplify checking
- * whether or not a given architectural event is supported.
- */
-struct kvm_x86_pmu_feature {
-       struct kvm_x86_cpu_feature f;
-};
-#define        KVM_X86_PMU_FEATURE(__reg, __bit)                               \
-({                                                                     \
-       struct kvm_x86_pmu_feature feature = {                          \
-               .f = KVM_X86_CPU_FEATURE(0xa, 0, __reg, __bit),         \
-       };                                                              \
-                                                                       \
-       kvm_static_assert(KVM_CPUID_##__reg == KVM_CPUID_EBX ||         \
-                         KVM_CPUID_##__reg == KVM_CPUID_ECX);          \
-       feature;                                                        \
-})
-
-#define X86_PMU_FEATURE_CPU_CYCLES                     KVM_X86_PMU_FEATURE(EBX, 0)
-#define X86_PMU_FEATURE_INSNS_RETIRED                  KVM_X86_PMU_FEATURE(EBX, 1)
-#define X86_PMU_FEATURE_REFERENCE_CYCLES               KVM_X86_PMU_FEATURE(EBX, 2)
-#define X86_PMU_FEATURE_LLC_REFERENCES                 KVM_X86_PMU_FEATURE(EBX, 3)
-#define X86_PMU_FEATURE_LLC_MISSES                     KVM_X86_PMU_FEATURE(EBX, 4)
-#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED           KVM_X86_PMU_FEATURE(EBX, 5)
-#define X86_PMU_FEATURE_BRANCHES_MISPREDICTED          KVM_X86_PMU_FEATURE(EBX, 6)
-#define X86_PMU_FEATURE_TOPDOWN_SLOTS                  KVM_X86_PMU_FEATURE(EBX, 7)
-
-#define X86_PMU_FEATURE_INSNS_RETIRED_FIXED            KVM_X86_PMU_FEATURE(ECX, 0)
-#define X86_PMU_FEATURE_CPU_CYCLES_FIXED               KVM_X86_PMU_FEATURE(ECX, 1)
-#define X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED     KVM_X86_PMU_FEATURE(ECX, 2)
-#define X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED            KVM_X86_PMU_FEATURE(ECX, 3)
-
-static inline unsigned int x86_family(unsigned int eax)
-{
-       unsigned int x86;
-
-       x86 = (eax >> 8) & 0xf;
-
-       if (x86 == 0xf)
-               x86 += (eax >> 20) & 0xff;
-
-       return x86;
-}
-
-static inline unsigned int x86_model(unsigned int eax)
-{
-       return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f);
-}
-
-/* Page table bitfield declarations */
-#define PTE_PRESENT_MASK        BIT_ULL(0)
-#define PTE_WRITABLE_MASK       BIT_ULL(1)
-#define PTE_USER_MASK           BIT_ULL(2)
-#define PTE_ACCESSED_MASK       BIT_ULL(5)
-#define PTE_DIRTY_MASK          BIT_ULL(6)
-#define PTE_LARGE_MASK          BIT_ULL(7)
-#define PTE_GLOBAL_MASK         BIT_ULL(8)
-#define PTE_NX_MASK             BIT_ULL(63)
-
-#define PHYSICAL_PAGE_MASK      GENMASK_ULL(51, 12)
-
-#define PAGE_SHIFT             12
-#define PAGE_SIZE              (1ULL << PAGE_SHIFT)
-#define PAGE_MASK              (~(PAGE_SIZE-1) & PHYSICAL_PAGE_MASK)
-
-#define HUGEPAGE_SHIFT(x)      (PAGE_SHIFT + (((x) - 1) * 9))
-#define HUGEPAGE_SIZE(x)       (1UL << HUGEPAGE_SHIFT(x))
-#define HUGEPAGE_MASK(x)       (~(HUGEPAGE_SIZE(x) - 1) & PHYSICAL_PAGE_MASK)
-
-#define PTE_GET_PA(pte)                ((pte) & PHYSICAL_PAGE_MASK)
-#define PTE_GET_PFN(pte)        (PTE_GET_PA(pte) >> PAGE_SHIFT)
-
-/* General Registers in 64-Bit Mode */
-struct gpr64_regs {
-       u64 rax;
-       u64 rcx;
-       u64 rdx;
-       u64 rbx;
-       u64 rsp;
-       u64 rbp;
-       u64 rsi;
-       u64 rdi;
-       u64 r8;
-       u64 r9;
-       u64 r10;
-       u64 r11;
-       u64 r12;
-       u64 r13;
-       u64 r14;
-       u64 r15;
-};
-
-struct desc64 {
-       uint16_t limit0;
-       uint16_t base0;
-       unsigned base1:8, type:4, s:1, dpl:2, p:1;
-       unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
-       uint32_t base3;
-       uint32_t zero1;
-} __attribute__((packed));
-
-struct desc_ptr {
-       uint16_t size;
-       uint64_t address;
-} __attribute__((packed));
-
-struct kvm_x86_state {
-       struct kvm_xsave *xsave;
-       struct kvm_vcpu_events events;
-       struct kvm_mp_state mp_state;
-       struct kvm_regs regs;
-       struct kvm_xcrs xcrs;
-       struct kvm_sregs sregs;
-       struct kvm_debugregs debugregs;
-       union {
-               struct kvm_nested_state nested;
-               char nested_[16384];
-       };
-       struct kvm_msrs msrs;
-};
-
-static inline uint64_t get_desc64_base(const struct desc64 *desc)
-{
-       return ((uint64_t)desc->base3 << 32) |
-               (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
-}
-
-static inline uint64_t rdtsc(void)
-{
-       uint32_t eax, edx;
-       uint64_t tsc_val;
-       /*
-        * The lfence is to wait (on Intel CPUs) until all previous
-        * instructions have been executed. If software requires RDTSC to be
-        * executed prior to execution of any subsequent instruction, it can
-        * execute LFENCE immediately after RDTSC.
-        */
-       __asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
-       tsc_val = ((uint64_t)edx) << 32 | eax;
-       return tsc_val;
-}
-
-static inline uint64_t rdtscp(uint32_t *aux)
-{
-       uint32_t eax, edx;
-
-       __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
-       return ((uint64_t)edx) << 32 | eax;
-}
-
-static inline uint64_t rdmsr(uint32_t msr)
-{
-       uint32_t a, d;
-
-       __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");
-
-       return a | ((uint64_t) d << 32);
-}
-
-static inline void wrmsr(uint32_t msr, uint64_t value)
-{
-       uint32_t a = value;
-       uint32_t d = value >> 32;
-
-       __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
-}
-
-static inline uint16_t inw(uint16_t port)
-{
-       uint16_t tmp;
-
-       __asm__ __volatile__("in %%dx, %%ax"
-               : /* output */ "=a" (tmp)
-               : /* input */ "d" (port));
-
-       return tmp;
-}
-
-static inline uint16_t get_es(void)
-{
-       uint16_t es;
-
-       __asm__ __volatile__("mov %%es, %[es]"
-                            : /* output */ [es]"=rm"(es));
-       return es;
-}
-
-static inline uint16_t get_cs(void)
-{
-       uint16_t cs;
-
-       __asm__ __volatile__("mov %%cs, %[cs]"
-                            : /* output */ [cs]"=rm"(cs));
-       return cs;
-}
-
-static inline uint16_t get_ss(void)
-{
-       uint16_t ss;
-
-       __asm__ __volatile__("mov %%ss, %[ss]"
-                            : /* output */ [ss]"=rm"(ss));
-       return ss;
-}
-
-static inline uint16_t get_ds(void)
-{
-       uint16_t ds;
-
-       __asm__ __volatile__("mov %%ds, %[ds]"
-                            : /* output */ [ds]"=rm"(ds));
-       return ds;
-}
-
-static inline uint16_t get_fs(void)
-{
-       uint16_t fs;
-
-       __asm__ __volatile__("mov %%fs, %[fs]"
-                            : /* output */ [fs]"=rm"(fs));
-       return fs;
-}
-
-static inline uint16_t get_gs(void)
-{
-       uint16_t gs;
-
-       __asm__ __volatile__("mov %%gs, %[gs]"
-                            : /* output */ [gs]"=rm"(gs));
-       return gs;
-}
-
-static inline uint16_t get_tr(void)
-{
-       uint16_t tr;
-
-       __asm__ __volatile__("str %[tr]"
-                            : /* output */ [tr]"=rm"(tr));
-       return tr;
-}
-
-static inline uint64_t get_cr0(void)
-{
-       uint64_t cr0;
-
-       __asm__ __volatile__("mov %%cr0, %[cr0]"
-                            : /* output */ [cr0]"=r"(cr0));
-       return cr0;
-}
-
-static inline uint64_t get_cr3(void)
-{
-       uint64_t cr3;
-
-       __asm__ __volatile__("mov %%cr3, %[cr3]"
-                            : /* output */ [cr3]"=r"(cr3));
-       return cr3;
-}
-
-static inline uint64_t get_cr4(void)
-{
-       uint64_t cr4;
-
-       __asm__ __volatile__("mov %%cr4, %[cr4]"
-                            : /* output */ [cr4]"=r"(cr4));
-       return cr4;
-}
-
-static inline void set_cr4(uint64_t val)
-{
-       __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
-}
-
-static inline u64 xgetbv(u32 index)
-{
-       u32 eax, edx;
-
-       __asm__ __volatile__("xgetbv;"
-                    : "=a" (eax), "=d" (edx)
-                    : "c" (index));
-       return eax | ((u64)edx << 32);
-}
-
-static inline void xsetbv(u32 index, u64 value)
-{
-       u32 eax = value;
-       u32 edx = value >> 32;
-
-       __asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
-}
-
-static inline void wrpkru(u32 pkru)
-{
-       /* Note, ECX and EDX are architecturally required to be '0'. */
-       asm volatile(".byte 0x0f,0x01,0xef\n\t"
-                    : : "a" (pkru), "c"(0), "d"(0));
-}
-
-static inline struct desc_ptr get_gdt(void)
-{
-       struct desc_ptr gdt;
-       __asm__ __volatile__("sgdt %[gdt]"
-                            : /* output */ [gdt]"=m"(gdt));
-       return gdt;
-}
-
-static inline struct desc_ptr get_idt(void)
-{
-       struct desc_ptr idt;
-       __asm__ __volatile__("sidt %[idt]"
-                            : /* output */ [idt]"=m"(idt));
-       return idt;
-}
-
-static inline void outl(uint16_t port, uint32_t value)
-{
-       __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
-}
-
-static inline void __cpuid(uint32_t function, uint32_t index,
-                          uint32_t *eax, uint32_t *ebx,
-                          uint32_t *ecx, uint32_t *edx)
-{
-       *eax = function;
-       *ecx = index;
-
-       asm volatile("cpuid"
-           : "=a" (*eax),
-             "=b" (*ebx),
-             "=c" (*ecx),
-             "=d" (*edx)
-           : "0" (*eax), "2" (*ecx)
-           : "memory");
-}
-
-static inline void cpuid(uint32_t function,
-                        uint32_t *eax, uint32_t *ebx,
-                        uint32_t *ecx, uint32_t *edx)
-{
-       return __cpuid(function, 0, eax, ebx, ecx, edx);
-}
-
-static inline uint32_t this_cpu_fms(void)
-{
-       uint32_t eax, ebx, ecx, edx;
-
-       cpuid(1, &eax, &ebx, &ecx, &edx);
-       return eax;
-}
-
-static inline uint32_t this_cpu_family(void)
-{
-       return x86_family(this_cpu_fms());
-}
-
-static inline uint32_t this_cpu_model(void)
-{
-       return x86_model(this_cpu_fms());
-}
-
-static inline bool this_cpu_vendor_string_is(const char *vendor)
-{
-       const uint32_t *chunk = (const uint32_t *)vendor;
-       uint32_t eax, ebx, ecx, edx;
-
-       cpuid(0, &eax, &ebx, &ecx, &edx);
-       return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
-}
-
-static inline bool this_cpu_is_intel(void)
-{
-       return this_cpu_vendor_string_is("GenuineIntel");
-}
-
-/*
- * Exclude early K5 samples with a vendor string of "AMDisbetter!"
- */
-static inline bool this_cpu_is_amd(void)
-{
-       return this_cpu_vendor_string_is("AuthenticAMD");
-}
-
-static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index,
-                                     uint8_t reg, uint8_t lo, uint8_t hi)
-{
-       uint32_t gprs[4];
-
-       __cpuid(function, index,
-               &gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX],
-               &gprs[KVM_CPUID_ECX], &gprs[KVM_CPUID_EDX]);
-
-       return (gprs[reg] & GENMASK(hi, lo)) >> lo;
-}
-
-static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature)
-{
-       return __this_cpu_has(feature.function, feature.index,
-                             feature.reg, feature.bit, feature.bit);
-}
-
-static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property)
-{
-       return __this_cpu_has(property.function, property.index,
-                             property.reg, property.lo_bit, property.hi_bit);
-}
-
-static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property)
-{
-       uint32_t max_leaf;
-
-       switch (property.function & 0xc0000000) {
-       case 0:
-               max_leaf = this_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
-               break;
-       case 0x40000000:
-               max_leaf = this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
-               break;
-       case 0x80000000:
-               max_leaf = this_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
-               break;
-       case 0xc0000000:
-               max_leaf = this_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
-       }
-       return max_leaf >= property.function;
-}
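/*
 * Illustrative sketch (editor's addition, not part of the original header):
 * guard a multi-bit property read with the matching *_has_p() check so that
 * CPUID leaves beyond the enumerated maximum are never consumed.  The 36-bit
 * fallback is only a common default, not something mandated by this header.
 */
static inline uint32_t example_cpu_phys_bits(void)
{
	if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR))
		return 36;

	return this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
}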
-
-static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature)
-{
-       uint32_t nr_bits;
-
-       if (feature.f.reg == KVM_CPUID_EBX) {
-               nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
-               return nr_bits > feature.f.bit && !this_cpu_has(feature.f);
-       }
-
-       GUEST_ASSERT(feature.f.reg == KVM_CPUID_ECX);
-       nr_bits = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
-       return nr_bits > feature.f.bit || this_cpu_has(feature.f);
-}
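/*
 * Illustrative sketch (editor's addition): from guest code, assert that the
 * branches-retired architectural event is enumerated.  this_pmu_has() hides
 * the inverted "unavailable bit vs. bit vector length" logic described above.
 */
static inline void example_guest_require_branch_event(void)
{
	GUEST_ASSERT(this_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED));
}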
-
-static __always_inline uint64_t this_cpu_supported_xcr0(void)
-{
-       if (!this_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
-               return 0;
-
-       return this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
-              ((uint64_t)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
-}
-
-typedef u32            __attribute__((vector_size(16))) sse128_t;
-#define __sse128_u     union { sse128_t vec; u64 as_u64[2]; u32 as_u32[4]; }
-#define sse128_lo(x)   ({ __sse128_u t; t.vec = x; t.as_u64[0]; })
-#define sse128_hi(x)   ({ __sse128_u t; t.vec = x; t.as_u64[1]; })
-
-static inline void read_sse_reg(int reg, sse128_t *data)
-{
-       switch (reg) {
-       case 0:
-               asm("movdqa %%xmm0, %0" : "=m"(*data));
-               break;
-       case 1:
-               asm("movdqa %%xmm1, %0" : "=m"(*data));
-               break;
-       case 2:
-               asm("movdqa %%xmm2, %0" : "=m"(*data));
-               break;
-       case 3:
-               asm("movdqa %%xmm3, %0" : "=m"(*data));
-               break;
-       case 4:
-               asm("movdqa %%xmm4, %0" : "=m"(*data));
-               break;
-       case 5:
-               asm("movdqa %%xmm5, %0" : "=m"(*data));
-               break;
-       case 6:
-               asm("movdqa %%xmm6, %0" : "=m"(*data));
-               break;
-       case 7:
-               asm("movdqa %%xmm7, %0" : "=m"(*data));
-               break;
-       default:
-               BUG();
-       }
-}
-
-static inline void write_sse_reg(int reg, const sse128_t *data)
-{
-       switch (reg) {
-       case 0:
-               asm("movdqa %0, %%xmm0" : : "m"(*data));
-               break;
-       case 1:
-               asm("movdqa %0, %%xmm1" : : "m"(*data));
-               break;
-       case 2:
-               asm("movdqa %0, %%xmm2" : : "m"(*data));
-               break;
-       case 3:
-               asm("movdqa %0, %%xmm3" : : "m"(*data));
-               break;
-       case 4:
-               asm("movdqa %0, %%xmm4" : : "m"(*data));
-               break;
-       case 5:
-               asm("movdqa %0, %%xmm5" : : "m"(*data));
-               break;
-       case 6:
-               asm("movdqa %0, %%xmm6" : : "m"(*data));
-               break;
-       case 7:
-               asm("movdqa %0, %%xmm7" : : "m"(*data));
-               break;
-       default:
-               BUG();
-       }
-}
-
-static inline void cpu_relax(void)
-{
-       asm volatile("rep; nop" ::: "memory");
-}
-
-static inline void udelay(unsigned long usec)
-{
-       uint64_t start, now, cycles;
-
-       GUEST_ASSERT(guest_tsc_khz);
-       cycles = guest_tsc_khz / 1000 * usec;
-
-       /*
-        * Deliberately don't PAUSE, a.k.a. cpu_relax(), so that the delay is
-        * as accurate as possible, e.g. doesn't trigger PAUSE-Loop VM-Exits.
-        */
-       start = rdtsc();
-       do {
-               now = rdtsc();
-       } while (now - start < cycles);
-}
-
-#define ud2()                  \
-       __asm__ __volatile__(   \
-               "ud2\n" \
-               )
-
-#define hlt()                  \
-       __asm__ __volatile__(   \
-               "hlt\n" \
-               )
-
-struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu);
-void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state);
-void kvm_x86_state_cleanup(struct kvm_x86_state *state);
-
-const struct kvm_msr_list *kvm_get_msr_index_list(void);
-const struct kvm_msr_list *kvm_get_feature_msr_index_list(void);
-bool kvm_msr_is_in_save_restore_list(uint32_t msr_index);
-uint64_t kvm_get_feature_msr(uint64_t msr_index);
-
-static inline void vcpu_msrs_get(struct kvm_vcpu *vcpu,
-                                struct kvm_msrs *msrs)
-{
-       int r = __vcpu_ioctl(vcpu, KVM_GET_MSRS, msrs);
-
-       TEST_ASSERT(r == msrs->nmsrs,
-                   "KVM_GET_MSRS failed, r: %i (failed on MSR %x)",
-                   r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
-}
-static inline void vcpu_msrs_set(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs)
-{
-       int r = __vcpu_ioctl(vcpu, KVM_SET_MSRS, msrs);
-
-       TEST_ASSERT(r == msrs->nmsrs,
-                   "KVM_SET_MSRS failed, r: %i (failed on MSR %x)",
-                   r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
-}
-static inline void vcpu_debugregs_get(struct kvm_vcpu *vcpu,
-                                     struct kvm_debugregs *debugregs)
-{
-       vcpu_ioctl(vcpu, KVM_GET_DEBUGREGS, debugregs);
-}
-static inline void vcpu_debugregs_set(struct kvm_vcpu *vcpu,
-                                     struct kvm_debugregs *debugregs)
-{
-       vcpu_ioctl(vcpu, KVM_SET_DEBUGREGS, debugregs);
-}
-static inline void vcpu_xsave_get(struct kvm_vcpu *vcpu,
-                                 struct kvm_xsave *xsave)
-{
-       vcpu_ioctl(vcpu, KVM_GET_XSAVE, xsave);
-}
-static inline void vcpu_xsave2_get(struct kvm_vcpu *vcpu,
-                                  struct kvm_xsave *xsave)
-{
-       vcpu_ioctl(vcpu, KVM_GET_XSAVE2, xsave);
-}
-static inline void vcpu_xsave_set(struct kvm_vcpu *vcpu,
-                                 struct kvm_xsave *xsave)
-{
-       vcpu_ioctl(vcpu, KVM_SET_XSAVE, xsave);
-}
-static inline void vcpu_xcrs_get(struct kvm_vcpu *vcpu,
-                                struct kvm_xcrs *xcrs)
-{
-       vcpu_ioctl(vcpu, KVM_GET_XCRS, xcrs);
-}
-static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs)
-{
-       vcpu_ioctl(vcpu, KVM_SET_XCRS, xcrs);
-}
-
-const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
-                                              uint32_t function, uint32_t index);
-const struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
-
-static inline uint32_t kvm_cpu_fms(void)
-{
-       return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax;
-}
-
-static inline uint32_t kvm_cpu_family(void)
-{
-       return x86_family(kvm_cpu_fms());
-}
-
-static inline uint32_t kvm_cpu_model(void)
-{
-       return x86_model(kvm_cpu_fms());
-}
-
-bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
-                  struct kvm_x86_cpu_feature feature);
-
-static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature)
-{
-       return kvm_cpuid_has(kvm_get_supported_cpuid(), feature);
-}
-
-uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
-                           struct kvm_x86_cpu_property property);
-
-static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property)
-{
-       return kvm_cpuid_property(kvm_get_supported_cpuid(), property);
-}
-
-static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property)
-{
-       uint32_t max_leaf;
-
-       switch (property.function & 0xc0000000) {
-       case 0:
-               max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
-               break;
-       case 0x40000000:
-               max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
-               break;
-       case 0x80000000:
-               max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
-               break;
-       case 0xc0000000:
-               max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
-       }
-       return max_leaf >= property.function;
-}
-
-static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature)
-{
-       uint32_t nr_bits;
-
-       if (feature.f.reg == KVM_CPUID_EBX) {
-               nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
-               return nr_bits > feature.f.bit && !kvm_cpu_has(feature.f);
-       }
-
-       TEST_ASSERT_EQ(feature.f.reg, KVM_CPUID_ECX);
-       nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
-       return nr_bits > feature.f.bit || kvm_cpu_has(feature.f);
-}
-
-static __always_inline uint64_t kvm_cpu_supported_xcr0(void)
-{
-       if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
-               return 0;
-
-       return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
-              ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
-}
-
-static inline size_t kvm_cpuid2_size(int nr_entries)
-{
-       return sizeof(struct kvm_cpuid2) +
-              sizeof(struct kvm_cpuid_entry2) * nr_entries;
-}
-
-/*
- * Allocate a "struct kvm_cpuid2" instance, with the 0-length array of
- * entries sized to hold @nr_entries.  The caller is responsible for freeing
- * the struct.
- */
-static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries)
-{
-       struct kvm_cpuid2 *cpuid;
-
-       cpuid = malloc(kvm_cpuid2_size(nr_entries));
-       TEST_ASSERT(cpuid, "-ENOMEM when allocating kvm_cpuid2");
-
-       cpuid->nent = nr_entries;
-
-       return cpuid;
-}
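/*
 * Illustrative sketch (editor's addition): allocate_kvm_cpuid2() only sizes
 * the allocation and sets nent; entry contents are uninitialized and the
 * caller owns (and must free()) the memory.
 */
static inline struct kvm_cpuid2 *example_alloc_one_leaf(uint32_t function)
{
	struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(1);

	cpuid->entries[0] = (struct kvm_cpuid_entry2) {
		.function = function,
	};
	return cpuid;
}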
-
-void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid);
-
-static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
-                                                             uint32_t function,
-                                                             uint32_t index)
-{
-       return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid,
-                                                         function, index);
-}
-
-static inline struct kvm_cpuid_entry2 *vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
-                                                           uint32_t function)
-{
-       return __vcpu_get_cpuid_entry(vcpu, function, 0);
-}
-
-static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu)
-{
-       int r;
-
-       TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
-       r = __vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
-       if (r)
-               return r;
-
-       /* On success, refresh the cache to pick up adjustments made by KVM. */
-       vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
-       return 0;
-}
-
-static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
-{
-       TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
-       vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
-
-       /* Refresh the cache to pick up adjustments made by KVM. */
-       vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
-}
-
-static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu)
-{
-       vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
-}
-
-void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
-                            struct kvm_x86_cpu_property property,
-                            uint32_t value);
-void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr);
-
-void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);
-
-static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu,
-                                 struct kvm_x86_cpu_feature feature)
-{
-       struct kvm_cpuid_entry2 *entry;
-
-       entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
-       return *((&entry->eax) + feature.reg) & BIT(feature.bit);
-}
-
-void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
-                                    struct kvm_x86_cpu_feature feature,
-                                    bool set);
-
-static inline void vcpu_set_cpuid_feature(struct kvm_vcpu *vcpu,
-                                         struct kvm_x86_cpu_feature feature)
-{
-       vcpu_set_or_clear_cpuid_feature(vcpu, feature, true);
-}
-
-static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu,
-                                           struct kvm_x86_cpu_feature feature)
-{
-       vcpu_set_or_clear_cpuid_feature(vcpu, feature, false);
-}
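/*
 * Illustrative sketch (editor's addition): hide a CPUID feature from a vCPU
 * whose cached CPUID has already been initialized (e.g. at vCPU creation).
 * The set/clear helper is expected to update the cached entry and push the
 * result to KVM.
 */
static inline void example_hide_la57(struct kvm_vcpu *vcpu)
{
	vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_LA57);
}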
-
-uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index);
-int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value);
-
-/*
- * Assert on MSR access(es) and pretty print the MSR name when possible.
- * Note, the caller provides the stringified name so that the name of the macro
- * is printed, not the value the macro resolves to (due to macro expansion).
- */
-#define TEST_ASSERT_MSR(cond, fmt, msr, str, args...)                          \
-do {                                                                           \
-       if (__builtin_constant_p(msr)) {                                        \
-               TEST_ASSERT(cond, fmt, str, args);                              \
-       } else if (!(cond)) {                                                   \
-               char buf[16];                                                   \
-                                                                               \
-               snprintf(buf, sizeof(buf), "MSR 0x%x", msr);                    \
-               TEST_ASSERT(cond, fmt, buf, args);                              \
-       }                                                                       \
-} while (0)
-
-/*
- * Returns true if KVM should return the last written value when reading an MSR
- * from userspace, e.g. the MSR isn't a command MSR, doesn't emulate state that
- * is changing, etc.  This is NOT an exhaustive list!  The intent is to filter
- * out MSRs that are not durable _and_ that a selftest wants to write.
- */
-static inline bool is_durable_msr(uint32_t msr)
-{
-       return msr != MSR_IA32_TSC;
-}
-
-#define vcpu_set_msr(vcpu, msr, val)                                                   \
-do {                                                                                   \
-       uint64_t r, v = val;                                                            \
-                                                                                       \
-       TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1,                               \
-                       "KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v);      \
-       if (!is_durable_msr(msr))                                                       \
-               break;                                                                  \
-       r = vcpu_get_msr(vcpu, msr);                                                    \
-       TEST_ASSERT_MSR(r == v, "Set %s to '0x%lx', got back '0x%lx'", msr, #msr, v, r);\
-} while (0)
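/*
 * Illustrative sketch (editor's addition): vcpu_set_msr() asserts that the
 * write succeeds and, for durable MSRs, that the value reads back unchanged.
 * The PAT MSR and value below are arbitrary examples.
 */
static inline void example_write_pat(struct kvm_vcpu *vcpu)
{
	vcpu_set_msr(vcpu, MSR_IA32_CR_PAT, 0x0007040600070406ULL);
}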
-
-void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
-void kvm_init_vm_address_properties(struct kvm_vm *vm);
-bool vm_is_unrestricted_guest(struct kvm_vm *vm);
-
-struct ex_regs {
-       uint64_t rax, rcx, rdx, rbx;
-       uint64_t rbp, rsi, rdi;
-       uint64_t r8, r9, r10, r11;
-       uint64_t r12, r13, r14, r15;
-       uint64_t vector;
-       uint64_t error_code;
-       uint64_t rip;
-       uint64_t cs;
-       uint64_t rflags;
-};
-
-struct idt_entry {
-       uint16_t offset0;
-       uint16_t selector;
-       uint16_t ist : 3;
-       uint16_t : 5;
-       uint16_t type : 4;
-       uint16_t : 1;
-       uint16_t dpl : 2;
-       uint16_t p : 1;
-       uint16_t offset1;
-       uint32_t offset2;
-       uint32_t reserved;
-};
-
-void vm_install_exception_handler(struct kvm_vm *vm, int vector,
-                       void (*handler)(struct ex_regs *));
-
-/* If a toddler were to say "abracadabra". */
-#define KVM_EXCEPTION_MAGIC 0xabacadabaULL
-
-/*
- * KVM selftest exception fixup uses registers to coordinate with the exception
- * handler, versus the kernel's in-memory tables and KVM-Unit-Tests's in-memory
- * per-CPU data.  Using only registers avoids having to map memory into the
- * guest, doesn't require a valid, stable GS.base, and reduces the risk of
- * recursive faults when accessing memory in the handler.  The downside to
- * using registers is that it restricts what registers can be used by the actual
- * instruction.  But, selftests are 64-bit only, making register pressure a
- * minor concern.  Use r9-r11 as they are volatile, i.e. don't need to be saved
- * by the callee, and except for r11 are not implicit parameters to any
- * instructions.  Ideally, fixup would use r8-r10 and thus avoid implicit
- * parameters entirely, but Hyper-V's hypercall ABI uses r8 and testing Hyper-V
- * is higher priority than testing non-faulting SYSCALL/SYSRET.
- *
- * Note, the fixup handler deliberately does not handle #DE, i.e. the vector
- * is guaranteed to be non-zero on fault.
- *
- * REGISTER INPUTS:
- * r9  = MAGIC
- * r10 = RIP
- * r11 = new RIP on fault
- *
- * REGISTER OUTPUTS:
- * r9  = exception vector (non-zero)
- * r10 = error code
- */
-#define __KVM_ASM_SAFE(insn, fep)                              \
-       "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t"   \
-       "lea 1f(%%rip), %%r10\n\t"                              \
-       "lea 2f(%%rip), %%r11\n\t"                              \
-       fep "1: " insn "\n\t"                                   \
-       "xor %%r9, %%r9\n\t"                                    \
-       "2:\n\t"                                                \
-       "mov  %%r9b, %[vector]\n\t"                             \
-       "mov  %%r10, %[error_code]\n\t"
-
-#define KVM_ASM_SAFE(insn) __KVM_ASM_SAFE(insn, "")
-#define KVM_ASM_SAFE_FEP(insn) __KVM_ASM_SAFE(insn, KVM_FEP)
-
-#define KVM_ASM_SAFE_OUTPUTS(v, ec)    [vector] "=qm"(v), [error_code] "=rm"(ec)
-#define KVM_ASM_SAFE_CLOBBERS  "r9", "r10", "r11"
-
-#define kvm_asm_safe(insn, inputs...)                                  \
-({                                                                     \
-       uint64_t ign_error_code;                                        \
-       uint8_t vector;                                                 \
-                                                                       \
-       asm volatile(KVM_ASM_SAFE(insn)                                 \
-                    : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code)     \
-                    : inputs                                           \
-                    : KVM_ASM_SAFE_CLOBBERS);                          \
-       vector;                                                         \
-})
-
-#define kvm_asm_safe_ec(insn, error_code, inputs...)                   \
-({                                                                     \
-       uint8_t vector;                                                 \
-                                                                       \
-       asm volatile(KVM_ASM_SAFE(insn)                                 \
-                    : KVM_ASM_SAFE_OUTPUTS(vector, error_code)         \
-                    : inputs                                           \
-                    : KVM_ASM_SAFE_CLOBBERS);                          \
-       vector;                                                         \
-})
-
-#define kvm_asm_safe_fep(insn, inputs...)                              \
-({                                                                     \
-       uint64_t ign_error_code;                                        \
-       uint8_t vector;                                                 \
-                                                                       \
-       asm volatile(KVM_ASM_SAFE(insn)                                 \
-                    : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code)     \
-                    : inputs                                           \
-                    : KVM_ASM_SAFE_CLOBBERS);                          \
-       vector;                                                         \
-})
-
-#define kvm_asm_safe_ec_fep(insn, error_code, inputs...)               \
-({                                                                     \
-       uint8_t vector;                                                 \
-                                                                       \
-       asm volatile(KVM_ASM_SAFE_FEP(insn)                             \
-                    : KVM_ASM_SAFE_OUTPUTS(vector, error_code)         \
-                    : inputs                                           \
-                    : KVM_ASM_SAFE_CLOBBERS);                          \
-       vector;                                                         \
-})
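/*
 * Illustrative sketch (editor's addition): from guest code, probe a faulting
 * instruction without dying on the exception.  Loading an all-ones value into
 * CR4 sets reserved bits and is expected to #GP; kvm_asm_safe() returns the
 * vector taken, or 0 if the instruction did not fault.
 */
static inline void example_expect_gp_on_bad_cr4(void)
{
	uint8_t vec = kvm_asm_safe("mov %[cr4], %%cr4",
				   [cr4] "r" ((uint64_t)-1));

	GUEST_ASSERT(vec == GP_VECTOR);
}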
-
-#define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP)                   \
-static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val)  \
-{                                                                      \
-       uint64_t error_code;                                            \
-       uint8_t vector;                                                 \
-       uint32_t a, d;                                                  \
-                                                                       \
-       asm volatile(KVM_ASM_SAFE##_FEP(#insn)                          \
-                    : "=a"(a), "=d"(d),                                \
-                      KVM_ASM_SAFE_OUTPUTS(vector, error_code)         \
-                    : "c"(idx)                                         \
-                    : KVM_ASM_SAFE_CLOBBERS);                          \
-                                                                       \
-       *val = (uint64_t)a | ((uint64_t)d << 32);                       \
-       return vector;                                                  \
-}
-
-/*
- * Generate {insn}_safe() and {insn}_safe_fep() helpers for instructions that
- * use ECX as an input index, and EDX:EAX as a 64-bit output.
- */
-#define BUILD_READ_U64_SAFE_HELPERS(insn)                              \
-       BUILD_READ_U64_SAFE_HELPER(insn, , )                            \
-       BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP)                    \
-
-BUILD_READ_U64_SAFE_HELPERS(rdmsr)
-BUILD_READ_U64_SAFE_HELPERS(rdpmc)
-BUILD_READ_U64_SAFE_HELPERS(xgetbv)
-
-static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
-{
-       return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr));
-}
-
-static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value)
-{
-       u32 eax = value;
-       u32 edx = value >> 32;
-
-       return kvm_asm_safe("xsetbv", "a" (eax), "d" (edx), "c" (index));
-}
-
-bool kvm_is_tdp_enabled(void);
-
-static inline bool kvm_is_pmu_enabled(void)
-{
-       return get_kvm_param_bool("enable_pmu");
-}
-
-static inline bool kvm_is_forced_emulation_enabled(void)
-{
-       return !!get_kvm_param_integer("force_emulation_prefix");
-}
-
-uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
-                                   int *level);
-uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);
-
-uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
-                      uint64_t a3);
-uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
-void xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
-
-static inline uint64_t __kvm_hypercall_map_gpa_range(uint64_t gpa,
-                                                    uint64_t size, uint64_t flags)
-{
-       return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0);
-}
-
-static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size,
-                                              uint64_t flags)
-{
-       uint64_t ret = __kvm_hypercall_map_gpa_range(gpa, size, flags);
-
-       GUEST_ASSERT(!ret);
-}
-
-void __vm_xsave_require_permission(uint64_t xfeature, const char *name);
-
-#define vm_xsave_require_permission(xfeature)  \
-       __vm_xsave_require_permission(xfeature, #xfeature)
-
-enum pg_level {
-       PG_LEVEL_NONE,
-       PG_LEVEL_4K,
-       PG_LEVEL_2M,
-       PG_LEVEL_1G,
-       PG_LEVEL_512G,
-       PG_LEVEL_NUM
-};
-
-#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
-#define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level))
-
-#define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K)
-#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M)
-#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)
-
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
-void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-                   uint64_t nr_bytes, int level);
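/*
 * Illustrative sketch (editor's addition): install a single 2MiB mapping
 * instead of 512 4KiB PTEs.  @gva and @gpa are assumed to be 2MiB-aligned.
 */
static inline void example_map_2m(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
{
	__virt_pg_map(vm, gva, gpa, PG_LEVEL_2M);
}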
-
-/*
- * Basic CPU control in CR0
- */
-#define X86_CR0_PE          (1UL<<0) /* Protection Enable */
-#define X86_CR0_MP          (1UL<<1) /* Monitor Coprocessor */
-#define X86_CR0_EM          (1UL<<2) /* Emulation */
-#define X86_CR0_TS          (1UL<<3) /* Task Switched */
-#define X86_CR0_ET          (1UL<<4) /* Extension Type */
-#define X86_CR0_NE          (1UL<<5) /* Numeric Error */
-#define X86_CR0_WP          (1UL<<16) /* Write Protect */
-#define X86_CR0_AM          (1UL<<18) /* Alignment Mask */
-#define X86_CR0_NW          (1UL<<29) /* Not Write-through */
-#define X86_CR0_CD          (1UL<<30) /* Cache Disable */
-#define X86_CR0_PG          (1UL<<31) /* Paging */
-
-#define PFERR_PRESENT_BIT 0
-#define PFERR_WRITE_BIT 1
-#define PFERR_USER_BIT 2
-#define PFERR_RSVD_BIT 3
-#define PFERR_FETCH_BIT 4
-#define PFERR_PK_BIT 5
-#define PFERR_SGX_BIT 15
-#define PFERR_GUEST_FINAL_BIT 32
-#define PFERR_GUEST_PAGE_BIT 33
-#define PFERR_IMPLICIT_ACCESS_BIT 48
-
-#define PFERR_PRESENT_MASK     BIT(PFERR_PRESENT_BIT)
-#define PFERR_WRITE_MASK       BIT(PFERR_WRITE_BIT)
-#define PFERR_USER_MASK                BIT(PFERR_USER_BIT)
-#define PFERR_RSVD_MASK                BIT(PFERR_RSVD_BIT)
-#define PFERR_FETCH_MASK       BIT(PFERR_FETCH_BIT)
-#define PFERR_PK_MASK          BIT(PFERR_PK_BIT)
-#define PFERR_SGX_MASK         BIT(PFERR_SGX_BIT)
-#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT)
-#define PFERR_GUEST_PAGE_MASK  BIT_ULL(PFERR_GUEST_PAGE_BIT)
-#define PFERR_IMPLICIT_ACCESS  BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
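/*
 * Illustrative sketch (editor's addition): the error code expected for a
 * user-mode write fault on a present mapping, composed from the masks above.
 */
#define EXAMPLE_PFERR_USER_WRITE \
	(PFERR_PRESENT_MASK | PFERR_WRITE_MASK | PFERR_USER_MASK)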
-
-bool sys_clocksource_is_based_on_tsc(void);
-
-#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/sev.h b/tools/testing/selftests/kvm/include/x86_64/sev.h
deleted file mode 100644 (file)
index 82c11c8..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Helpers used for SEV guests
- */
-#ifndef SELFTEST_KVM_SEV_H
-#define SELFTEST_KVM_SEV_H
-
-#include <stdint.h>
-#include <stdbool.h>
-
-#include "linux/psp-sev.h"
-
-#include "kvm_util.h"
-#include "svm_util.h"
-#include "processor.h"
-
-enum sev_guest_state {
-       SEV_GUEST_STATE_UNINITIALIZED = 0,
-       SEV_GUEST_STATE_LAUNCH_UPDATE,
-       SEV_GUEST_STATE_LAUNCH_SECRET,
-       SEV_GUEST_STATE_RUNNING,
-};
-
-#define SEV_POLICY_NO_DBG      (1UL << 0)
-#define SEV_POLICY_ES          (1UL << 2)
-
-#define GHCB_MSR_TERM_REQ      0x100
-
-void sev_vm_launch(struct kvm_vm *vm, uint32_t policy);
-void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement);
-void sev_vm_launch_finish(struct kvm_vm *vm);
-
-struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
-                                          struct kvm_vcpu **cpu);
-void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement);
-
-kvm_static_assert(SEV_RET_SUCCESS == 0);
-
-/*
- * The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long"
- * instead of a proper struct.  The size of the parameter is embedded in the
- * ioctl number, i.e. is ABI and thus immutable.  Hack around the mess by
- * creating an overlay to pass in an "unsigned long" without a cast (casting
- * will make the compiler unhappy due to dereferencing an aliased pointer).
- */
-#define __vm_sev_ioctl(vm, cmd, arg)                                   \
-({                                                                     \
-       int r;                                                          \
-                                                                       \
-       union {                                                         \
-               struct kvm_sev_cmd c;                                   \
-               unsigned long raw;                                      \
-       } sev_cmd = { .c = {                                            \
-               .id = (cmd),                                            \
-               .data = (uint64_t)(arg),                                \
-               .sev_fd = (vm)->arch.sev_fd,                            \
-       } };                                                            \
-                                                                       \
-       r = __vm_ioctl(vm, KVM_MEMORY_ENCRYPT_OP, &sev_cmd.raw);        \
-       r ?: sev_cmd.c.error;                                           \
-})
-
-#define vm_sev_ioctl(vm, cmd, arg)                                     \
-({                                                                     \
-       int ret = __vm_sev_ioctl(vm, cmd, arg);                         \
-                                                                       \
-       __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm);               \
-})
-
-void sev_vm_init(struct kvm_vm *vm);
-void sev_es_vm_init(struct kvm_vm *vm);
-
-static inline void sev_register_encrypted_memory(struct kvm_vm *vm,
-                                                struct userspace_mem_region *region)
-{
-       struct kvm_enc_region range = {
-               .addr = region->region.userspace_addr,
-               .size = region->region.memory_size,
-       };
-
-       vm_ioctl(vm, KVM_MEMORY_ENCRYPT_REG_REGION, &range);
-}
-
-static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
-                                         uint64_t size)
-{
-       struct kvm_sev_launch_update_data update_data = {
-               .uaddr = (unsigned long)addr_gpa2hva(vm, gpa),
-               .len = size,
-       };
-
-       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data);
-}
-
-#endif /* SELFTEST_KVM_SEV_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h
deleted file mode 100644 (file)
index 4803e10..0000000
+++ /dev/null
@@ -1,326 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * tools/testing/selftests/kvm/include/x86_64/svm.h
- * This is a copy of arch/x86/include/asm/svm.h
- *
- */
-
-#ifndef SELFTEST_KVM_SVM_H
-#define SELFTEST_KVM_SVM_H
-
-enum {
-       INTERCEPT_INTR,
-       INTERCEPT_NMI,
-       INTERCEPT_SMI,
-       INTERCEPT_INIT,
-       INTERCEPT_VINTR,
-       INTERCEPT_SELECTIVE_CR0,
-       INTERCEPT_STORE_IDTR,
-       INTERCEPT_STORE_GDTR,
-       INTERCEPT_STORE_LDTR,
-       INTERCEPT_STORE_TR,
-       INTERCEPT_LOAD_IDTR,
-       INTERCEPT_LOAD_GDTR,
-       INTERCEPT_LOAD_LDTR,
-       INTERCEPT_LOAD_TR,
-       INTERCEPT_RDTSC,
-       INTERCEPT_RDPMC,
-       INTERCEPT_PUSHF,
-       INTERCEPT_POPF,
-       INTERCEPT_CPUID,
-       INTERCEPT_RSM,
-       INTERCEPT_IRET,
-       INTERCEPT_INTn,
-       INTERCEPT_INVD,
-       INTERCEPT_PAUSE,
-       INTERCEPT_HLT,
-       INTERCEPT_INVLPG,
-       INTERCEPT_INVLPGA,
-       INTERCEPT_IOIO_PROT,
-       INTERCEPT_MSR_PROT,
-       INTERCEPT_TASK_SWITCH,
-       INTERCEPT_FERR_FREEZE,
-       INTERCEPT_SHUTDOWN,
-       INTERCEPT_VMRUN,
-       INTERCEPT_VMMCALL,
-       INTERCEPT_VMLOAD,
-       INTERCEPT_VMSAVE,
-       INTERCEPT_STGI,
-       INTERCEPT_CLGI,
-       INTERCEPT_SKINIT,
-       INTERCEPT_RDTSCP,
-       INTERCEPT_ICEBP,
-       INTERCEPT_WBINVD,
-       INTERCEPT_MONITOR,
-       INTERCEPT_MWAIT,
-       INTERCEPT_MWAIT_COND,
-       INTERCEPT_XSETBV,
-       INTERCEPT_RDPRU,
-};
-
-struct hv_vmcb_enlightenments {
-       struct __packed hv_enlightenments_control {
-               u32 nested_flush_hypercall:1;
-               u32 msr_bitmap:1;
-               u32 enlightened_npt_tlb: 1;
-               u32 reserved:29;
-       } __packed hv_enlightenments_control;
-       u32 hv_vp_id;
-       u64 hv_vm_id;
-       u64 partition_assist_page;
-       u64 reserved;
-} __packed;
-
-/*
- * Hyper-V uses the software reserved clean bit in VMCB
- */
-#define HV_VMCB_NESTED_ENLIGHTENMENTS (1U << 31)
-
-/* Synthetic VM-Exit */
-#define HV_SVM_EXITCODE_ENL                    0xf0000000
-#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH   (1)
-
-struct __attribute__ ((__packed__)) vmcb_control_area {
-       u32 intercept_cr;
-       u32 intercept_dr;
-       u32 intercept_exceptions;
-       u64 intercept;
-       u8 reserved_1[40];
-       u16 pause_filter_thresh;
-       u16 pause_filter_count;
-       u64 iopm_base_pa;
-       u64 msrpm_base_pa;
-       u64 tsc_offset;
-       u32 asid;
-       u8 tlb_ctl;
-       u8 reserved_2[3];
-       u32 int_ctl;
-       u32 int_vector;
-       u32 int_state;
-       u8 reserved_3[4];
-       u32 exit_code;
-       u32 exit_code_hi;
-       u64 exit_info_1;
-       u64 exit_info_2;
-       u32 exit_int_info;
-       u32 exit_int_info_err;
-       u64 nested_ctl;
-       u64 avic_vapic_bar;
-       u8 reserved_4[8];
-       u32 event_inj;
-       u32 event_inj_err;
-       u64 nested_cr3;
-       u64 virt_ext;
-       u32 clean;
-       u32 reserved_5;
-       u64 next_rip;
-       u8 insn_len;
-       u8 insn_bytes[15];
-       u64 avic_backing_page;  /* Offset 0xe0 */
-       u8 reserved_6[8];       /* Offset 0xe8 */
-       u64 avic_logical_id;    /* Offset 0xf0 */
-       u64 avic_physical_id;   /* Offset 0xf8 */
-       u8 reserved_7[8];
-       u64 vmsa_pa;            /* Used for an SEV-ES guest */
-       u8 reserved_8[720];
-       /*
-        * Offset 0x3e0, 32 bytes reserved
-        * for use by hypervisor/software.
-        */
-       union {
-               struct hv_vmcb_enlightenments hv_enlightenments;
-               u8 reserved_sw[32];
-       };
-};
-
-#define TLB_CONTROL_DO_NOTHING 0
-#define TLB_CONTROL_FLUSH_ALL_ASID 1
-#define TLB_CONTROL_FLUSH_ASID 3
-#define TLB_CONTROL_FLUSH_ASID_LOCAL 7
-
-#define V_TPR_MASK 0x0f
-
-#define V_IRQ_SHIFT 8
-#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
-
-#define V_GIF_SHIFT 9
-#define V_GIF_MASK (1 << V_GIF_SHIFT)
-
-#define V_INTR_PRIO_SHIFT 16
-#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
-
-#define V_IGN_TPR_SHIFT 20
-#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
-
-#define V_INTR_MASKING_SHIFT 24
-#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
-
-#define V_GIF_ENABLE_SHIFT 25
-#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT)
-
-#define AVIC_ENABLE_SHIFT 31
-#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
-
-#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
-#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
-
-#define SVM_INTERRUPT_SHADOW_MASK 1
-
-#define SVM_IOIO_STR_SHIFT 2
-#define SVM_IOIO_REP_SHIFT 3
-#define SVM_IOIO_SIZE_SHIFT 4
-#define SVM_IOIO_ASIZE_SHIFT 7
-
-#define SVM_IOIO_TYPE_MASK 1
-#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
-#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
-#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
-#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
-
-#define SVM_VM_CR_VALID_MASK   0x001fULL
-#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
-#define SVM_VM_CR_SVM_DIS_MASK  0x0010ULL
-
-#define SVM_NESTED_CTL_NP_ENABLE       BIT(0)
-#define SVM_NESTED_CTL_SEV_ENABLE      BIT(1)
-
-struct __attribute__ ((__packed__)) vmcb_seg {
-       u16 selector;
-       u16 attrib;
-       u32 limit;
-       u64 base;
-};
-
-struct __attribute__ ((__packed__)) vmcb_save_area {
-       struct vmcb_seg es;
-       struct vmcb_seg cs;
-       struct vmcb_seg ss;
-       struct vmcb_seg ds;
-       struct vmcb_seg fs;
-       struct vmcb_seg gs;
-       struct vmcb_seg gdtr;
-       struct vmcb_seg ldtr;
-       struct vmcb_seg idtr;
-       struct vmcb_seg tr;
-       u8 reserved_1[43];
-       u8 cpl;
-       u8 reserved_2[4];
-       u64 efer;
-       u8 reserved_3[112];
-       u64 cr4;
-       u64 cr3;
-       u64 cr0;
-       u64 dr7;
-       u64 dr6;
-       u64 rflags;
-       u64 rip;
-       u8 reserved_4[88];
-       u64 rsp;
-       u8 reserved_5[24];
-       u64 rax;
-       u64 star;
-       u64 lstar;
-       u64 cstar;
-       u64 sfmask;
-       u64 kernel_gs_base;
-       u64 sysenter_cs;
-       u64 sysenter_esp;
-       u64 sysenter_eip;
-       u64 cr2;
-       u8 reserved_6[32];
-       u64 g_pat;
-       u64 dbgctl;
-       u64 br_from;
-       u64 br_to;
-       u64 last_excp_from;
-       u64 last_excp_to;
-};
-
-struct __attribute__ ((__packed__)) vmcb {
-       struct vmcb_control_area control;
-       struct vmcb_save_area save;
-};
-
-#define SVM_VM_CR_SVM_DISABLE 4
-
-#define SVM_SELECTOR_S_SHIFT 4
-#define SVM_SELECTOR_DPL_SHIFT 5
-#define SVM_SELECTOR_P_SHIFT 7
-#define SVM_SELECTOR_AVL_SHIFT 8
-#define SVM_SELECTOR_L_SHIFT 9
-#define SVM_SELECTOR_DB_SHIFT 10
-#define SVM_SELECTOR_G_SHIFT 11
-
-#define SVM_SELECTOR_TYPE_MASK (0xf)
-#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT)
-#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT)
-#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT)
-#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT)
-#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT)
-#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT)
-#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT)
-
-#define SVM_SELECTOR_WRITE_MASK (1 << 1)
-#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
-#define SVM_SELECTOR_CODE_MASK (1 << 3)
-
-#define INTERCEPT_CR0_READ     0
-#define INTERCEPT_CR3_READ     3
-#define INTERCEPT_CR4_READ     4
-#define INTERCEPT_CR8_READ     8
-#define INTERCEPT_CR0_WRITE    (16 + 0)
-#define INTERCEPT_CR3_WRITE    (16 + 3)
-#define INTERCEPT_CR4_WRITE    (16 + 4)
-#define INTERCEPT_CR8_WRITE    (16 + 8)
-
-#define INTERCEPT_DR0_READ     0
-#define INTERCEPT_DR1_READ     1
-#define INTERCEPT_DR2_READ     2
-#define INTERCEPT_DR3_READ     3
-#define INTERCEPT_DR4_READ     4
-#define INTERCEPT_DR5_READ     5
-#define INTERCEPT_DR6_READ     6
-#define INTERCEPT_DR7_READ     7
-#define INTERCEPT_DR0_WRITE    (16 + 0)
-#define INTERCEPT_DR1_WRITE    (16 + 1)
-#define INTERCEPT_DR2_WRITE    (16 + 2)
-#define INTERCEPT_DR3_WRITE    (16 + 3)
-#define INTERCEPT_DR4_WRITE    (16 + 4)
-#define INTERCEPT_DR5_WRITE    (16 + 5)
-#define INTERCEPT_DR6_WRITE    (16 + 6)
-#define INTERCEPT_DR7_WRITE    (16 + 7)
-
-#define SVM_EVTINJ_VEC_MASK 0xff
-
-#define SVM_EVTINJ_TYPE_SHIFT 8
-#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
-
-#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
-
-#define SVM_EVTINJ_VALID (1 << 31)
-#define SVM_EVTINJ_VALID_ERR (1 << 11)
-
-#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
-#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
-
-#define        SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
-#define        SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
-#define        SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
-#define        SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
-
-#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
-#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
-
-#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
-#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
-#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44
-
-#define SVM_EXITINFO_REG_MASK 0x0F
-
-#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
-
-#endif /* SELFTEST_KVM_SVM_H */
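For reference, the SVM_SELECTOR_* shifts and masks in the header above describe the packed attribute encoding used by the vmcb_seg "attrib" field. A minimal sketch of composing the attribute for a present, 64-bit, DPL-0 code segment; the helper name is hypothetical and the exact value a given test needs depends on its guest setup:

/*
 * Illustrative only: build a vmcb_seg attrib value from the
 * SVM_SELECTOR_* defines above.
 */
static inline uint16_t example_code_seg_attrib(void)
{
	return SVM_SELECTOR_CODE_MASK |	/* type: executable code segment */
	       SVM_SELECTOR_READ_MASK |	/* ...that is also readable */
	       SVM_SELECTOR_S_MASK |	/* non-system (code/data) descriptor */
	       SVM_SELECTOR_P_MASK |	/* present */
	       SVM_SELECTOR_L_MASK;	/* 64-bit (long mode) code */
}
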
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
deleted file mode 100644 (file)
index 044f0f8..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/x86_64/svm_utils.h
- * Header for nested SVM testing
- *
- * Copyright (C) 2020, Red Hat, Inc.
- */
-
-#ifndef SELFTEST_KVM_SVM_UTILS_H
-#define SELFTEST_KVM_SVM_UTILS_H
-
-#include <asm/svm.h>
-
-#include <stdint.h>
-#include "svm.h"
-#include "processor.h"
-
-struct svm_test_data {
-       /* VMCB */
-       struct vmcb *vmcb; /* gva */
-       void *vmcb_hva;
-       uint64_t vmcb_gpa;
-
-       /* host state-save area */
-       struct vmcb_save_area *save_area; /* gva */
-       void *save_area_hva;
-       uint64_t save_area_gpa;
-
-       /* MSR-Bitmap */
-       void *msr; /* gva */
-       void *msr_hva;
-       uint64_t msr_gpa;
-};
-
-static inline void vmmcall(void)
-{
-       /*
-        * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle
-        * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended
-        * use of this function is to exit to L1 from L2.  Clobber all other
-        * GPRs as L1 doesn't correctly preserve them during vmexits.
-        */
-       __asm__ __volatile__("push %%rbp; vmmcall; pop %%rbp"
-                            : : "a"(0xdeadbeef), "c"(0xbeefdead)
-                            : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
-                              "r10", "r11", "r12", "r13", "r14", "r15");
-}
-
-#define stgi()                 \
-       __asm__ __volatile__(   \
-               "stgi\n"        \
-               )
-
-#define clgi()                 \
-       __asm__ __volatile__(   \
-               "clgi\n"        \
-               )
-
-struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva);
-void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp);
-void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
-
-int open_sev_dev_path_or_exit(void);
-
-#endif /* SELFTEST_KVM_SVM_UTILS_H */
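These helpers are consumed in two halves: the host side allocates the SVM pages with vcpu_alloc_svm() and passes the returned GVA to the guest, while the L1 guest calls generic_svm_setup() and run_guest() to enter L2. A rough sketch of that flow; the guest/setup function names are hypothetical, only the library calls come from the header above:

#include "kvm_util.h"
#include "svm_util.h"
#include "ucall_common.h"

/* Hypothetical L2 guest: immediately exit back to L1. */
static void l2_guest_code(void)
{
	vmmcall();
}

/* Hypothetical L1 guest: run L2 once and check the exit reason. */
static void l1_guest_code(struct svm_test_data *svm)
{
	unsigned long l2_stack[128];

	generic_svm_setup(svm, l2_guest_code, &l2_stack[128]);
	run_guest(svm->vmcb, svm->vmcb_gpa);
	GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
	GUEST_DONE();
}

/* Host side: allocate the SVM pages and hand their GVA to L1. */
static void setup_l1(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
	vm_vaddr_t svm_gva;

	vcpu_alloc_svm(vm, &svm_gva);
	vcpu_args_set(vcpu, 1, svm_gva);
}
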
diff --git a/tools/testing/selftests/kvm/include/x86_64/ucall.h b/tools/testing/selftests/kvm/include/x86_64/ucall.h
deleted file mode 100644 (file)
index d3825dc..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_UCALL_H
-#define SELFTEST_KVM_UCALL_H
-
-#include "kvm_util.h"
-
-#define UCALL_EXIT_REASON       KVM_EXIT_IO
-
-static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-}
-
-#endif
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
deleted file mode 100644 (file)
index 5f0c0a2..0000000
+++ /dev/null
@@ -1,577 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/x86_64/vmx.h
- *
- * Copyright (C) 2018, Google LLC.
- */
-
-#ifndef SELFTEST_KVM_VMX_H
-#define SELFTEST_KVM_VMX_H
-
-#include <asm/vmx.h>
-
-#include <stdint.h>
-#include "processor.h"
-#include "apic.h"
-
-/*
- * Definitions of Primary Processor-Based VM-Execution Controls.
- */
-#define CPU_BASED_INTR_WINDOW_EXITING          0x00000004
-#define CPU_BASED_USE_TSC_OFFSETTING           0x00000008
-#define CPU_BASED_HLT_EXITING                  0x00000080
-#define CPU_BASED_INVLPG_EXITING               0x00000200
-#define CPU_BASED_MWAIT_EXITING                        0x00000400
-#define CPU_BASED_RDPMC_EXITING                        0x00000800
-#define CPU_BASED_RDTSC_EXITING                        0x00001000
-#define CPU_BASED_CR3_LOAD_EXITING             0x00008000
-#define CPU_BASED_CR3_STORE_EXITING            0x00010000
-#define CPU_BASED_CR8_LOAD_EXITING             0x00080000
-#define CPU_BASED_CR8_STORE_EXITING            0x00100000
-#define CPU_BASED_TPR_SHADOW                   0x00200000
-#define CPU_BASED_NMI_WINDOW_EXITING           0x00400000
-#define CPU_BASED_MOV_DR_EXITING               0x00800000
-#define CPU_BASED_UNCOND_IO_EXITING            0x01000000
-#define CPU_BASED_USE_IO_BITMAPS               0x02000000
-#define CPU_BASED_MONITOR_TRAP                 0x08000000
-#define CPU_BASED_USE_MSR_BITMAPS              0x10000000
-#define CPU_BASED_MONITOR_EXITING              0x20000000
-#define CPU_BASED_PAUSE_EXITING                        0x40000000
-#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS  0x80000000
-
-#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR    0x0401e172
-
-/*
- * Definitions of Secondary Processor-Based VM-Execution Controls.
- */
-#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
-#define SECONDARY_EXEC_ENABLE_EPT              0x00000002
-#define SECONDARY_EXEC_DESC                    0x00000004
-#define SECONDARY_EXEC_ENABLE_RDTSCP           0x00000008
-#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE  0x00000010
-#define SECONDARY_EXEC_ENABLE_VPID             0x00000020
-#define SECONDARY_EXEC_WBINVD_EXITING          0x00000040
-#define SECONDARY_EXEC_UNRESTRICTED_GUEST      0x00000080
-#define SECONDARY_EXEC_APIC_REGISTER_VIRT      0x00000100
-#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY   0x00000200
-#define SECONDARY_EXEC_PAUSE_LOOP_EXITING      0x00000400
-#define SECONDARY_EXEC_RDRAND_EXITING          0x00000800
-#define SECONDARY_EXEC_ENABLE_INVPCID          0x00001000
-#define SECONDARY_EXEC_ENABLE_VMFUNC           0x00002000
-#define SECONDARY_EXEC_SHADOW_VMCS             0x00004000
-#define SECONDARY_EXEC_RDSEED_EXITING          0x00010000
-#define SECONDARY_EXEC_ENABLE_PML              0x00020000
-#define SECONDARY_EPT_VE                       0x00040000
-#define SECONDARY_ENABLE_XSAV_RESTORE          0x00100000
-#define SECONDARY_EXEC_TSC_SCALING             0x02000000
-
-#define PIN_BASED_EXT_INTR_MASK                        0x00000001
-#define PIN_BASED_NMI_EXITING                  0x00000008
-#define PIN_BASED_VIRTUAL_NMIS                 0x00000020
-#define PIN_BASED_VMX_PREEMPTION_TIMER         0x00000040
-#define PIN_BASED_POSTED_INTR                  0x00000080
-
-#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR    0x00000016
-
-#define VM_EXIT_SAVE_DEBUG_CONTROLS            0x00000004
-#define VM_EXIT_HOST_ADDR_SPACE_SIZE           0x00000200
-#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL     0x00001000
-#define VM_EXIT_ACK_INTR_ON_EXIT               0x00008000
-#define VM_EXIT_SAVE_IA32_PAT                  0x00040000
-#define VM_EXIT_LOAD_IA32_PAT                  0x00080000
-#define VM_EXIT_SAVE_IA32_EFER                 0x00100000
-#define VM_EXIT_LOAD_IA32_EFER                 0x00200000
-#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER      0x00400000
-
-#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR      0x00036dff
-
-#define VM_ENTRY_LOAD_DEBUG_CONTROLS           0x00000004
-#define VM_ENTRY_IA32E_MODE                    0x00000200
-#define VM_ENTRY_SMM                           0x00000400
-#define VM_ENTRY_DEACT_DUAL_MONITOR            0x00000800
-#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL    0x00002000
-#define VM_ENTRY_LOAD_IA32_PAT                 0x00004000
-#define VM_ENTRY_LOAD_IA32_EFER                        0x00008000
-
-#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR     0x000011ff
-
-#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK    0x0000001f
-#define VMX_MISC_SAVE_EFER_LMA                 0x00000020
-
-#define VMX_EPT_VPID_CAP_1G_PAGES              0x00020000
-#define VMX_EPT_VPID_CAP_AD_BITS               0x00200000
-
-#define EXIT_REASON_FAILED_VMENTRY     0x80000000
-
-enum vmcs_field {
-       VIRTUAL_PROCESSOR_ID            = 0x00000000,
-       POSTED_INTR_NV                  = 0x00000002,
-       GUEST_ES_SELECTOR               = 0x00000800,
-       GUEST_CS_SELECTOR               = 0x00000802,
-       GUEST_SS_SELECTOR               = 0x00000804,
-       GUEST_DS_SELECTOR               = 0x00000806,
-       GUEST_FS_SELECTOR               = 0x00000808,
-       GUEST_GS_SELECTOR               = 0x0000080a,
-       GUEST_LDTR_SELECTOR             = 0x0000080c,
-       GUEST_TR_SELECTOR               = 0x0000080e,
-       GUEST_INTR_STATUS               = 0x00000810,
-       GUEST_PML_INDEX                 = 0x00000812,
-       HOST_ES_SELECTOR                = 0x00000c00,
-       HOST_CS_SELECTOR                = 0x00000c02,
-       HOST_SS_SELECTOR                = 0x00000c04,
-       HOST_DS_SELECTOR                = 0x00000c06,
-       HOST_FS_SELECTOR                = 0x00000c08,
-       HOST_GS_SELECTOR                = 0x00000c0a,
-       HOST_TR_SELECTOR                = 0x00000c0c,
-       IO_BITMAP_A                     = 0x00002000,
-       IO_BITMAP_A_HIGH                = 0x00002001,
-       IO_BITMAP_B                     = 0x00002002,
-       IO_BITMAP_B_HIGH                = 0x00002003,
-       MSR_BITMAP                      = 0x00002004,
-       MSR_BITMAP_HIGH                 = 0x00002005,
-       VM_EXIT_MSR_STORE_ADDR          = 0x00002006,
-       VM_EXIT_MSR_STORE_ADDR_HIGH     = 0x00002007,
-       VM_EXIT_MSR_LOAD_ADDR           = 0x00002008,
-       VM_EXIT_MSR_LOAD_ADDR_HIGH      = 0x00002009,
-       VM_ENTRY_MSR_LOAD_ADDR          = 0x0000200a,
-       VM_ENTRY_MSR_LOAD_ADDR_HIGH     = 0x0000200b,
-       PML_ADDRESS                     = 0x0000200e,
-       PML_ADDRESS_HIGH                = 0x0000200f,
-       TSC_OFFSET                      = 0x00002010,
-       TSC_OFFSET_HIGH                 = 0x00002011,
-       VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
-       VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
-       APIC_ACCESS_ADDR                = 0x00002014,
-       APIC_ACCESS_ADDR_HIGH           = 0x00002015,
-       POSTED_INTR_DESC_ADDR           = 0x00002016,
-       POSTED_INTR_DESC_ADDR_HIGH      = 0x00002017,
-       EPT_POINTER                     = 0x0000201a,
-       EPT_POINTER_HIGH                = 0x0000201b,
-       EOI_EXIT_BITMAP0                = 0x0000201c,
-       EOI_EXIT_BITMAP0_HIGH           = 0x0000201d,
-       EOI_EXIT_BITMAP1                = 0x0000201e,
-       EOI_EXIT_BITMAP1_HIGH           = 0x0000201f,
-       EOI_EXIT_BITMAP2                = 0x00002020,
-       EOI_EXIT_BITMAP2_HIGH           = 0x00002021,
-       EOI_EXIT_BITMAP3                = 0x00002022,
-       EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
-       VMREAD_BITMAP                   = 0x00002026,
-       VMREAD_BITMAP_HIGH              = 0x00002027,
-       VMWRITE_BITMAP                  = 0x00002028,
-       VMWRITE_BITMAP_HIGH             = 0x00002029,
-       XSS_EXIT_BITMAP                 = 0x0000202C,
-       XSS_EXIT_BITMAP_HIGH            = 0x0000202D,
-       ENCLS_EXITING_BITMAP            = 0x0000202E,
-       ENCLS_EXITING_BITMAP_HIGH       = 0x0000202F,
-       TSC_MULTIPLIER                  = 0x00002032,
-       TSC_MULTIPLIER_HIGH             = 0x00002033,
-       GUEST_PHYSICAL_ADDRESS          = 0x00002400,
-       GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
-       VMCS_LINK_POINTER               = 0x00002800,
-       VMCS_LINK_POINTER_HIGH          = 0x00002801,
-       GUEST_IA32_DEBUGCTL             = 0x00002802,
-       GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
-       GUEST_IA32_PAT                  = 0x00002804,
-       GUEST_IA32_PAT_HIGH             = 0x00002805,
-       GUEST_IA32_EFER                 = 0x00002806,
-       GUEST_IA32_EFER_HIGH            = 0x00002807,
-       GUEST_IA32_PERF_GLOBAL_CTRL     = 0x00002808,
-       GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809,
-       GUEST_PDPTR0                    = 0x0000280a,
-       GUEST_PDPTR0_HIGH               = 0x0000280b,
-       GUEST_PDPTR1                    = 0x0000280c,
-       GUEST_PDPTR1_HIGH               = 0x0000280d,
-       GUEST_PDPTR2                    = 0x0000280e,
-       GUEST_PDPTR2_HIGH               = 0x0000280f,
-       GUEST_PDPTR3                    = 0x00002810,
-       GUEST_PDPTR3_HIGH               = 0x00002811,
-       GUEST_BNDCFGS                   = 0x00002812,
-       GUEST_BNDCFGS_HIGH              = 0x00002813,
-       HOST_IA32_PAT                   = 0x00002c00,
-       HOST_IA32_PAT_HIGH              = 0x00002c01,
-       HOST_IA32_EFER                  = 0x00002c02,
-       HOST_IA32_EFER_HIGH             = 0x00002c03,
-       HOST_IA32_PERF_GLOBAL_CTRL      = 0x00002c04,
-       HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05,
-       PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
-       CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
-       EXCEPTION_BITMAP                = 0x00004004,
-       PAGE_FAULT_ERROR_CODE_MASK      = 0x00004006,
-       PAGE_FAULT_ERROR_CODE_MATCH     = 0x00004008,
-       CR3_TARGET_COUNT                = 0x0000400a,
-       VM_EXIT_CONTROLS                = 0x0000400c,
-       VM_EXIT_MSR_STORE_COUNT         = 0x0000400e,
-       VM_EXIT_MSR_LOAD_COUNT          = 0x00004010,
-       VM_ENTRY_CONTROLS               = 0x00004012,
-       VM_ENTRY_MSR_LOAD_COUNT         = 0x00004014,
-       VM_ENTRY_INTR_INFO_FIELD        = 0x00004016,
-       VM_ENTRY_EXCEPTION_ERROR_CODE   = 0x00004018,
-       VM_ENTRY_INSTRUCTION_LEN        = 0x0000401a,
-       TPR_THRESHOLD                   = 0x0000401c,
-       SECONDARY_VM_EXEC_CONTROL       = 0x0000401e,
-       PLE_GAP                         = 0x00004020,
-       PLE_WINDOW                      = 0x00004022,
-       VM_INSTRUCTION_ERROR            = 0x00004400,
-       VM_EXIT_REASON                  = 0x00004402,
-       VM_EXIT_INTR_INFO               = 0x00004404,
-       VM_EXIT_INTR_ERROR_CODE         = 0x00004406,
-       IDT_VECTORING_INFO_FIELD        = 0x00004408,
-       IDT_VECTORING_ERROR_CODE        = 0x0000440a,
-       VM_EXIT_INSTRUCTION_LEN         = 0x0000440c,
-       VMX_INSTRUCTION_INFO            = 0x0000440e,
-       GUEST_ES_LIMIT                  = 0x00004800,
-       GUEST_CS_LIMIT                  = 0x00004802,
-       GUEST_SS_LIMIT                  = 0x00004804,
-       GUEST_DS_LIMIT                  = 0x00004806,
-       GUEST_FS_LIMIT                  = 0x00004808,
-       GUEST_GS_LIMIT                  = 0x0000480a,
-       GUEST_LDTR_LIMIT                = 0x0000480c,
-       GUEST_TR_LIMIT                  = 0x0000480e,
-       GUEST_GDTR_LIMIT                = 0x00004810,
-       GUEST_IDTR_LIMIT                = 0x00004812,
-       GUEST_ES_AR_BYTES               = 0x00004814,
-       GUEST_CS_AR_BYTES               = 0x00004816,
-       GUEST_SS_AR_BYTES               = 0x00004818,
-       GUEST_DS_AR_BYTES               = 0x0000481a,
-       GUEST_FS_AR_BYTES               = 0x0000481c,
-       GUEST_GS_AR_BYTES               = 0x0000481e,
-       GUEST_LDTR_AR_BYTES             = 0x00004820,
-       GUEST_TR_AR_BYTES               = 0x00004822,
-       GUEST_INTERRUPTIBILITY_INFO     = 0x00004824,
-       GUEST_ACTIVITY_STATE            = 0X00004826,
-       GUEST_SYSENTER_CS               = 0x0000482A,
-       VMX_PREEMPTION_TIMER_VALUE      = 0x0000482E,
-       HOST_IA32_SYSENTER_CS           = 0x00004c00,
-       CR0_GUEST_HOST_MASK             = 0x00006000,
-       CR4_GUEST_HOST_MASK             = 0x00006002,
-       CR0_READ_SHADOW                 = 0x00006004,
-       CR4_READ_SHADOW                 = 0x00006006,
-       CR3_TARGET_VALUE0               = 0x00006008,
-       CR3_TARGET_VALUE1               = 0x0000600a,
-       CR3_TARGET_VALUE2               = 0x0000600c,
-       CR3_TARGET_VALUE3               = 0x0000600e,
-       EXIT_QUALIFICATION              = 0x00006400,
-       GUEST_LINEAR_ADDRESS            = 0x0000640a,
-       GUEST_CR0                       = 0x00006800,
-       GUEST_CR3                       = 0x00006802,
-       GUEST_CR4                       = 0x00006804,
-       GUEST_ES_BASE                   = 0x00006806,
-       GUEST_CS_BASE                   = 0x00006808,
-       GUEST_SS_BASE                   = 0x0000680a,
-       GUEST_DS_BASE                   = 0x0000680c,
-       GUEST_FS_BASE                   = 0x0000680e,
-       GUEST_GS_BASE                   = 0x00006810,
-       GUEST_LDTR_BASE                 = 0x00006812,
-       GUEST_TR_BASE                   = 0x00006814,
-       GUEST_GDTR_BASE                 = 0x00006816,
-       GUEST_IDTR_BASE                 = 0x00006818,
-       GUEST_DR7                       = 0x0000681a,
-       GUEST_RSP                       = 0x0000681c,
-       GUEST_RIP                       = 0x0000681e,
-       GUEST_RFLAGS                    = 0x00006820,
-       GUEST_PENDING_DBG_EXCEPTIONS    = 0x00006822,
-       GUEST_SYSENTER_ESP              = 0x00006824,
-       GUEST_SYSENTER_EIP              = 0x00006826,
-       HOST_CR0                        = 0x00006c00,
-       HOST_CR3                        = 0x00006c02,
-       HOST_CR4                        = 0x00006c04,
-       HOST_FS_BASE                    = 0x00006c06,
-       HOST_GS_BASE                    = 0x00006c08,
-       HOST_TR_BASE                    = 0x00006c0a,
-       HOST_GDTR_BASE                  = 0x00006c0c,
-       HOST_IDTR_BASE                  = 0x00006c0e,
-       HOST_IA32_SYSENTER_ESP          = 0x00006c10,
-       HOST_IA32_SYSENTER_EIP          = 0x00006c12,
-       HOST_RSP                        = 0x00006c14,
-       HOST_RIP                        = 0x00006c16,
-};
-
-struct vmx_msr_entry {
-       uint32_t index;
-       uint32_t reserved;
-       uint64_t value;
-} __attribute__ ((aligned(16)));
-
-#include "evmcs.h"
-
-static inline int vmxon(uint64_t phys)
-{
-       uint8_t ret;
-
-       __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]"
-               : [ret]"=rm"(ret)
-               : [pa]"m"(phys)
-               : "cc", "memory");
-
-       return ret;
-}
-
-static inline void vmxoff(void)
-{
-       __asm__ __volatile__("vmxoff");
-}
-
-static inline int vmclear(uint64_t vmcs_pa)
-{
-       uint8_t ret;
-
-       __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]"
-               : [ret]"=rm"(ret)
-               : [pa]"m"(vmcs_pa)
-               : "cc", "memory");
-
-       return ret;
-}
-
-static inline int vmptrld(uint64_t vmcs_pa)
-{
-       uint8_t ret;
-
-       if (enable_evmcs)
-               return -1;
-
-       __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]"
-               : [ret]"=rm"(ret)
-               : [pa]"m"(vmcs_pa)
-               : "cc", "memory");
-
-       return ret;
-}
-
-static inline int vmptrst(uint64_t *value)
-{
-       uint64_t tmp;
-       uint8_t ret;
-
-       if (enable_evmcs)
-               return evmcs_vmptrst(value);
-
-       __asm__ __volatile__("vmptrst %[value]; setna %[ret]"
-               : [value]"=m"(tmp), [ret]"=rm"(ret)
-               : : "cc", "memory");
-
-       *value = tmp;
-       return ret;
-}
-
-/*
- * A wrapper around vmptrst that ignores errors and returns zero if the
- * vmptrst instruction fails.
- */
-static inline uint64_t vmptrstz(void)
-{
-       uint64_t value = 0;
-       vmptrst(&value);
-       return value;
-}
-
-/*
- * No guest state (e.g. GPRs) is established by this vmlaunch.
- */
-static inline int vmlaunch(void)
-{
-       int ret;
-
-       if (enable_evmcs)
-               return evmcs_vmlaunch();
-
-       __asm__ __volatile__("push %%rbp;"
-                            "push %%rcx;"
-                            "push %%rdx;"
-                            "push %%rsi;"
-                            "push %%rdi;"
-                            "push $0;"
-                            "vmwrite %%rsp, %[host_rsp];"
-                            "lea 1f(%%rip), %%rax;"
-                            "vmwrite %%rax, %[host_rip];"
-                            "vmlaunch;"
-                            "incq (%%rsp);"
-                            "1: pop %%rax;"
-                            "pop %%rdi;"
-                            "pop %%rsi;"
-                            "pop %%rdx;"
-                            "pop %%rcx;"
-                            "pop %%rbp;"
-                            : [ret]"=&a"(ret)
-                            : [host_rsp]"r"((uint64_t)HOST_RSP),
-                              [host_rip]"r"((uint64_t)HOST_RIP)
-                            : "memory", "cc", "rbx", "r8", "r9", "r10",
-                              "r11", "r12", "r13", "r14", "r15");
-       return ret;
-}
-
-/*
- * No guest state (e.g. GPRs) is established by this vmresume.
- */
-static inline int vmresume(void)
-{
-       int ret;
-
-       if (enable_evmcs)
-               return evmcs_vmresume();
-
-       __asm__ __volatile__("push %%rbp;"
-                            "push %%rcx;"
-                            "push %%rdx;"
-                            "push %%rsi;"
-                            "push %%rdi;"
-                            "push $0;"
-                            "vmwrite %%rsp, %[host_rsp];"
-                            "lea 1f(%%rip), %%rax;"
-                            "vmwrite %%rax, %[host_rip];"
-                            "vmresume;"
-                            "incq (%%rsp);"
-                            "1: pop %%rax;"
-                            "pop %%rdi;"
-                            "pop %%rsi;"
-                            "pop %%rdx;"
-                            "pop %%rcx;"
-                            "pop %%rbp;"
-                            : [ret]"=&a"(ret)
-                            : [host_rsp]"r"((uint64_t)HOST_RSP),
-                              [host_rip]"r"((uint64_t)HOST_RIP)
-                            : "memory", "cc", "rbx", "r8", "r9", "r10",
-                              "r11", "r12", "r13", "r14", "r15");
-       return ret;
-}
-
-static inline void vmcall(void)
-{
-       /*
-        * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle
-        * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended
-        * use of this function is to exit to L1 from L2.  Clobber all other
-        * GPRs as L1 doesn't correctly preserve them during vmexits.
-        */
-       __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp"
-                            : : "a"(0xdeadbeef), "c"(0xbeefdead)
-                            : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
-                              "r10", "r11", "r12", "r13", "r14", "r15");
-}
-
-static inline int vmread(uint64_t encoding, uint64_t *value)
-{
-       uint64_t tmp;
-       uint8_t ret;
-
-       if (enable_evmcs)
-               return evmcs_vmread(encoding, value);
-
-       __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]"
-               : [value]"=rm"(tmp), [ret]"=rm"(ret)
-               : [encoding]"r"(encoding)
-               : "cc", "memory");
-
-       *value = tmp;
-       return ret;
-}
-
-/*
- * A wrapper around vmread that ignores errors and returns zero if the
- * vmread instruction fails.
- */
-static inline uint64_t vmreadz(uint64_t encoding)
-{
-       uint64_t value = 0;
-       vmread(encoding, &value);
-       return value;
-}
-
-static inline int vmwrite(uint64_t encoding, uint64_t value)
-{
-       uint8_t ret;
-
-       if (enable_evmcs)
-               return evmcs_vmwrite(encoding, value);
-
-       __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]"
-               : [ret]"=rm"(ret)
-               : [value]"rm"(value), [encoding]"r"(encoding)
-               : "cc", "memory");
-
-       return ret;
-}
-
-static inline uint32_t vmcs_revision(void)
-{
-       return rdmsr(MSR_IA32_VMX_BASIC);
-}
-
-struct vmx_pages {
-       void *vmxon_hva;
-       uint64_t vmxon_gpa;
-       void *vmxon;
-
-       void *vmcs_hva;
-       uint64_t vmcs_gpa;
-       void *vmcs;
-
-       void *msr_hva;
-       uint64_t msr_gpa;
-       void *msr;
-
-       void *shadow_vmcs_hva;
-       uint64_t shadow_vmcs_gpa;
-       void *shadow_vmcs;
-
-       void *vmread_hva;
-       uint64_t vmread_gpa;
-       void *vmread;
-
-       void *vmwrite_hva;
-       uint64_t vmwrite_gpa;
-       void *vmwrite;
-
-       void *eptp_hva;
-       uint64_t eptp_gpa;
-       void *eptp;
-
-       void *apic_access_hva;
-       uint64_t apic_access_gpa;
-       void *apic_access;
-};
-
-union vmx_basic {
-       u64 val;
-       struct {
-               u32 revision;
-               u32     size:13,
-                       reserved1:3,
-                       width:1,
-                       dual:1,
-                       type:4,
-                       insouts:1,
-                       ctrl:1,
-                       vm_entry_exception_ctrl:1,
-                       reserved2:7;
-       };
-};
-
-union vmx_ctrl_msr {
-       u64 val;
-       struct {
-               u32 set, clr;
-       };
-};
-
-struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
-bool prepare_for_vmx_operation(struct vmx_pages *vmx);
-void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
-bool load_vmcs(struct vmx_pages *vmx);
-
-bool ept_1g_pages_supported(void);
-
-void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-                  uint64_t nested_paddr, uint64_t paddr);
-void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-                uint64_t nested_paddr, uint64_t paddr, uint64_t size);
-void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
-                       uint32_t memslot);
-void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
-                           uint64_t addr, uint64_t size);
-bool kvm_cpu_has_ept(void);
-void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
-                 uint32_t eptp_memslot);
-void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
-
-#endif /* SELFTEST_KVM_VMX_H */
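The prototypes at the end of the header follow the same host/guest split as the SVM helpers: vcpu_alloc_vmx() runs in the host to populate struct vmx_pages, while the L1 guest uses prepare_for_vmx_operation(), load_vmcs() and prepare_vmcs() before executing vmlaunch(). A hedged sketch of the minimal flow; names other than the library calls are hypothetical:

#include "kvm_util.h"
#include "ucall_common.h"
#include "vmx.h"

static void l2_guest_code(void)
{
	vmcall();	/* exit back to L1 */
}

/* Hypothetical L1 guest: enter VMX operation and launch L2 once. */
static void l1_guest_code(struct vmx_pages *vmx)
{
	unsigned long l2_stack[128];

	GUEST_ASSERT(prepare_for_vmx_operation(vmx));
	GUEST_ASSERT(load_vmcs(vmx));

	prepare_vmcs(vmx, l2_guest_code, &l2_stack[128]);

	GUEST_ASSERT(!vmlaunch());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
	GUEST_DONE();
}

/* Host side: allocate the VMX pages and hand their GVA to L1. */
static void setup_l1(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
	vm_vaddr_t vmx_gva;

	vcpu_alloc_vmx(vm, &vmx_gva);
	vcpu_args_set(vcpu, 1, vmx_gva);
}
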
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/aarch64/gic.c
deleted file mode 100644 (file)
index 7abbf88..0000000
+++ /dev/null
@@ -1,157 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARM Generic Interrupt Controller (GIC) support
- */
-
-#include <errno.h>
-#include <linux/bits.h>
-#include <linux/sizes.h>
-
-#include "kvm_util.h"
-
-#include <gic.h>
-#include "gic_private.h"
-#include "processor.h"
-#include "spinlock.h"
-
-static const struct gic_common_ops *gic_common_ops;
-static struct spinlock gic_lock;
-
-static void gic_cpu_init(unsigned int cpu)
-{
-       gic_common_ops->gic_cpu_init(cpu);
-}
-
-static void gic_dist_init(enum gic_type type, unsigned int nr_cpus)
-{
-       const struct gic_common_ops *gic_ops = NULL;
-
-       spin_lock(&gic_lock);
-
-       /* Distributor initialization is needed only once per VM */
-       if (gic_common_ops) {
-               spin_unlock(&gic_lock);
-               return;
-       }
-
-       if (type == GIC_V3)
-               gic_ops = &gicv3_ops;
-
-       GUEST_ASSERT(gic_ops);
-
-       gic_ops->gic_init(nr_cpus);
-       gic_common_ops = gic_ops;
-
-       /* Make sure that the initialized data is visible to all the vCPUs */
-       dsb(sy);
-
-       spin_unlock(&gic_lock);
-}
-
-void gic_init(enum gic_type type, unsigned int nr_cpus)
-{
-       uint32_t cpu = guest_get_vcpuid();
-
-       GUEST_ASSERT(type < GIC_TYPE_MAX);
-       GUEST_ASSERT(nr_cpus);
-
-       gic_dist_init(type, nr_cpus);
-       gic_cpu_init(cpu);
-}
-
-void gic_irq_enable(unsigned int intid)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_irq_enable(intid);
-}
-
-void gic_irq_disable(unsigned int intid)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_irq_disable(intid);
-}
-
-unsigned int gic_get_and_ack_irq(void)
-{
-       uint64_t irqstat;
-       unsigned int intid;
-
-       GUEST_ASSERT(gic_common_ops);
-
-       irqstat = gic_common_ops->gic_read_iar();
-       intid = irqstat & GENMASK(23, 0);
-
-       return intid;
-}
-
-void gic_set_eoi(unsigned int intid)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_write_eoir(intid);
-}
-
-void gic_set_dir(unsigned int intid)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_write_dir(intid);
-}
-
-void gic_set_eoi_split(bool split)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_set_eoi_split(split);
-}
-
-void gic_set_priority_mask(uint64_t pmr)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_set_priority_mask(pmr);
-}
-
-void gic_set_priority(unsigned int intid, unsigned int prio)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_set_priority(intid, prio);
-}
-
-void gic_irq_set_active(unsigned int intid)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_irq_set_active(intid);
-}
-
-void gic_irq_clear_active(unsigned int intid)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_irq_clear_active(intid);
-}
-
-bool gic_irq_get_active(unsigned int intid)
-{
-       GUEST_ASSERT(gic_common_ops);
-       return gic_common_ops->gic_irq_get_active(intid);
-}
-
-void gic_irq_set_pending(unsigned int intid)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_irq_set_pending(intid);
-}
-
-void gic_irq_clear_pending(unsigned int intid)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_irq_clear_pending(intid);
-}
-
-bool gic_irq_get_pending(unsigned int intid)
-{
-       GUEST_ASSERT(gic_common_ops);
-       return gic_common_ops->gic_irq_get_pending(intid);
-}
-
-void gic_irq_set_config(unsigned int intid, bool is_edge)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_irq_set_config(intid, is_edge);
-}
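From the guest's point of view the wrapper above is used as: gic_init() once per vCPU (the distributor setup inside it runs only once per VM), per-interrupt configuration, then ack/EOI from the IRQ handler. A rough sketch, assuming the exception vector and handler registration are done elsewhere by the test harness; the SPI number and function names are placeholders:

#include "gic.h"
#include "processor.h"

#define TEST_SPI_INTID	64	/* arbitrary SPI, for illustration only */

static void guest_irq_handler(struct ex_regs *regs)
{
	unsigned int intid = gic_get_and_ack_irq();

	/* ... per-test handling of 'intid' goes here ... */

	gic_set_eoi(intid);
}

static void guest_code(unsigned int nr_vcpus)
{
	gic_init(GIC_V3, nr_vcpus);

	gic_set_priority(TEST_SPI_INTID, 0);
	gic_irq_enable(TEST_SPI_INTID);
	local_irq_enable();

	/* Wait for the host (or another vCPU) to inject the SPI. */
	wfi();
}
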
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
deleted file mode 100644 (file)
index d24e9ec..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * ARM Generic Interrupt Controller (GIC) private defines that's only
- * shared among the GIC library code.
- */
-
-#ifndef SELFTEST_KVM_GIC_PRIVATE_H
-#define SELFTEST_KVM_GIC_PRIVATE_H
-
-struct gic_common_ops {
-       void (*gic_init)(unsigned int nr_cpus);
-       void (*gic_cpu_init)(unsigned int cpu);
-       void (*gic_irq_enable)(unsigned int intid);
-       void (*gic_irq_disable)(unsigned int intid);
-       uint64_t (*gic_read_iar)(void);
-       void (*gic_write_eoir)(uint32_t irq);
-       void (*gic_write_dir)(uint32_t irq);
-       void (*gic_set_eoi_split)(bool split);
-       void (*gic_set_priority_mask)(uint64_t mask);
-       void (*gic_set_priority)(uint32_t intid, uint32_t prio);
-       void (*gic_irq_set_active)(uint32_t intid);
-       void (*gic_irq_clear_active)(uint32_t intid);
-       bool (*gic_irq_get_active)(uint32_t intid);
-       void (*gic_irq_set_pending)(uint32_t intid);
-       void (*gic_irq_clear_pending)(uint32_t intid);
-       bool (*gic_irq_get_pending)(uint32_t intid);
-       void (*gic_irq_set_config)(uint32_t intid, bool is_edge);
-};
-
-extern const struct gic_common_ops gicv3_ops;
-
-#endif /* SELFTEST_KVM_GIC_PRIVATE_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
deleted file mode 100644 (file)
index 66d0550..0000000
+++ /dev/null
@@ -1,427 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARM Generic Interrupt Controller (GIC) v3 support
- */
-
-#include <linux/sizes.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "delay.h"
-
-#include "gic.h"
-#include "gic_v3.h"
-#include "gic_private.h"
-
-#define GICV3_MAX_CPUS                 512
-
-#define GICD_INT_DEF_PRI               0xa0
-#define GICD_INT_DEF_PRI_X4            ((GICD_INT_DEF_PRI << 24) |\
-                                       (GICD_INT_DEF_PRI << 16) |\
-                                       (GICD_INT_DEF_PRI << 8) |\
-                                       GICD_INT_DEF_PRI)
-
-#define ICC_PMR_DEF_PRIO               0xf0
-
-struct gicv3_data {
-       unsigned int nr_cpus;
-       unsigned int nr_spis;
-};
-
-#define sgi_base_from_redist(redist_base)      (redist_base + SZ_64K)
-#define DIST_BIT                               (1U << 31)
-
-enum gicv3_intid_range {
-       SGI_RANGE,
-       PPI_RANGE,
-       SPI_RANGE,
-       INVALID_RANGE,
-};
-
-static struct gicv3_data gicv3_data;
-
-static void gicv3_gicd_wait_for_rwp(void)
-{
-       unsigned int count = 100000; /* 1s */
-
-       while (readl(GICD_BASE_GVA + GICD_CTLR) & GICD_CTLR_RWP) {
-               GUEST_ASSERT(count--);
-               udelay(10);
-       }
-}
-
-static inline volatile void *gicr_base_cpu(uint32_t cpu)
-{
-       /* Align all the redistributors sequentially */
-       return GICR_BASE_GVA + cpu * SZ_64K * 2;
-}
-
-static void gicv3_gicr_wait_for_rwp(uint32_t cpu)
-{
-       unsigned int count = 100000; /* 1s */
-
-       while (readl(gicr_base_cpu(cpu) + GICR_CTLR) & GICR_CTLR_RWP) {
-               GUEST_ASSERT(count--);
-               udelay(10);
-       }
-}
-
-static void gicv3_wait_for_rwp(uint32_t cpu_or_dist)
-{
-       if (cpu_or_dist & DIST_BIT)
-               gicv3_gicd_wait_for_rwp();
-       else
-               gicv3_gicr_wait_for_rwp(cpu_or_dist);
-}
-
-static enum gicv3_intid_range get_intid_range(unsigned int intid)
-{
-       switch (intid) {
-       case 0 ... 15:
-               return SGI_RANGE;
-       case 16 ... 31:
-               return PPI_RANGE;
-       case 32 ... 1019:
-               return SPI_RANGE;
-       }
-
-       /* We should not be reaching here */
-       GUEST_ASSERT(0);
-
-       return INVALID_RANGE;
-}
-
-static uint64_t gicv3_read_iar(void)
-{
-       uint64_t irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
-
-       dsb(sy);
-       return irqstat;
-}
-
-static void gicv3_write_eoir(uint32_t irq)
-{
-       write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
-       isb();
-}
-
-static void gicv3_write_dir(uint32_t irq)
-{
-       write_sysreg_s(irq, SYS_ICC_DIR_EL1);
-       isb();
-}
-
-static void gicv3_set_priority_mask(uint64_t mask)
-{
-       write_sysreg_s(mask, SYS_ICC_PMR_EL1);
-}
-
-static void gicv3_set_eoi_split(bool split)
-{
-       uint32_t val;
-
-       /*
-        * All other fields are read-only, so no need to read CTLR first. In
-        * fact, the kernel does the same.
-        */
-       val = split ? (1U << 1) : 0;
-       write_sysreg_s(val, SYS_ICC_CTLR_EL1);
-       isb();
-}
-
-uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset)
-{
-       volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA
-                       : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist));
-       return readl(base + offset);
-}
-
-void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val)
-{
-       volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA
-                       : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist));
-       writel(reg_val, base + offset);
-}
-
-uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, uint64_t offset, uint32_t mask)
-{
-       return gicv3_reg_readl(cpu_or_dist, offset) & mask;
-}
-
-void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset,
-               uint32_t mask, uint32_t reg_val)
-{
-       uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask;
-
-       tmp |= (reg_val & mask);
-       gicv3_reg_writel(cpu_or_dist, offset, tmp);
-}
-
-/*
- * We use a single offset for the distributor and redistributor maps as they
- * have the same value in both. The only exceptions are registers that only
- * exist in one and not the other, like GICR_WAKER that doesn't exist in the
- * distributor map. Such registers are conveniently marked as reserved in the
- * map that doesn't implement it; like GICR_WAKER's offset of 0x0014 being
- * marked as "Reserved" in the Distributor map.
- */
-static void gicv3_access_reg(uint32_t intid, uint64_t offset,
-               uint32_t reg_bits, uint32_t bits_per_field,
-               bool write, uint32_t *val)
-{
-       uint32_t cpu = guest_get_vcpuid();
-       enum gicv3_intid_range intid_range = get_intid_range(intid);
-       uint32_t fields_per_reg, index, mask, shift;
-       uint32_t cpu_or_dist;
-
-       GUEST_ASSERT(bits_per_field <= reg_bits);
-       GUEST_ASSERT(!write || *val < (1U << bits_per_field));
-       /*
-        * This function does not support 64 bit accesses. Just asserting here
-        * until we implement readq/writeq.
-        */
-       GUEST_ASSERT(reg_bits == 32);
-
-       fields_per_reg = reg_bits / bits_per_field;
-       index = intid % fields_per_reg;
-       shift = index * bits_per_field;
-       mask = ((1U << bits_per_field) - 1) << shift;
-
-       /* Set offset to the actual register holding intid's config. */
-       offset += (intid / fields_per_reg) * (reg_bits / 8);
-
-       cpu_or_dist = (intid_range == SPI_RANGE) ? DIST_BIT : cpu;
-
-       if (write)
-               gicv3_setl_fields(cpu_or_dist, offset, mask, *val << shift);
-       *val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift;
-}
-
-static void gicv3_write_reg(uint32_t intid, uint64_t offset,
-               uint32_t reg_bits, uint32_t bits_per_field, uint32_t val)
-{
-       gicv3_access_reg(intid, offset, reg_bits,
-                       bits_per_field, true, &val);
-}
-
-static uint32_t gicv3_read_reg(uint32_t intid, uint64_t offset,
-               uint32_t reg_bits, uint32_t bits_per_field)
-{
-       uint32_t val;
-
-       gicv3_access_reg(intid, offset, reg_bits,
-                       bits_per_field, false, &val);
-       return val;
-}
-
-static void gicv3_set_priority(uint32_t intid, uint32_t prio)
-{
-       gicv3_write_reg(intid, GICD_IPRIORITYR, 32, 8, prio);
-}
-
-/* Sets the intid to be level-sensitive or edge-triggered. */
-static void gicv3_irq_set_config(uint32_t intid, bool is_edge)
-{
-       uint32_t val;
-
-       /* N/A for private interrupts. */
-       GUEST_ASSERT(get_intid_range(intid) == SPI_RANGE);
-       val = is_edge ? 2 : 0;
-       gicv3_write_reg(intid, GICD_ICFGR, 32, 2, val);
-}
-
-static void gicv3_irq_enable(uint32_t intid)
-{
-       bool is_spi = get_intid_range(intid) == SPI_RANGE;
-       uint32_t cpu = guest_get_vcpuid();
-
-       gicv3_write_reg(intid, GICD_ISENABLER, 32, 1, 1);
-       gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
-}
-
-static void gicv3_irq_disable(uint32_t intid)
-{
-       bool is_spi = get_intid_range(intid) == SPI_RANGE;
-       uint32_t cpu = guest_get_vcpuid();
-
-       gicv3_write_reg(intid, GICD_ICENABLER, 32, 1, 1);
-       gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
-}
-
-static void gicv3_irq_set_active(uint32_t intid)
-{
-       gicv3_write_reg(intid, GICD_ISACTIVER, 32, 1, 1);
-}
-
-static void gicv3_irq_clear_active(uint32_t intid)
-{
-       gicv3_write_reg(intid, GICD_ICACTIVER, 32, 1, 1);
-}
-
-static bool gicv3_irq_get_active(uint32_t intid)
-{
-       return gicv3_read_reg(intid, GICD_ISACTIVER, 32, 1);
-}
-
-static void gicv3_irq_set_pending(uint32_t intid)
-{
-       gicv3_write_reg(intid, GICD_ISPENDR, 32, 1, 1);
-}
-
-static void gicv3_irq_clear_pending(uint32_t intid)
-{
-       gicv3_write_reg(intid, GICD_ICPENDR, 32, 1, 1);
-}
-
-static bool gicv3_irq_get_pending(uint32_t intid)
-{
-       return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1);
-}
-
-static void gicv3_enable_redist(volatile void *redist_base)
-{
-       uint32_t val = readl(redist_base + GICR_WAKER);
-       unsigned int count = 100000; /* 1s */
-
-       val &= ~GICR_WAKER_ProcessorSleep;
-       writel(val, redist_base + GICR_WAKER);
-
-       /* Wait until the processor is 'active' */
-       while (readl(redist_base + GICR_WAKER) & GICR_WAKER_ChildrenAsleep) {
-               GUEST_ASSERT(count--);
-               udelay(10);
-       }
-}
-
-static void gicv3_cpu_init(unsigned int cpu)
-{
-       volatile void *sgi_base;
-       unsigned int i;
-       volatile void *redist_base_cpu;
-
-       GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
-
-       redist_base_cpu = gicr_base_cpu(cpu);
-       sgi_base = sgi_base_from_redist(redist_base_cpu);
-
-       gicv3_enable_redist(redist_base_cpu);
-
-       /*
-        * Mark all the SGI and PPI interrupts as non-secure Group-1.
-        * Also, deactivate and disable them.
-        */
-       writel(~0, sgi_base + GICR_IGROUPR0);
-       writel(~0, sgi_base + GICR_ICACTIVER0);
-       writel(~0, sgi_base + GICR_ICENABLER0);
-
-       /* Set a default priority for all the SGIs and PPIs */
-       for (i = 0; i < 32; i += 4)
-               writel(GICD_INT_DEF_PRI_X4,
-                               sgi_base + GICR_IPRIORITYR0 + i);
-
-       gicv3_gicr_wait_for_rwp(cpu);
-
-       /* Enable the GIC system register (ICC_*) access */
-       write_sysreg_s(read_sysreg_s(SYS_ICC_SRE_EL1) | ICC_SRE_EL1_SRE,
-                       SYS_ICC_SRE_EL1);
-
-       /* Set a default priority threshold */
-       write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
-
-       /* Enable non-secure Group-1 interrupts */
-       write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
-}
-
-static void gicv3_dist_init(void)
-{
-       unsigned int i;
-
-       /* Disable the distributor until we set things up */
-       writel(0, GICD_BASE_GVA + GICD_CTLR);
-       gicv3_gicd_wait_for_rwp();
-
-       /*
-        * Mark all the SPI interrupts as non-secure Group-1.
-        * Also, deactivate and disable them.
-        */
-       for (i = 32; i < gicv3_data.nr_spis; i += 32) {
-               writel(~0, GICD_BASE_GVA + GICD_IGROUPR + i / 8);
-               writel(~0, GICD_BASE_GVA + GICD_ICACTIVER + i / 8);
-               writel(~0, GICD_BASE_GVA + GICD_ICENABLER + i / 8);
-       }
-
-       /* Set a default priority for all the SPIs */
-       for (i = 32; i < gicv3_data.nr_spis; i += 4)
-               writel(GICD_INT_DEF_PRI_X4,
-                               GICD_BASE_GVA + GICD_IPRIORITYR + i);
-
-       /* Wait for the settings to sync-in */
-       gicv3_gicd_wait_for_rwp();
-
-       /* Finally, enable the distributor globally with ARE */
-       writel(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A |
-                       GICD_CTLR_ENABLE_G1, GICD_BASE_GVA + GICD_CTLR);
-       gicv3_gicd_wait_for_rwp();
-}
-
-static void gicv3_init(unsigned int nr_cpus)
-{
-       GUEST_ASSERT(nr_cpus <= GICV3_MAX_CPUS);
-
-       gicv3_data.nr_cpus = nr_cpus;
-       gicv3_data.nr_spis = GICD_TYPER_SPIS(
-                               readl(GICD_BASE_GVA + GICD_TYPER));
-       if (gicv3_data.nr_spis > 1020)
-               gicv3_data.nr_spis = 1020;
-
-       /*
-        * Initialize only the distributor for now.
-        * The redistributor and CPU interfaces are initialized
-        * later for every PE.
-        */
-       gicv3_dist_init();
-}
-
-const struct gic_common_ops gicv3_ops = {
-       .gic_init = gicv3_init,
-       .gic_cpu_init = gicv3_cpu_init,
-       .gic_irq_enable = gicv3_irq_enable,
-       .gic_irq_disable = gicv3_irq_disable,
-       .gic_read_iar = gicv3_read_iar,
-       .gic_write_eoir = gicv3_write_eoir,
-       .gic_write_dir = gicv3_write_dir,
-       .gic_set_priority_mask = gicv3_set_priority_mask,
-       .gic_set_eoi_split = gicv3_set_eoi_split,
-       .gic_set_priority = gicv3_set_priority,
-       .gic_irq_set_active = gicv3_irq_set_active,
-       .gic_irq_clear_active = gicv3_irq_clear_active,
-       .gic_irq_get_active = gicv3_irq_get_active,
-       .gic_irq_set_pending = gicv3_irq_set_pending,
-       .gic_irq_clear_pending = gicv3_irq_clear_pending,
-       .gic_irq_get_pending = gicv3_irq_get_pending,
-       .gic_irq_set_config = gicv3_irq_set_config,
-};
-
-void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
-                          vm_paddr_t pend_table)
-{
-       volatile void *rdist_base = gicr_base_cpu(guest_get_vcpuid());
-
-       u32 ctlr;
-       u64 val;
-
-       val = (cfg_table |
-              GICR_PROPBASER_InnerShareable |
-              GICR_PROPBASER_RaWaWb |
-              ((ilog2(cfg_table_size) - 1) & GICR_PROPBASER_IDBITS_MASK));
-       writeq_relaxed(val, rdist_base + GICR_PROPBASER);
-
-       val = (pend_table |
-              GICR_PENDBASER_InnerShareable |
-              GICR_PENDBASER_RaWaWb);
-       writeq_relaxed(val, rdist_base + GICR_PENDBASER);
-
-       ctlr = readl_relaxed(rdist_base + GICR_CTLR);
-       ctlr |= GICR_CTLR_ENABLE_LPIS;
-       writel_relaxed(ctlr, rdist_base + GICR_CTLR);
-}
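As a concrete example of the field addressing that gicv3_access_reg() performs for the accessors above:

/*
 * Worked example (illustrative only) for an 8-bits-per-field register
 * such as GICD_IPRIORITYR, i.e. gicv3_set_priority(66, prio) with
 * reg_bits = 32 and bits_per_field = 8:
 *
 *   fields_per_reg = 32 / 8       = 4
 *   index          = 66 % 4       = 2
 *   shift          = 2 * 8        = 16
 *   mask           = 0xff << 16   = 0x00ff0000
 *   offset        += (66 / 4) * 4   (i.e. GICD_IPRIORITYR + 0x40)
 *
 * INTID 66 is an SPI, so the access targets the distributor (DIST_BIT);
 * an SGI or PPI would instead target the calling vCPU's redistributor
 * SGI frame via sgi_base_from_redist().
 */
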
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c
deleted file mode 100644 (file)
index 09f2705..0000000
+++ /dev/null
@@ -1,248 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Guest ITS library, generously donated by drivers/irqchip/irq-gic-v3-its.c
- * over in the kernel tree.
- */
-
-#include <linux/kvm.h>
-#include <linux/sizes.h>
-#include <asm/kvm_para.h>
-#include <asm/kvm.h>
-
-#include "kvm_util.h"
-#include "vgic.h"
-#include "gic.h"
-#include "gic_v3.h"
-#include "processor.h"
-
-static u64 its_read_u64(unsigned long offset)
-{
-       return readq_relaxed(GITS_BASE_GVA + offset);
-}
-
-static void its_write_u64(unsigned long offset, u64 val)
-{
-       writeq_relaxed(val, GITS_BASE_GVA + offset);
-}
-
-static u32 its_read_u32(unsigned long offset)
-{
-       return readl_relaxed(GITS_BASE_GVA + offset);
-}
-
-static void its_write_u32(unsigned long offset, u32 val)
-{
-       writel_relaxed(val, GITS_BASE_GVA + offset);
-}
-
-static unsigned long its_find_baser(unsigned int type)
-{
-       int i;
-
-       for (i = 0; i < GITS_BASER_NR_REGS; i++) {
-               u64 baser;
-               unsigned long offset = GITS_BASER + (i * sizeof(baser));
-
-               baser = its_read_u64(offset);
-               if (GITS_BASER_TYPE(baser) == type)
-                       return offset;
-       }
-
-       GUEST_FAIL("Couldn't find an ITS BASER of type %u", type);
-       return -1;
-}
-
-static void its_install_table(unsigned int type, vm_paddr_t base, size_t size)
-{
-       unsigned long offset = its_find_baser(type);
-       u64 baser;
-
-       baser = ((size / SZ_64K) - 1) |
-               GITS_BASER_PAGE_SIZE_64K |
-               GITS_BASER_InnerShareable |
-               base |
-               GITS_BASER_RaWaWb |
-               GITS_BASER_VALID;
-
-       its_write_u64(offset, baser);
-}
-
-static void its_install_cmdq(vm_paddr_t base, size_t size)
-{
-       u64 cbaser;
-
-       cbaser = ((size / SZ_4K) - 1) |
-                GITS_CBASER_InnerShareable |
-                base |
-                GITS_CBASER_RaWaWb |
-                GITS_CBASER_VALID;
-
-       its_write_u64(GITS_CBASER, cbaser);
-}
-
-void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz,
-             vm_paddr_t device_tbl, size_t device_tbl_sz,
-             vm_paddr_t cmdq, size_t cmdq_size)
-{
-       u32 ctlr;
-
-       its_install_table(GITS_BASER_TYPE_COLLECTION, coll_tbl, coll_tbl_sz);
-       its_install_table(GITS_BASER_TYPE_DEVICE, device_tbl, device_tbl_sz);
-       its_install_cmdq(cmdq, cmdq_size);
-
-       ctlr = its_read_u32(GITS_CTLR);
-       ctlr |= GITS_CTLR_ENABLE;
-       its_write_u32(GITS_CTLR, ctlr);
-}
-
-struct its_cmd_block {
-       union {
-               u64     raw_cmd[4];
-               __le64  raw_cmd_le[4];
-       };
-};
-
-static inline void its_fixup_cmd(struct its_cmd_block *cmd)
-{
-       /* Let's fixup BE commands */
-       cmd->raw_cmd_le[0] = cpu_to_le64(cmd->raw_cmd[0]);
-       cmd->raw_cmd_le[1] = cpu_to_le64(cmd->raw_cmd[1]);
-       cmd->raw_cmd_le[2] = cpu_to_le64(cmd->raw_cmd[2]);
-       cmd->raw_cmd_le[3] = cpu_to_le64(cmd->raw_cmd[3]);
-}
-
-static void its_mask_encode(u64 *raw_cmd, u64 val, int h, int l)
-{
-       u64 mask = GENMASK_ULL(h, l);
-       *raw_cmd &= ~mask;
-       *raw_cmd |= (val << l) & mask;
-}
-
-static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr)
-{
-       its_mask_encode(&cmd->raw_cmd[0], cmd_nr, 7, 0);
-}
-
-static void its_encode_devid(struct its_cmd_block *cmd, u32 devid)
-{
-       its_mask_encode(&cmd->raw_cmd[0], devid, 63, 32);
-}
-
-static void its_encode_event_id(struct its_cmd_block *cmd, u32 id)
-{
-       its_mask_encode(&cmd->raw_cmd[1], id, 31, 0);
-}
-
-static void its_encode_phys_id(struct its_cmd_block *cmd, u32 phys_id)
-{
-       its_mask_encode(&cmd->raw_cmd[1], phys_id, 63, 32);
-}
-
-static void its_encode_size(struct its_cmd_block *cmd, u8 size)
-{
-       its_mask_encode(&cmd->raw_cmd[1], size, 4, 0);
-}
-
-static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr)
-{
-       its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8);
-}
-
-static void its_encode_valid(struct its_cmd_block *cmd, int valid)
-{
-       its_mask_encode(&cmd->raw_cmd[2], !!valid, 63, 63);
-}
-
-static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr)
-{
-       its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16);
-}
-
-static void its_encode_collection(struct its_cmd_block *cmd, u16 col)
-{
-       its_mask_encode(&cmd->raw_cmd[2], col, 15, 0);
-}
-
-#define GITS_CMDQ_POLL_ITERATIONS      0
-
-static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd)
-{
-       u64 cwriter = its_read_u64(GITS_CWRITER);
-       struct its_cmd_block *dst = cmdq_base + cwriter;
-       u64 cbaser = its_read_u64(GITS_CBASER);
-       size_t cmdq_size;
-       u64 next;
-       int i;
-
-       cmdq_size = ((cbaser & 0xFF) + 1) * SZ_4K;
-
-       its_fixup_cmd(cmd);
-
-       WRITE_ONCE(*dst, *cmd);
-       dsb(ishst);
-       next = (cwriter + sizeof(*cmd)) % cmdq_size;
-       its_write_u64(GITS_CWRITER, next);
-
-       /*
-        * Polling isn't necessary considering KVM's ITS emulation at the time
-        * of writing this, as the CMDQ is processed synchronously after a write
-        * to CWRITER.
-        */
-       for (i = 0; its_read_u64(GITS_CREADR) != next; i++) {
-               __GUEST_ASSERT(i < GITS_CMDQ_POLL_ITERATIONS,
-                              "ITS didn't process command at offset %lu after %d iterations\n",
-                              cwriter, i);
-
-               cpu_relax();
-       }
-}
-
-void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base,
-                      size_t itt_size, bool valid)
-{
-       struct its_cmd_block cmd = {};
-
-       its_encode_cmd(&cmd, GITS_CMD_MAPD);
-       its_encode_devid(&cmd, device_id);
-       its_encode_size(&cmd, ilog2(itt_size) - 1);
-       its_encode_itt(&cmd, itt_base);
-       its_encode_valid(&cmd, valid);
-
-       its_send_cmd(cmdq_base, &cmd);
-}
-
-void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid)
-{
-       struct its_cmd_block cmd = {};
-
-       its_encode_cmd(&cmd, GITS_CMD_MAPC);
-       its_encode_collection(&cmd, collection_id);
-       its_encode_target(&cmd, vcpu_id);
-       its_encode_valid(&cmd, valid);
-
-       its_send_cmd(cmdq_base, &cmd);
-}
-
-void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
-                       u32 collection_id, u32 intid)
-{
-       struct its_cmd_block cmd = {};
-
-       its_encode_cmd(&cmd, GITS_CMD_MAPTI);
-       its_encode_devid(&cmd, device_id);
-       its_encode_event_id(&cmd, event_id);
-       its_encode_phys_id(&cmd, intid);
-       its_encode_collection(&cmd, collection_id);
-
-       its_send_cmd(cmdq_base, &cmd);
-}
-
-void its_send_invall_cmd(void *cmdq_base, u32 collection_id)
-{
-       struct its_cmd_block cmd = {};
-
-       its_encode_cmd(&cmd, GITS_CMD_INVALL);
-       its_encode_collection(&cmd, collection_id);
-
-       its_send_cmd(cmdq_base, &cmd);
-}
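The command builders above are normally issued in a fixed order to route one LPI: MAPD for the device, MAPC for the collection, MAPTI to bind the event to the LPI and collection, then INVALL. A hedged sketch, assuming the ITS tables, ITT and command queue were already allocated and gic_rdist_enable_lpis() was called on the target vCPU; all IDs are placeholders:

/* 8192 is simply the first LPI INTID; everything else is arbitrary. */
static void guest_map_one_lpi(void *cmdq_base, vm_paddr_t itt_base)
{
	const u32 device_id = 0, event_id = 0, coll_id = 0, intid = 8192;

	/* Bind the device to its Interrupt Translation Table. */
	its_send_mapd_cmd(cmdq_base, device_id, itt_base, SZ_4K, true);

	/* Map the collection onto the target vCPU's redistributor. */
	its_send_mapc_cmd(cmdq_base, /*vcpu_id=*/0, coll_id, true);

	/* Translate (device_id, event_id) to the LPI and collection. */
	its_send_mapti_cmd(cmdq_base, device_id, event_id, coll_id, intid);

	/* Resynchronize any cached configuration for the collection. */
	its_send_invall_cmd(cmdq_base, coll_id);
}
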
diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S b/tools/testing/selftests/kvm/lib/aarch64/handlers.S
deleted file mode 100644 (file)
index 0e443ea..0000000
+++ /dev/null
@@ -1,126 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-.macro save_registers
-       add     sp, sp, #-16 * 17
-
-       stp     x0, x1, [sp, #16 * 0]
-       stp     x2, x3, [sp, #16 * 1]
-       stp     x4, x5, [sp, #16 * 2]
-       stp     x6, x7, [sp, #16 * 3]
-       stp     x8, x9, [sp, #16 * 4]
-       stp     x10, x11, [sp, #16 * 5]
-       stp     x12, x13, [sp, #16 * 6]
-       stp     x14, x15, [sp, #16 * 7]
-       stp     x16, x17, [sp, #16 * 8]
-       stp     x18, x19, [sp, #16 * 9]
-       stp     x20, x21, [sp, #16 * 10]
-       stp     x22, x23, [sp, #16 * 11]
-       stp     x24, x25, [sp, #16 * 12]
-       stp     x26, x27, [sp, #16 * 13]
-       stp     x28, x29, [sp, #16 * 14]
-
-       /*
-        * This stores sp_el1 into ex_regs.sp so exception handlers can "look"
-        * at it. It will _not_ be used to restore the sp on return from the
-        * exception so handlers can not update it.
-        */
-       add     x1, sp, #16 * 17
-       stp     x30, x1, [sp, #16 * 15] /* x30, SP */
-
-       mrs     x1, elr_el1
-       mrs     x2, spsr_el1
-       stp     x1, x2, [sp, #16 * 16] /* PC, PSTATE */
-.endm
-
-.macro restore_registers
-       ldp     x1, x2, [sp, #16 * 16] /* PC, PSTATE */
-       msr     elr_el1, x1
-       msr     spsr_el1, x2
-
-       /* sp is not restored */
-       ldp     x30, xzr, [sp, #16 * 15] /* x30, SP */
-
-       ldp     x28, x29, [sp, #16 * 14]
-       ldp     x26, x27, [sp, #16 * 13]
-       ldp     x24, x25, [sp, #16 * 12]
-       ldp     x22, x23, [sp, #16 * 11]
-       ldp     x20, x21, [sp, #16 * 10]
-       ldp     x18, x19, [sp, #16 * 9]
-       ldp     x16, x17, [sp, #16 * 8]
-       ldp     x14, x15, [sp, #16 * 7]
-       ldp     x12, x13, [sp, #16 * 6]
-       ldp     x10, x11, [sp, #16 * 5]
-       ldp     x8, x9, [sp, #16 * 4]
-       ldp     x6, x7, [sp, #16 * 3]
-       ldp     x4, x5, [sp, #16 * 2]
-       ldp     x2, x3, [sp, #16 * 1]
-       ldp     x0, x1, [sp, #16 * 0]
-
-       add     sp, sp, #16 * 17
-
-       eret
-.endm
-
-.pushsection ".entry.text", "ax"
-.balign 0x800
-.global vectors
-vectors:
-.popsection
-
-.set   vector, 0
-
-/*
- * Build an exception handler for vector and append a jump to it into
- * vectors (while making sure that it's 0x80 aligned).
- */
-.macro HANDLER, label
-handler_\label:
-       save_registers
-       mov     x0, sp
-       mov     x1, #vector
-       bl      route_exception
-       restore_registers
-
-.pushsection ".entry.text", "ax"
-.balign 0x80
-       b       handler_\label
-.popsection
-
-.set   vector, vector + 1
-.endm
-
-.macro HANDLER_INVALID
-.pushsection ".entry.text", "ax"
-.balign 0x80
-/* This will abort so no need to save and restore registers. */
-       mov     x0, #vector
-       mov     x1, #0 /* ec */
-       mov     x2, #0 /* valid_ec */
-       b       kvm_exit_unexpected_exception
-.popsection
-
-.set   vector, vector + 1
-.endm
-
-/*
- * Caution: be sure to not add anything between the declaration of vectors
- * above and these macro calls that will build the vectors table below it.
- */
-       HANDLER_INVALID                         // Synchronous EL1t
-       HANDLER_INVALID                         // IRQ EL1t
-       HANDLER_INVALID                         // FIQ EL1t
-       HANDLER_INVALID                         // Error EL1t
-
-       HANDLER el1h_sync                       // Synchronous EL1h
-       HANDLER el1h_irq                        // IRQ EL1h
-       HANDLER el1h_fiq                        // FIQ EL1h
-       HANDLER el1h_error                      // Error EL1h
-
-       HANDLER el0_sync_64                     // Synchronous 64-bit EL0
-       HANDLER el0_irq_64                      // IRQ 64-bit EL0
-       HANDLER el0_fiq_64                      // FIQ 64-bit EL0
-       HANDLER el0_error_64                    // Error 64-bit EL0
-
-       HANDLER el0_sync_32                     // Synchronous 32-bit EL0
-       HANDLER el0_irq_32                      // IRQ 32-bit EL0
-       HANDLER el0_fiq_32                      // FIQ 32-bit EL0
-       HANDLER el0_error_32                    // Error 32-bit EL0
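Each stub built by the HANDLER macro saves the frame described by save_registers and branches to route_exception(), which the library resolves to a per-vector handler registered by the test. A hedged sketch of that C side, assuming the selftests' registration helpers (handler body and function names are illustrative):

#include "kvm_util.h"
#include "processor.h"

/* Hypothetical handler: skip the faulting BRK instruction and resume. */
static void guest_brk_handler(struct ex_regs *regs)
{
	regs->pc += 4;
}

static void host_setup(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vcpu);

	/* Route 64-bit BRK exceptions taken at EL1h to the handler above. */
	vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
				ESR_ELx_EC_BRK64, guest_brk_handler);
}
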
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
deleted file mode 100644 (file)
index 7ba3aa3..0000000
+++ /dev/null
@@ -1,647 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * AArch64 code
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-
-#include <linux/compiler.h>
-#include <assert.h>
-
-#include "guest_modes.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "ucall_common.h"
-
-#include <linux/bitfield.h>
-#include <linux/sizes.h>
-
-#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN    0xac0000
-
-static vm_vaddr_t exception_handlers;
-
-static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
-{
-       return (v + vm->page_size) & ~(vm->page_size - 1);
-}
-
-static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva)
-{
-       unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
-       uint64_t mask = (1UL << (vm->va_bits - shift)) - 1;
-
-       return (gva >> shift) & mask;
-}
-
-static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva)
-{
-       unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift;
-       uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
-
-       TEST_ASSERT(vm->pgtable_levels == 4,
-               "Mode %d does not have 4 page table levels", vm->mode);
-
-       return (gva >> shift) & mask;
-}
-
-static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva)
-{
-       unsigned int shift = (vm->page_shift - 3) + vm->page_shift;
-       uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
-
-       TEST_ASSERT(vm->pgtable_levels >= 3,
-               "Mode %d does not have >= 3 page table levels", vm->mode);
-
-       return (gva >> shift) & mask;
-}
-
-static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva)
-{
-       uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
-       return (gva >> vm->page_shift) & mask;
-}
-
-static inline bool use_lpa2_pte_format(struct kvm_vm *vm)
-{
-       return (vm->page_size == SZ_4K || vm->page_size == SZ_16K) &&
-           (vm->pa_bits > 48 || vm->va_bits > 48);
-}
-
-static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs)
-{
-       uint64_t pte;
-
-       if (use_lpa2_pte_format(vm)) {
-               pte = pa & GENMASK(49, vm->page_shift);
-               pte |= FIELD_GET(GENMASK(51, 50), pa) << 8;
-               attrs &= ~GENMASK(9, 8);
-       } else {
-               pte = pa & GENMASK(47, vm->page_shift);
-               if (vm->page_shift == 16)
-                       pte |= FIELD_GET(GENMASK(51, 48), pa) << 12;
-       }
-       pte |= attrs;
-
-       return pte;
-}
-
-static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
-{
-       uint64_t pa;
-
-       if (use_lpa2_pte_format(vm)) {
-               pa = pte & GENMASK(49, vm->page_shift);
-               pa |= FIELD_GET(GENMASK(9, 8), pte) << 50;
-       } else {
-               pa = pte & GENMASK(47, vm->page_shift);
-               if (vm->page_shift == 16)
-                       pa |= FIELD_GET(GENMASK(15, 12), pte) << 48;
-       }
-
-       return pa;
-}
-
-static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
-{
-       unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
-       return 1 << (vm->va_bits - shift);
-}
-
-static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
-{
-       return 1 << (vm->page_shift - 3);
-}
-
-void virt_arch_pgd_alloc(struct kvm_vm *vm)
-{
-       size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;
-
-       if (vm->pgd_created)
-               return;
-
-       vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
-                                    KVM_GUEST_PAGE_TABLE_MIN_PADDR,
-                                    vm->memslots[MEM_REGION_PT]);
-       vm->pgd_created = true;
-}
-
-static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-                        uint64_t flags)
-{
-       uint8_t attr_idx = flags & 7;
-       uint64_t *ptep;
-
-       TEST_ASSERT((vaddr % vm->page_size) == 0,
-               "Virtual address not on page boundary,\n"
-               "  vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
-       TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
-               (vaddr >> vm->page_shift)),
-               "Invalid virtual address, vaddr: 0x%lx", vaddr);
-       TEST_ASSERT((paddr % vm->page_size) == 0,
-               "Physical address not on page boundary,\n"
-               "  paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
-       TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
-               "Physical address beyond maximum supported,\n"
-               "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
-               paddr, vm->max_gfn, vm->page_size);
-
-       ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
-       if (!*ptep)
-               *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
-
-       switch (vm->pgtable_levels) {
-       case 4:
-               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
-               if (!*ptep)
-                       *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
-               /* fall through */
-       case 3:
-               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
-               if (!*ptep)
-                       *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
-               /* fall through */
-       case 2:
-               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
-               break;
-       default:
-               TEST_FAIL("Page table levels must be 2, 3, or 4");
-       }
-
-       *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3);  /* AF */
-}
-
-void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
-{
-       uint64_t attr_idx = MT_NORMAL;
-
-       _virt_pg_map(vm, vaddr, paddr, attr_idx);
-}
-
-uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
-{
-       uint64_t *ptep;
-
-       if (!vm->pgd_created)
-               goto unmapped_gva;
-
-       ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
-       if (!ptep)
-               goto unmapped_gva;
-
-       switch (vm->pgtable_levels) {
-       case 4:
-               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
-               if (!ptep)
-                       goto unmapped_gva;
-               /* fall through */
-       case 3:
-               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
-               if (!ptep)
-                       goto unmapped_gva;
-               /* fall through */
-       case 2:
-               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
-               if (!ptep)
-                       goto unmapped_gva;
-               break;
-       default:
-               TEST_FAIL("Page table levels must be 2, 3, or 4");
-       }
-
-       return ptep;
-
-unmapped_gva:
-       TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
-       exit(EXIT_FAILURE);
-}
-
-vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
-{
-       uint64_t *ptep = virt_get_pte_hva(vm, gva);
-
-       return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
-}
-
-static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
-{
-#ifdef DEBUG
-       static const char * const type[] = { "", "pud", "pmd", "pte" };
-       uint64_t pte, *ptep;
-
-       if (level == 4)
-               return;
-
-       for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
-               ptep = addr_gpa2hva(vm, pte);
-               if (!*ptep)
-                       continue;
-               fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep);
-               pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level + 1);
-       }
-#endif
-}
-
-void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
-{
-       int level = 4 - (vm->pgtable_levels - 1);
-       uint64_t pgd, *ptep;
-
-       if (!vm->pgd_created)
-               return;
-
-       for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {
-               ptep = addr_gpa2hva(vm, pgd);
-               if (!*ptep)
-                       continue;
-               fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep);
-               pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level);
-       }
-}
-
-void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
-{
-       struct kvm_vcpu_init default_init = { .target = -1, };
-       struct kvm_vm *vm = vcpu->vm;
-       uint64_t sctlr_el1, tcr_el1, ttbr0_el1;
-
-       if (!init)
-               init = &default_init;
-
-       if (init->target == -1) {
-               struct kvm_vcpu_init preferred;
-               vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred);
-               init->target = preferred.target;
-       }
-
-       vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init);
-
-       /*
-        * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
-        * registers, which the variable argument list macros do.
-        */
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20);
-
-       sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1));
-       tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1));
-
-       /* Configure base granule size */
-       switch (vm->mode) {
-       case VM_MODE_PXXV48_4K:
-               TEST_FAIL("AArch64 does not support 4K sized pages "
-                         "with ANY-bit physical address ranges");
-       case VM_MODE_P52V48_64K:
-       case VM_MODE_P48V48_64K:
-       case VM_MODE_P40V48_64K:
-       case VM_MODE_P36V48_64K:
-               tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
-               break;
-       case VM_MODE_P52V48_16K:
-       case VM_MODE_P48V48_16K:
-       case VM_MODE_P40V48_16K:
-       case VM_MODE_P36V48_16K:
-       case VM_MODE_P36V47_16K:
-               tcr_el1 |= 2ul << 14; /* TG0 = 16KB */
-               break;
-       case VM_MODE_P52V48_4K:
-       case VM_MODE_P48V48_4K:
-       case VM_MODE_P40V48_4K:
-       case VM_MODE_P36V48_4K:
-               tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
-               break;
-       default:
-               TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
-       }
-
-       ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift);
-
-       /* Configure output size */
-       switch (vm->mode) {
-       case VM_MODE_P52V48_4K:
-       case VM_MODE_P52V48_16K:
-       case VM_MODE_P52V48_64K:
-               tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
-               ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
-               break;
-       case VM_MODE_P48V48_4K:
-       case VM_MODE_P48V48_16K:
-       case VM_MODE_P48V48_64K:
-               tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
-               break;
-       case VM_MODE_P40V48_4K:
-       case VM_MODE_P40V48_16K:
-       case VM_MODE_P40V48_64K:
-               tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
-               break;
-       case VM_MODE_P36V48_4K:
-       case VM_MODE_P36V48_16K:
-       case VM_MODE_P36V48_64K:
-       case VM_MODE_P36V47_16K:
-               tcr_el1 |= 1ul << 32; /* IPS = 36 bits */
-               break;
-       default:
-               TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
-       }
-
-       sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */;
-       /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
-       tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
-       tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
-       if (use_lpa2_pte_format(vm))
-               tcr_el1 |= (1ul << 59) /* DS */;
-
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1);
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id);
-}
-
-void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
-{
-       uint64_t pstate, pc;
-
-       pstate = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate));
-       pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
-
-       fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n",
-               indent, "", pstate, pc);
-}
-
-void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
-{
-       vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
-}
-
-static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
-                                          struct kvm_vcpu_init *init)
-{
-       size_t stack_size;
-       uint64_t stack_vaddr;
-       struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
-
-       stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
-                                            vm->page_size;
-       stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
-                                      DEFAULT_ARM64_GUEST_STACK_VADDR_MIN,
-                                      MEM_REGION_DATA);
-
-       aarch64_vcpu_setup(vcpu, init);
-
-       vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
-       return vcpu;
-}
-
-struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
-                                 struct kvm_vcpu_init *init, void *guest_code)
-{
-       struct kvm_vcpu *vcpu = __aarch64_vcpu_add(vm, vcpu_id, init);
-
-       vcpu_arch_set_entry_point(vcpu, guest_code);
-
-       return vcpu;
-}
-
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
-{
-       return __aarch64_vcpu_add(vm, vcpu_id, NULL);
-}
-
-void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
-{
-       va_list ap;
-       int i;
-
-       TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
-                   "  num: %u", num);
-
-       va_start(ap, num);
-
-       for (i = 0; i < num; i++) {
-               vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.regs[i]),
-                            va_arg(ap, uint64_t));
-       }
-
-       va_end(ap);
-}
-
-void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec)
-{
-       ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec);
-       while (1)
-               ;
-}
-
-void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED)
-               return;
-
-       if (uc.args[2]) /* valid_ec */ {
-               assert(VECTOR_IS_SYNC(uc.args[0]));
-               TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
-                         uc.args[0], uc.args[1]);
-       } else {
-               assert(!VECTOR_IS_SYNC(uc.args[0]));
-               TEST_FAIL("Unexpected exception (vector:0x%lx)",
-                         uc.args[0]);
-       }
-}
-
-struct handlers {
-       handler_fn exception_handlers[VECTOR_NUM][ESR_ELx_EC_MAX + 1];
-};
-
-void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
-{
-       extern char vectors;
-
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors);
-}
-
-void route_exception(struct ex_regs *regs, int vector)
-{
-       struct handlers *handlers = (struct handlers *)exception_handlers;
-       bool valid_ec;
-       int ec = 0;
-
-       switch (vector) {
-       case VECTOR_SYNC_CURRENT:
-       case VECTOR_SYNC_LOWER_64:
-               ec = ESR_ELx_EC(read_sysreg(esr_el1));
-               valid_ec = true;
-               break;
-       case VECTOR_IRQ_CURRENT:
-       case VECTOR_IRQ_LOWER_64:
-       case VECTOR_FIQ_CURRENT:
-       case VECTOR_FIQ_LOWER_64:
-       case VECTOR_ERROR_CURRENT:
-       case VECTOR_ERROR_LOWER_64:
-               ec = 0;
-               valid_ec = false;
-               break;
-       default:
-               valid_ec = false;
-               goto unexpected_exception;
-       }
-
-       if (handlers && handlers->exception_handlers[vector][ec])
-               return handlers->exception_handlers[vector][ec](regs);
-
-unexpected_exception:
-       kvm_exit_unexpected_exception(vector, ec, valid_ec);
-}
-
-void vm_init_descriptor_tables(struct kvm_vm *vm)
-{
-       vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
-                                       vm->page_size, MEM_REGION_DATA);
-
-       *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
-}
-
-void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec,
-                        void (*handler)(struct ex_regs *))
-{
-       struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
-
-       assert(VECTOR_IS_SYNC(vector));
-       assert(vector < VECTOR_NUM);
-       assert(ec <= ESR_ELx_EC_MAX);
-       handlers->exception_handlers[vector][ec] = handler;
-}
-
-void vm_install_exception_handler(struct kvm_vm *vm, int vector,
-                        void (*handler)(struct ex_regs *))
-{
-       struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
-
-       assert(!VECTOR_IS_SYNC(vector));
-       assert(vector < VECTOR_NUM);
-       handlers->exception_handlers[vector][0] = handler;
-}
-
-uint32_t guest_get_vcpuid(void)
-{
-       return read_sysreg(tpidr_el1);
-}
-
-static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran,
-                               uint32_t not_sup_val, uint32_t ipa52_min_val)
-{
-       if (gran == not_sup_val)
-               return 0;
-       else if (gran >= ipa52_min_val && vm_ipa >= 52)
-               return 52;
-       else
-               return min(vm_ipa, 48U);
-}
-
-void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
-                                       uint32_t *ipa16k, uint32_t *ipa64k)
-{
-       struct kvm_vcpu_init preferred_init;
-       int kvm_fd, vm_fd, vcpu_fd, err;
-       uint64_t val;
-       uint32_t gran;
-       struct kvm_one_reg reg = {
-               .id     = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1),
-               .addr   = (uint64_t)&val,
-       };
-
-       kvm_fd = open_kvm_dev_path_or_exit();
-       vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, (void *)(unsigned long)ipa);
-       TEST_ASSERT(vm_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm_fd));
-
-       vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
-       TEST_ASSERT(vcpu_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu_fd));
-
-       err = ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init);
-       TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_PREFERRED_TARGET, err));
-       err = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &preferred_init);
-       TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_VCPU_INIT, err));
-
-       err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
-       TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd));
-
-       gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val);
-       *ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI,
-                                       ID_AA64MMFR0_EL1_TGRAN4_52_BIT);
-
-       gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val);
-       *ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI,
-                                       ID_AA64MMFR0_EL1_TGRAN64_IMP);
-
-       gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val);
-       *ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI,
-                                       ID_AA64MMFR0_EL1_TGRAN16_52_BIT);
-
-       close(vcpu_fd);
-       close(vm_fd);
-       close(kvm_fd);
-}
-
-#define __smccc_call(insn, function_id, arg0, arg1, arg2, arg3, arg4, arg5,    \
-                    arg6, res)                                                 \
-       asm volatile("mov   w0, %w[function_id]\n"                              \
-                    "mov   x1, %[arg0]\n"                                      \
-                    "mov   x2, %[arg1]\n"                                      \
-                    "mov   x3, %[arg2]\n"                                      \
-                    "mov   x4, %[arg3]\n"                                      \
-                    "mov   x5, %[arg4]\n"                                      \
-                    "mov   x6, %[arg5]\n"                                      \
-                    "mov   x7, %[arg6]\n"                                      \
-                    #insn  "#0\n"                                              \
-                    "mov   %[res0], x0\n"                                      \
-                    "mov   %[res1], x1\n"                                      \
-                    "mov   %[res2], x2\n"                                      \
-                    "mov   %[res3], x3\n"                                      \
-                    : [res0] "=r"(res->a0), [res1] "=r"(res->a1),              \
-                      [res2] "=r"(res->a2), [res3] "=r"(res->a3)               \
-                    : [function_id] "r"(function_id), [arg0] "r"(arg0),        \
-                      [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3),    \
-                      [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6)     \
-                    : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7")
-
-
-void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
-              uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
-              uint64_t arg6, struct arm_smccc_res *res)
-{
-       __smccc_call(hvc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
-                    arg6, res);
-}
-
-void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
-              uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
-              uint64_t arg6, struct arm_smccc_res *res)
-{
-       __smccc_call(smc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
-                    arg6, res);
-}
-
-void kvm_selftest_arch_init(void)
-{
-       /*
-        * arm64 doesn't have a true default mode, so start by computing the
-        * available IPA space and page sizes early.
-        */
-       guest_modes_append_default();
-}
-
-void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
-{
-       /*
-        * arm64 selftests use only TTBR0_EL1, meaning that the valid VA space
-        * is [0, 2^(64 - TCR_EL1.T0SZ)).
-        */
-       sparsebit_set_num(vm->vpages_valid, 0,
-                         (1ULL << vm->va_bits) >> vm->page_shift);
-}
-
-/* Helper to call wfi instruction. */
-void wfi(void)
-{
-       asm volatile("wfi");
-}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c b/tools/testing/selftests/kvm/lib/aarch64/spinlock.c
deleted file mode 100644 (file)
index a076e78..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARM64 Spinlock support
- */
-#include <stdint.h>
-
-#include "spinlock.h"
-
-void spin_lock(struct spinlock *lock)
-{
-       int val, res;
-
-       asm volatile(
-       "1:     ldaxr   %w0, [%2]\n"
-       "       cbnz    %w0, 1b\n"
-       "       mov     %w0, #1\n"
-       "       stxr    %w1, %w0, [%2]\n"
-       "       cbnz    %w1, 1b\n"
-       : "=&r" (val), "=&r" (res)
-       : "r" (&lock->v)
-       : "memory");
-}
-
-void spin_unlock(struct spinlock *lock)
-{
-       asm volatile("stlr wzr, [%0]\n" : : "r" (&lock->v) : "memory");
-}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
deleted file mode 100644 (file)
index ddab0ce..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ucall support. A ucall is a "hypercall to userspace".
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-#include "kvm_util.h"
-
-vm_vaddr_t *ucall_exit_mmio_addr;
-
-void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-       vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR);
-
-       virt_map(vm, mmio_gva, mmio_gpa, 1);
-
-       vm->ucall_mmio_addr = mmio_gpa;
-
-       write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
-}
-
-void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-
-       if (run->exit_reason == KVM_EXIT_MMIO &&
-           run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) {
-               TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t),
-                           "Unexpected ucall exit mmio address access");
-               return (void *)(*((uint64_t *)run->mmio.data));
-       }
-
-       return NULL;
-}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
deleted file mode 100644 (file)
index 4427f43..0000000
+++ /dev/null
@@ -1,188 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARM Generic Interrupt Controller (GIC) v3 host support
- */
-
-#include <linux/kernel.h>
-#include <linux/kvm.h>
-#include <linux/sizes.h>
-#include <asm/cputype.h>
-#include <asm/kvm_para.h>
-#include <asm/kvm.h>
-
-#include "kvm_util.h"
-#include "vgic.h"
-#include "gic.h"
-#include "gic_v3.h"
-
-/*
- * vGIC-v3 default host setup
- *
- * Input args:
- *     vm - KVM VM
- *     nr_vcpus - Number of vCPUs supported by this VM
- *
- * Output args: None
- *
- * Return: GIC file-descriptor or negative error code upon failure
- *
- * The function creates a vGIC-v3 device and maps the distributor and
- * redistributor regions of the guest. Since it depends on the number of
- * vCPUs for the VM, it must be called after all the vCPUs have been created.
- */
-int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
-{
-       int gic_fd;
-       uint64_t attr;
-       struct list_head *iter;
-       unsigned int nr_gic_pages, nr_vcpus_created = 0;
-
-       TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty");
-
-       /*
-        * Make sure that the caller is in fact calling this
-        * function after all the vCPUs are added.
-        */
-       list_for_each(iter, &vm->vcpus)
-               nr_vcpus_created++;
-       TEST_ASSERT(nr_vcpus == nr_vcpus_created,
-                       "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)",
-                       nr_vcpus, nr_vcpus_created);
-
-       /* Distributor setup */
-       gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
-       if (gic_fd < 0)
-               return gic_fd;
-
-       kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs);
-
-       kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
-                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
-       attr = GICD_BASE_GPA;
-       kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_DIST, &attr);
-       nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE);
-       virt_map(vm, GICD_BASE_GPA, GICD_BASE_GPA, nr_gic_pages);
-
-       /* Redistributor setup */
-       attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, GICR_BASE_GPA, 0, 0);
-       kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &attr);
-       nr_gic_pages = vm_calc_num_guest_pages(vm->mode,
-                                               KVM_VGIC_V3_REDIST_SIZE * nr_vcpus);
-       virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages);
-
-       kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
-                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
-       return gic_fd;
-}
-
-/* should only work for level sensitive interrupts */
-int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
-{
-       uint64_t attr = 32 * (intid / 32);
-       uint64_t index = intid % 32;
-       uint64_t val;
-       int ret;
-
-       ret = __kvm_device_attr_get(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
-                                   attr, &val);
-       if (ret != 0)
-               return ret;
-
-       val |= 1U << index;
-       ret = __kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
-                                   attr, &val);
-       return ret;
-}
-
-void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
-{
-       int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
-
-       TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, ret));
-}
-
-int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
-{
-       uint32_t irq = intid & KVM_ARM_IRQ_NUM_MASK;
-
-       TEST_ASSERT(!INTID_IS_SGI(intid), "KVM_IRQ_LINE's interface itself "
-               "doesn't allow injecting SGIs. There's no mask for it.");
-
-       if (INTID_IS_PPI(intid))
-               irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT;
-       else
-               irq |= KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT;
-
-       return _kvm_irq_line(vm, irq, level);
-}
-
-void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
-{
-       int ret = _kvm_arm_irq_line(vm, intid, level);
-
-       TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
-}
-
-static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu,
-                         uint64_t reg_off)
-{
-       uint64_t reg = intid / 32;
-       uint64_t index = intid % 32;
-       uint64_t attr = reg_off + reg * 4;
-       uint64_t val;
-       bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid);
-
-       uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
-                                         : KVM_DEV_ARM_VGIC_GRP_DIST_REGS;
-
-       if (intid_is_private) {
-               /* TODO: only vcpu 0 implemented for now. */
-               assert(vcpu->id == 0);
-               attr += SZ_64K;
-       }
-
-       /* Check that the addr part of the attr is within 32 bits. */
-       assert((attr & ~KVM_DEV_ARM_VGIC_OFFSET_MASK) == 0);
-
-       /*
-        * All calls will succeed, even with invalid intid's, as long as the
-        * addr part of the attr is within 32 bits (checked above). An invalid
-        * intid will just make the read/writes point to above the intended
-        * register space (i.e., ICPENDR after ISPENDR).
-        */
-       kvm_device_attr_get(gic_fd, group, attr, &val);
-       val |= 1ULL << index;
-       kvm_device_attr_set(gic_fd, group, attr, &val);
-}
-
-void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
-{
-       vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR);
-}
-
-void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
-{
-       vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER);
-}
-
-int vgic_its_setup(struct kvm_vm *vm)
-{
-       int its_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_ITS);
-       u64 attr;
-
-       attr = GITS_BASE_GPA;
-       kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_ITS_ADDR_TYPE, &attr);
-
-       kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
-                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
-       virt_map(vm, GITS_BASE_GPA, GITS_BASE_GPA,
-                vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_ITS_SIZE));
-
-       return its_fd;
-}
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic.c b/tools/testing/selftests/kvm/lib/arm64/gic.c
new file mode 100644 (file)
index 0000000..7abbf88
--- /dev/null
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) support
+ */
+
+#include <errno.h>
+#include <linux/bits.h>
+#include <linux/sizes.h>
+
+#include "kvm_util.h"
+
+#include <gic.h>
+#include "gic_private.h"
+#include "processor.h"
+#include "spinlock.h"
+
+static const struct gic_common_ops *gic_common_ops;
+static struct spinlock gic_lock;
+
+static void gic_cpu_init(unsigned int cpu)
+{
+       gic_common_ops->gic_cpu_init(cpu);
+}
+
+static void gic_dist_init(enum gic_type type, unsigned int nr_cpus)
+{
+       const struct gic_common_ops *gic_ops = NULL;
+
+       spin_lock(&gic_lock);
+
+       /* Distributor initialization is needed only once per VM */
+       if (gic_common_ops) {
+               spin_unlock(&gic_lock);
+               return;
+       }
+
+       if (type == GIC_V3)
+               gic_ops = &gicv3_ops;
+
+       GUEST_ASSERT(gic_ops);
+
+       gic_ops->gic_init(nr_cpus);
+       gic_common_ops = gic_ops;
+
+       /* Make sure that the initialized data is visible to all the vCPUs */
+       dsb(sy);
+
+       spin_unlock(&gic_lock);
+}
+
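+/*
+ * Meant to be called by every vCPU: the distributor is initialized only once
+ * (serialized by gic_lock), while the CPU interface is set up for each caller.
+ */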
+void gic_init(enum gic_type type, unsigned int nr_cpus)
+{
+       uint32_t cpu = guest_get_vcpuid();
+
+       GUEST_ASSERT(type < GIC_TYPE_MAX);
+       GUEST_ASSERT(nr_cpus);
+
+       gic_dist_init(type, nr_cpus);
+       gic_cpu_init(cpu);
+}
+
+void gic_irq_enable(unsigned int intid)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_irq_enable(intid);
+}
+
+void gic_irq_disable(unsigned int intid)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_irq_disable(intid);
+}
+
+unsigned int gic_get_and_ack_irq(void)
+{
+       uint64_t irqstat;
+       unsigned int intid;
+
+       GUEST_ASSERT(gic_common_ops);
+
+       irqstat = gic_common_ops->gic_read_iar();
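+       /* The INTID of the acknowledged interrupt is in bits [23:0] of the IAR value. */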
+       intid = irqstat & GENMASK(23, 0);
+
+       return intid;
+}
+
+void gic_set_eoi(unsigned int intid)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_write_eoir(intid);
+}
+
+void gic_set_dir(unsigned int intid)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_write_dir(intid);
+}
+
+void gic_set_eoi_split(bool split)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_set_eoi_split(split);
+}
+
+void gic_set_priority_mask(uint64_t pmr)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_set_priority_mask(pmr);
+}
+
+void gic_set_priority(unsigned int intid, unsigned int prio)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_set_priority(intid, prio);
+}
+
+void gic_irq_set_active(unsigned int intid)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_irq_set_active(intid);
+}
+
+void gic_irq_clear_active(unsigned int intid)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_irq_clear_active(intid);
+}
+
+bool gic_irq_get_active(unsigned int intid)
+{
+       GUEST_ASSERT(gic_common_ops);
+       return gic_common_ops->gic_irq_get_active(intid);
+}
+
+void gic_irq_set_pending(unsigned int intid)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_irq_set_pending(intid);
+}
+
+void gic_irq_clear_pending(unsigned int intid)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_irq_clear_pending(intid);
+}
+
+bool gic_irq_get_pending(unsigned int intid)
+{
+       GUEST_ASSERT(gic_common_ops);
+       return gic_common_ops->gic_irq_get_pending(intid);
+}
+
+void gic_irq_set_config(unsigned int intid, bool is_edge)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_irq_set_config(intid, is_edge);
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_private.h b/tools/testing/selftests/kvm/lib/arm64/gic_private.h
new file mode 100644 (file)
index 0000000..d24e9ec
--- /dev/null
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) private defines that are only
+ * shared among the GIC library code.
+ */
+
+#ifndef SELFTEST_KVM_GIC_PRIVATE_H
+#define SELFTEST_KVM_GIC_PRIVATE_H
+
+struct gic_common_ops {
+       void (*gic_init)(unsigned int nr_cpus);
+       void (*gic_cpu_init)(unsigned int cpu);
+       void (*gic_irq_enable)(unsigned int intid);
+       void (*gic_irq_disable)(unsigned int intid);
+       uint64_t (*gic_read_iar)(void);
+       void (*gic_write_eoir)(uint32_t irq);
+       void (*gic_write_dir)(uint32_t irq);
+       void (*gic_set_eoi_split)(bool split);
+       void (*gic_set_priority_mask)(uint64_t mask);
+       void (*gic_set_priority)(uint32_t intid, uint32_t prio);
+       void (*gic_irq_set_active)(uint32_t intid);
+       void (*gic_irq_clear_active)(uint32_t intid);
+       bool (*gic_irq_get_active)(uint32_t intid);
+       void (*gic_irq_set_pending)(uint32_t intid);
+       void (*gic_irq_clear_pending)(uint32_t intid);
+       bool (*gic_irq_get_pending)(uint32_t intid);
+       void (*gic_irq_set_config)(uint32_t intid, bool is_edge);
+};
+
+extern const struct gic_common_ops gicv3_ops;
+
+#endif /* SELFTEST_KVM_GIC_PRIVATE_H */
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
new file mode 100644 (file)
index 0000000..66d0550
--- /dev/null
@@ -0,0 +1,427 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) v3 support
+ */
+
+#include <linux/sizes.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "delay.h"
+
+#include "gic.h"
+#include "gic_v3.h"
+#include "gic_private.h"
+
+#define GICV3_MAX_CPUS                 512
+
+#define GICD_INT_DEF_PRI               0xa0
+#define GICD_INT_DEF_PRI_X4            ((GICD_INT_DEF_PRI << 24) |\
+                                       (GICD_INT_DEF_PRI << 16) |\
+                                       (GICD_INT_DEF_PRI << 8) |\
+                                       GICD_INT_DEF_PRI)
+
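+/*
+ * GIC priorities are "lower value == higher priority", so a priority mask of
+ * 0xf0 leaves interrupts at the default priority of 0xa0 unmasked.
+ */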
+#define ICC_PMR_DEF_PRIO               0xf0
+
+struct gicv3_data {
+       unsigned int nr_cpus;
+       unsigned int nr_spis;
+};
+
+#define sgi_base_from_redist(redist_base)      (redist_base + SZ_64K)
+#define DIST_BIT                               (1U << 31)
+
+enum gicv3_intid_range {
+       SGI_RANGE,
+       PPI_RANGE,
+       SPI_RANGE,
+       INVALID_RANGE,
+};
+
+static struct gicv3_data gicv3_data;
+
+static void gicv3_gicd_wait_for_rwp(void)
+{
+       unsigned int count = 100000; /* 1s */
+
+       while (readl(GICD_BASE_GVA + GICD_CTLR) & GICD_CTLR_RWP) {
+               GUEST_ASSERT(count--);
+               udelay(10);
+       }
+}
+
+static inline volatile void *gicr_base_cpu(uint32_t cpu)
+{
+       /* Align all the redistributors sequentially */
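+       /* Each redistributor occupies two 64K frames: RD_base followed by the SGI frame. */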
+       return GICR_BASE_GVA + cpu * SZ_64K * 2;
+}
+
+static void gicv3_gicr_wait_for_rwp(uint32_t cpu)
+{
+       unsigned int count = 100000; /* 1s */
+
+       while (readl(gicr_base_cpu(cpu) + GICR_CTLR) & GICR_CTLR_RWP) {
+               GUEST_ASSERT(count--);
+               udelay(10);
+       }
+}
+
+static void gicv3_wait_for_rwp(uint32_t cpu_or_dist)
+{
+       if (cpu_or_dist & DIST_BIT)
+               gicv3_gicd_wait_for_rwp();
+       else
+               gicv3_gicr_wait_for_rwp(cpu_or_dist);
+}
+
+static enum gicv3_intid_range get_intid_range(unsigned int intid)
+{
+       switch (intid) {
+       case 0 ... 15:
+               return SGI_RANGE;
+       case 16 ... 31:
+               return PPI_RANGE;
+       case 32 ... 1019:
+               return SPI_RANGE;
+       }
+
+       /* We should not be reaching here */
+       GUEST_ASSERT(0);
+
+       return INVALID_RANGE;
+}
+
+static uint64_t gicv3_read_iar(void)
+{
+       uint64_t irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
+
+       dsb(sy);
+       return irqstat;
+}
+
+static void gicv3_write_eoir(uint32_t irq)
+{
+       write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
+       isb();
+}
+
+static void gicv3_write_dir(uint32_t irq)
+{
+       write_sysreg_s(irq, SYS_ICC_DIR_EL1);
+       isb();
+}
+
+static void gicv3_set_priority_mask(uint64_t mask)
+{
+       write_sysreg_s(mask, SYS_ICC_PMR_EL1);
+}
+
+static void gicv3_set_eoi_split(bool split)
+{
+       uint32_t val;
+
+       /*
+        * All other fields are read-only, so no need to read CTLR first. In
+        * fact, the kernel does the same.
+        */
+       val = split ? (1U << 1) : 0;
+       write_sysreg_s(val, SYS_ICC_CTLR_EL1);
+       isb();
+}
+
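+/*
+ * The accessors below take a "cpu_or_dist" token: DIST_BIT selects the
+ * distributor, otherwise the value is treated as a vCPU id and the access
+ * targets that vCPU's redistributor SGI frame.
+ */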
+uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset)
+{
+       volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA
+                       : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist));
+       return readl(base + offset);
+}
+
+void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val)
+{
+       volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA
+                       : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist));
+       writel(reg_val, base + offset);
+}
+
+uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, uint64_t offset, uint32_t mask)
+{
+       return gicv3_reg_readl(cpu_or_dist, offset) & mask;
+}
+
+void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset,
+               uint32_t mask, uint32_t reg_val)
+{
+       uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask;
+
+       tmp |= (reg_val & mask);
+       gicv3_reg_writel(cpu_or_dist, offset, tmp);
+}
+
+/*
+ * We use a single offset for the distributor and redistributor maps as they
+ * have the same value in both. The only exceptions are registers that only
+ * exist in one and not the other, like GICR_WAKER that doesn't exist in the
+ * distributor map. Such registers are conveniently marked as reserved in the
+ * map that doesn't implement it; like GICR_WAKER's offset of 0x0014 being
+ * marked as "Reserved" in the Distributor map.
+ */
+static void gicv3_access_reg(uint32_t intid, uint64_t offset,
+               uint32_t reg_bits, uint32_t bits_per_field,
+               bool write, uint32_t *val)
+{
+       uint32_t cpu = guest_get_vcpuid();
+       enum gicv3_intid_range intid_range = get_intid_range(intid);
+       uint32_t fields_per_reg, index, mask, shift;
+       uint32_t cpu_or_dist;
+
+       GUEST_ASSERT(bits_per_field <= reg_bits);
+       GUEST_ASSERT(!write || *val < (1U << bits_per_field));
+       /*
+        * This function does not support 64 bit accesses. Just asserting here
+        * until we implement readq/writeq.
+        */
+       GUEST_ASSERT(reg_bits == 32);
+
+       fields_per_reg = reg_bits / bits_per_field;
+       index = intid % fields_per_reg;
+       shift = index * bits_per_field;
+       mask = ((1U << bits_per_field) - 1) << shift;
+
+       /* Set offset to the actual register holding intid's config. */
+       offset += (intid / fields_per_reg) * (reg_bits / 8);
+
+       cpu_or_dist = (intid_range == SPI_RANGE) ? DIST_BIT : cpu;
+
+       if (write)
+               gicv3_setl_fields(cpu_or_dist, offset, mask, *val << shift);
+       *val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift;
+}
+
+static void gicv3_write_reg(uint32_t intid, uint64_t offset,
+               uint32_t reg_bits, uint32_t bits_per_field, uint32_t val)
+{
+       gicv3_access_reg(intid, offset, reg_bits,
+                       bits_per_field, true, &val);
+}
+
+static uint32_t gicv3_read_reg(uint32_t intid, uint64_t offset,
+               uint32_t reg_bits, uint32_t bits_per_field)
+{
+       uint32_t val;
+
+       gicv3_access_reg(intid, offset, reg_bits,
+                       bits_per_field, false, &val);
+       return val;
+}
+
+static void gicv3_set_priority(uint32_t intid, uint32_t prio)
+{
+       gicv3_write_reg(intid, GICD_IPRIORITYR, 32, 8, prio);
+}
+
+/* Sets the intid to be level-sensitive or edge-triggered. */
+static void gicv3_irq_set_config(uint32_t intid, bool is_edge)
+{
+       uint32_t val;
+
+       /* N/A for private interrupts. */
+       GUEST_ASSERT(get_intid_range(intid) == SPI_RANGE);
+       val = is_edge ? 2 : 0;
+       gicv3_write_reg(intid, GICD_ICFGR, 32, 2, val);
+}
+
+static void gicv3_irq_enable(uint32_t intid)
+{
+       bool is_spi = get_intid_range(intid) == SPI_RANGE;
+       uint32_t cpu = guest_get_vcpuid();
+
+       gicv3_write_reg(intid, GICD_ISENABLER, 32, 1, 1);
+       gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
+}
+
+static void gicv3_irq_disable(uint32_t intid)
+{
+       bool is_spi = get_intid_range(intid) == SPI_RANGE;
+       uint32_t cpu = guest_get_vcpuid();
+
+       gicv3_write_reg(intid, GICD_ICENABLER, 32, 1, 1);
+       gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
+}
+
+static void gicv3_irq_set_active(uint32_t intid)
+{
+       gicv3_write_reg(intid, GICD_ISACTIVER, 32, 1, 1);
+}
+
+static void gicv3_irq_clear_active(uint32_t intid)
+{
+       gicv3_write_reg(intid, GICD_ICACTIVER, 32, 1, 1);
+}
+
+static bool gicv3_irq_get_active(uint32_t intid)
+{
+       return gicv3_read_reg(intid, GICD_ISACTIVER, 32, 1);
+}
+
+static void gicv3_irq_set_pending(uint32_t intid)
+{
+       gicv3_write_reg(intid, GICD_ISPENDR, 32, 1, 1);
+}
+
+static void gicv3_irq_clear_pending(uint32_t intid)
+{
+       gicv3_write_reg(intid, GICD_ICPENDR, 32, 1, 1);
+}
+
+static bool gicv3_irq_get_pending(uint32_t intid)
+{
+       return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1);
+}
+
+static void gicv3_enable_redist(volatile void *redist_base)
+{
+       uint32_t val = readl(redist_base + GICR_WAKER);
+       unsigned int count = 100000; /* 1s */
+
+       val &= ~GICR_WAKER_ProcessorSleep;
+       writel(val, redist_base + GICR_WAKER);
+
+       /* Wait until the processor is 'active' */
+       while (readl(redist_base + GICR_WAKER) & GICR_WAKER_ChildrenAsleep) {
+               GUEST_ASSERT(count--);
+               udelay(10);
+       }
+}
+
+static void gicv3_cpu_init(unsigned int cpu)
+{
+       volatile void *sgi_base;
+       unsigned int i;
+       volatile void *redist_base_cpu;
+
+       GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
+
+       redist_base_cpu = gicr_base_cpu(cpu);
+       sgi_base = sgi_base_from_redist(redist_base_cpu);
+
+       gicv3_enable_redist(redist_base_cpu);
+
+       /*
+        * Mark all the SGI and PPI interrupts as non-secure Group-1.
+        * Also, deactivate and disable them.
+        */
+       writel(~0, sgi_base + GICR_IGROUPR0);
+       writel(~0, sgi_base + GICR_ICACTIVER0);
+       writel(~0, sgi_base + GICR_ICENABLER0);
+
+       /* Set a default priority for all the SGIs and PPIs */
+       for (i = 0; i < 32; i += 4)
+               writel(GICD_INT_DEF_PRI_X4,
+                               sgi_base + GICR_IPRIORITYR0 + i);
+
+       gicv3_gicr_wait_for_rwp(cpu);
+
+       /* Enable the GIC system register (ICC_*) access */
+       write_sysreg_s(read_sysreg_s(SYS_ICC_SRE_EL1) | ICC_SRE_EL1_SRE,
+                       SYS_ICC_SRE_EL1);
+
+       /* Set a default priority threshold */
+       write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
+
+       /* Enable non-secure Group-1 interrupts */
+       write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
+}
+
+static void gicv3_dist_init(void)
+{
+       unsigned int i;
+
+       /* Disable the distributor until we set things up */
+       writel(0, GICD_BASE_GVA + GICD_CTLR);
+       gicv3_gicd_wait_for_rwp();
+
+       /*
+        * Mark all the SPI interrupts as non-secure Group-1.
+        * Also, deactivate and disable them.
+        */
+       for (i = 32; i < gicv3_data.nr_spis; i += 32) {
+               writel(~0, GICD_BASE_GVA + GICD_IGROUPR + i / 8);
+               writel(~0, GICD_BASE_GVA + GICD_ICACTIVER + i / 8);
+               writel(~0, GICD_BASE_GVA + GICD_ICENABLER + i / 8);
+       }
+
+       /* Set a default priority for all the SPIs */
+       for (i = 32; i < gicv3_data.nr_spis; i += 4)
+               writel(GICD_INT_DEF_PRI_X4,
+                               GICD_BASE_GVA + GICD_IPRIORITYR + i);
+
+       /* Wait for the settings to sync-in */
+       gicv3_gicd_wait_for_rwp();
+
+       /* Finally, enable the distributor globally with ARE */
+       writel(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A |
+                       GICD_CTLR_ENABLE_G1, GICD_BASE_GVA + GICD_CTLR);
+       gicv3_gicd_wait_for_rwp();
+}
+
+static void gicv3_init(unsigned int nr_cpus)
+{
+       GUEST_ASSERT(nr_cpus <= GICV3_MAX_CPUS);
+
+       gicv3_data.nr_cpus = nr_cpus;
+       gicv3_data.nr_spis = GICD_TYPER_SPIS(
+                               readl(GICD_BASE_GVA + GICD_TYPER));
+       if (gicv3_data.nr_spis > 1020)
+               gicv3_data.nr_spis = 1020;
+
+       /*
+        * Initialize only the distributor for now.
+        * The redistributor and CPU interfaces are initialized
+        * later for every PE.
+        */
+       gicv3_dist_init();
+}
+
+const struct gic_common_ops gicv3_ops = {
+       .gic_init = gicv3_init,
+       .gic_cpu_init = gicv3_cpu_init,
+       .gic_irq_enable = gicv3_irq_enable,
+       .gic_irq_disable = gicv3_irq_disable,
+       .gic_read_iar = gicv3_read_iar,
+       .gic_write_eoir = gicv3_write_eoir,
+       .gic_write_dir = gicv3_write_dir,
+       .gic_set_priority_mask = gicv3_set_priority_mask,
+       .gic_set_eoi_split = gicv3_set_eoi_split,
+       .gic_set_priority = gicv3_set_priority,
+       .gic_irq_set_active = gicv3_irq_set_active,
+       .gic_irq_clear_active = gicv3_irq_clear_active,
+       .gic_irq_get_active = gicv3_irq_get_active,
+       .gic_irq_set_pending = gicv3_irq_set_pending,
+       .gic_irq_clear_pending = gicv3_irq_clear_pending,
+       .gic_irq_get_pending = gicv3_irq_get_pending,
+       .gic_irq_set_config = gicv3_irq_set_config,
+};
+
+void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
+                          vm_paddr_t pend_table)
+{
+       volatile void *rdist_base = gicr_base_cpu(guest_get_vcpuid());
+
+       u32 ctlr;
+       u64 val;
+
+       val = (cfg_table |
+              GICR_PROPBASER_InnerShareable |
+              GICR_PROPBASER_RaWaWb |
+              ((ilog2(cfg_table_size) - 1) & GICR_PROPBASER_IDBITS_MASK));
+       writeq_relaxed(val, rdist_base + GICR_PROPBASER);
+
+       val = (pend_table |
+              GICR_PENDBASER_InnerShareable |
+              GICR_PENDBASER_RaWaWb);
+       writeq_relaxed(val, rdist_base + GICR_PENDBASER);
+
+       ctlr = readl_relaxed(rdist_base + GICR_CTLR);
+       ctlr |= GICR_CTLR_ENABLE_LPIS;
+       writel_relaxed(ctlr, rdist_base + GICR_CTLR);
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
new file mode 100644 (file)
index 0000000..09f2705
--- /dev/null
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Guest ITS library, generously donated by drivers/irqchip/irq-gic-v3-its.c
+ * over in the kernel tree.
+ */
+
+#include <linux/kvm.h>
+#include <linux/sizes.h>
+#include <asm/kvm_para.h>
+#include <asm/kvm.h>
+
+#include "kvm_util.h"
+#include "vgic.h"
+#include "gic.h"
+#include "gic_v3.h"
+#include "processor.h"
+
+static u64 its_read_u64(unsigned long offset)
+{
+       return readq_relaxed(GITS_BASE_GVA + offset);
+}
+
+static void its_write_u64(unsigned long offset, u64 val)
+{
+       writeq_relaxed(val, GITS_BASE_GVA + offset);
+}
+
+static u32 its_read_u32(unsigned long offset)
+{
+       return readl_relaxed(GITS_BASE_GVA + offset);
+}
+
+static void its_write_u32(unsigned long offset, u32 val)
+{
+       writel_relaxed(val, GITS_BASE_GVA + offset);
+}
+
+static unsigned long its_find_baser(unsigned int type)
+{
+       int i;
+
+       for (i = 0; i < GITS_BASER_NR_REGS; i++) {
+               u64 baser;
+               unsigned long offset = GITS_BASER + (i * sizeof(baser));
+
+               baser = its_read_u64(offset);
+               if (GITS_BASER_TYPE(baser) == type)
+                       return offset;
+       }
+
+       GUEST_FAIL("Couldn't find an ITS BASER of type %u", type);
+       return -1;
+}
+
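+/*
+ * GITS_BASER and GITS_CBASER encode the table size in their low bits as a
+ * number of pages minus one; the physical base address and the
+ * cacheability/shareability attributes are OR'd into the same 64-bit value.
+ */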
+static void its_install_table(unsigned int type, vm_paddr_t base, size_t size)
+{
+       unsigned long offset = its_find_baser(type);
+       u64 baser;
+
+       baser = ((size / SZ_64K) - 1) |
+               GITS_BASER_PAGE_SIZE_64K |
+               GITS_BASER_InnerShareable |
+               base |
+               GITS_BASER_RaWaWb |
+               GITS_BASER_VALID;
+
+       its_write_u64(offset, baser);
+}
+
+static void its_install_cmdq(vm_paddr_t base, size_t size)
+{
+       u64 cbaser;
+
+       cbaser = ((size / SZ_4K) - 1) |
+                GITS_CBASER_InnerShareable |
+                base |
+                GITS_CBASER_RaWaWb |
+                GITS_CBASER_VALID;
+
+       its_write_u64(GITS_CBASER, cbaser);
+}
+
+void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz,
+             vm_paddr_t device_tbl, size_t device_tbl_sz,
+             vm_paddr_t cmdq, size_t cmdq_size)
+{
+       u32 ctlr;
+
+       its_install_table(GITS_BASER_TYPE_COLLECTION, coll_tbl, coll_tbl_sz);
+       its_install_table(GITS_BASER_TYPE_DEVICE, device_tbl, device_tbl_sz);
+       its_install_cmdq(cmdq, cmdq_size);
+
+       ctlr = its_read_u32(GITS_CTLR);
+       ctlr |= GITS_CTLR_ENABLE;
+       its_write_u32(GITS_CTLR, ctlr);
+}
+
+struct its_cmd_block {
+       union {
+               u64     raw_cmd[4];
+               __le64  raw_cmd_le[4];
+       };
+};
+
+static inline void its_fixup_cmd(struct its_cmd_block *cmd)
+{
+       /* Let's fixup BE commands */
+       cmd->raw_cmd_le[0] = cpu_to_le64(cmd->raw_cmd[0]);
+       cmd->raw_cmd_le[1] = cpu_to_le64(cmd->raw_cmd[1]);
+       cmd->raw_cmd_le[2] = cpu_to_le64(cmd->raw_cmd[2]);
+       cmd->raw_cmd_le[3] = cpu_to_le64(cmd->raw_cmd[3]);
+}
+
+static void its_mask_encode(u64 *raw_cmd, u64 val, int h, int l)
+{
+       u64 mask = GENMASK_ULL(h, l);
+       *raw_cmd &= ~mask;
+       *raw_cmd |= (val << l) & mask;
+}
+
+static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr)
+{
+       its_mask_encode(&cmd->raw_cmd[0], cmd_nr, 7, 0);
+}
+
+static void its_encode_devid(struct its_cmd_block *cmd, u32 devid)
+{
+       its_mask_encode(&cmd->raw_cmd[0], devid, 63, 32);
+}
+
+static void its_encode_event_id(struct its_cmd_block *cmd, u32 id)
+{
+       its_mask_encode(&cmd->raw_cmd[1], id, 31, 0);
+}
+
+static void its_encode_phys_id(struct its_cmd_block *cmd, u32 phys_id)
+{
+       its_mask_encode(&cmd->raw_cmd[1], phys_id, 63, 32);
+}
+
+static void its_encode_size(struct its_cmd_block *cmd, u8 size)
+{
+       its_mask_encode(&cmd->raw_cmd[1], size, 4, 0);
+}
+
+static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr)
+{
+       its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8);
+}
+
+static void its_encode_valid(struct its_cmd_block *cmd, int valid)
+{
+       its_mask_encode(&cmd->raw_cmd[2], !!valid, 63, 63);
+}
+
+static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr)
+{
+       its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16);
+}
+
+static void its_encode_collection(struct its_cmd_block *cmd, u16 col)
+{
+       its_mask_encode(&cmd->raw_cmd[2], col, 15, 0);
+}
+
+#define GITS_CMDQ_POLL_ITERATIONS      0
+
+static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd)
+{
+       u64 cwriter = its_read_u64(GITS_CWRITER);
+       struct its_cmd_block *dst = cmdq_base + cwriter;
+       u64 cbaser = its_read_u64(GITS_CBASER);
+       size_t cmdq_size;
+       u64 next;
+       int i;
+
+       cmdq_size = ((cbaser & 0xFF) + 1) * SZ_4K;
+
+       its_fixup_cmd(cmd);
+
+       WRITE_ONCE(*dst, *cmd);
+       dsb(ishst);
+       next = (cwriter + sizeof(*cmd)) % cmdq_size;
+       its_write_u64(GITS_CWRITER, next);
+
+       /*
+        * Polling isn't necessary considering KVM's ITS emulation at the time
+        * of writing this, as the CMDQ is processed synchronously after a write
+        * to CWRITER.
+        */
+       for (i = 0; its_read_u64(GITS_CREADR) != next; i++) {
+               __GUEST_ASSERT(i < GITS_CMDQ_POLL_ITERATIONS,
+                              "ITS didn't process command at offset %lu after %d iterations\n",
+                              cwriter, i);
+
+               cpu_relax();
+       }
+}
+
+void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base,
+                      size_t itt_size, bool valid)
+{
+       struct its_cmd_block cmd = {};
+
+       its_encode_cmd(&cmd, GITS_CMD_MAPD);
+       its_encode_devid(&cmd, device_id);
+       its_encode_size(&cmd, ilog2(itt_size) - 1);
+       its_encode_itt(&cmd, itt_base);
+       its_encode_valid(&cmd, valid);
+
+       its_send_cmd(cmdq_base, &cmd);
+}
+
+void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid)
+{
+       struct its_cmd_block cmd = {};
+
+       its_encode_cmd(&cmd, GITS_CMD_MAPC);
+       its_encode_collection(&cmd, collection_id);
+       its_encode_target(&cmd, vcpu_id);
+       its_encode_valid(&cmd, valid);
+
+       its_send_cmd(cmdq_base, &cmd);
+}
+
+void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
+                       u32 collection_id, u32 intid)
+{
+       struct its_cmd_block cmd = {};
+
+       its_encode_cmd(&cmd, GITS_CMD_MAPTI);
+       its_encode_devid(&cmd, device_id);
+       its_encode_event_id(&cmd, event_id);
+       its_encode_phys_id(&cmd, intid);
+       its_encode_collection(&cmd, collection_id);
+
+       its_send_cmd(cmdq_base, &cmd);
+}
+
+void its_send_invall_cmd(void *cmdq_base, u32 collection_id)
+{
+       struct its_cmd_block cmd = {};
+
+       its_encode_cmd(&cmd, GITS_CMD_INVALL);
+       its_encode_collection(&cmd, collection_id);
+
+       its_send_cmd(cmdq_base, &cmd);
+}
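As a rough illustration of how the helpers above compose, here is a minimal guest-side sketch of the usual ITS programming flow; the device/event/collection IDs, the LPI number (8192 is the first architectural LPI), and the caller that supplies cmdq_base and the ITT are assumptions for illustration only:

        static void guest_its_map_lpi_example(void *cmdq_base, vm_paddr_t itt_base,
                                              size_t itt_size)
        {
                const u32 device_id = 0, event_id = 0, collection_id = 0, intid = 8192;

                /* Bind the device to its Interrupt Translation Table. */
                its_send_mapd_cmd(cmdq_base, device_id, itt_base, itt_size, true);

                /* Map the collection to vCPU 0. */
                its_send_mapc_cmd(cmdq_base, 0, collection_id, true);

                /* Route (device_id, event_id) to the LPI via the collection. */
                its_send_mapti_cmd(cmdq_base, device_id, event_id, collection_id, intid);

                /* Invalidate any cached configuration for the collection. */
                its_send_invall_cmd(cmdq_base, collection_id);
        }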
diff --git a/tools/testing/selftests/kvm/lib/arm64/handlers.S b/tools/testing/selftests/kvm/lib/arm64/handlers.S
new file mode 100644 (file)
index 0000000..0e443ea
--- /dev/null
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+.macro save_registers
+       add     sp, sp, #-16 * 17
+
+       stp     x0, x1, [sp, #16 * 0]
+       stp     x2, x3, [sp, #16 * 1]
+       stp     x4, x5, [sp, #16 * 2]
+       stp     x6, x7, [sp, #16 * 3]
+       stp     x8, x9, [sp, #16 * 4]
+       stp     x10, x11, [sp, #16 * 5]
+       stp     x12, x13, [sp, #16 * 6]
+       stp     x14, x15, [sp, #16 * 7]
+       stp     x16, x17, [sp, #16 * 8]
+       stp     x18, x19, [sp, #16 * 9]
+       stp     x20, x21, [sp, #16 * 10]
+       stp     x22, x23, [sp, #16 * 11]
+       stp     x24, x25, [sp, #16 * 12]
+       stp     x26, x27, [sp, #16 * 13]
+       stp     x28, x29, [sp, #16 * 14]
+
+       /*
+        * This stores sp_el1 into ex_regs.sp so exception handlers can "look"
+        * at it. It will _not_ be used to restore the sp on return from the
+        * exception so handlers can not update it.
+        */
+       add     x1, sp, #16 * 17
+       stp     x30, x1, [sp, #16 * 15] /* x30, SP */
+
+       mrs     x1, elr_el1
+       mrs     x2, spsr_el1
+       stp     x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+.endm
+
+.macro restore_registers
+       ldp     x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+       msr     elr_el1, x1
+       msr     spsr_el1, x2
+
+       /* sp is not restored */
+       ldp     x30, xzr, [sp, #16 * 15] /* x30, SP */
+
+       ldp     x28, x29, [sp, #16 * 14]
+       ldp     x26, x27, [sp, #16 * 13]
+       ldp     x24, x25, [sp, #16 * 12]
+       ldp     x22, x23, [sp, #16 * 11]
+       ldp     x20, x21, [sp, #16 * 10]
+       ldp     x18, x19, [sp, #16 * 9]
+       ldp     x16, x17, [sp, #16 * 8]
+       ldp     x14, x15, [sp, #16 * 7]
+       ldp     x12, x13, [sp, #16 * 6]
+       ldp     x10, x11, [sp, #16 * 5]
+       ldp     x8, x9, [sp, #16 * 4]
+       ldp     x6, x7, [sp, #16 * 3]
+       ldp     x4, x5, [sp, #16 * 2]
+       ldp     x2, x3, [sp, #16 * 1]
+       ldp     x0, x1, [sp, #16 * 0]
+
+       add     sp, sp, #16 * 17
+
+       eret
+.endm
+
+.pushsection ".entry.text", "ax"
+.balign 0x800
+.global vectors
+vectors:
+.popsection
+
+.set   vector, 0
+
+/*
+ * Build an exception handler for vector and append a jump to it into
+ * vectors (while making sure that it's 0x80 aligned).
+ */
+.macro HANDLER, label
+handler_\label:
+       save_registers
+       mov     x0, sp
+       mov     x1, #vector
+       bl      route_exception
+       restore_registers
+
+.pushsection ".entry.text", "ax"
+.balign 0x80
+       b       handler_\label
+.popsection
+
+.set   vector, vector + 1
+.endm
+
+.macro HANDLER_INVALID
+.pushsection ".entry.text", "ax"
+.balign 0x80
+/* This will abort so no need to save and restore registers. */
+       mov     x0, #vector
+       mov     x1, #0 /* ec */
+       mov     x2, #0 /* valid_ec */
+       b       kvm_exit_unexpected_exception
+.popsection
+
+.set   vector, vector + 1
+.endm
+
+/*
+ * Caution: be sure to not add anything between the declaration of vectors
+ * above and these macro calls that will build the vectors table below it.
+ */
+       HANDLER_INVALID                         // Synchronous EL1t
+       HANDLER_INVALID                         // IRQ EL1t
+       HANDLER_INVALID                         // FIQ EL1t
+       HANDLER_INVALID                         // Error EL1t
+
+       HANDLER el1h_sync                       // Synchronous EL1h
+       HANDLER el1h_irq                        // IRQ EL1h
+       HANDLER el1h_fiq                        // FIQ EL1h
+       HANDLER el1h_error                      // Error EL1h
+
+       HANDLER el0_sync_64                     // Synchronous 64-bit EL0
+       HANDLER el0_irq_64                      // IRQ 64-bit EL0
+       HANDLER el0_fiq_64                      // FIQ 64-bit EL0
+       HANDLER el0_error_64                    // Error 64-bit EL0
+
+       HANDLER el0_sync_32                     // Synchronous 32-bit EL0
+       HANDLER el0_irq_32                      // IRQ 32-bit EL0
+       HANDLER el0_fiq_32                      // FIQ 32-bit EL0
+       HANDLER el0_error_32                    // Error 32-bit EL0
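For orientation, the macros above emit the standard 16-entry AArch64 vector table, one 0x80-byte stub per entry; a sketch of the resulting offsets relative to VBAR_EL1, assuming the architectural layout:

        /*
         * 0x000-0x180  Current EL with SP_EL0 (EL1t)  - all invalid, jump to kvm_exit_unexpected_exception
         * 0x200-0x380  Current EL with SP_ELx (EL1h)  - sync/irq/fiq/error -> route_exception
         * 0x400-0x580  Lower EL, AArch64 (64-bit EL0) - sync/irq/fiq/error -> route_exception
         * 0x600-0x780  Lower EL, AArch32 (32-bit EL0) - sync/irq/fiq/error -> route_exception
         */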
diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c
new file mode 100644 (file)
index 0000000..7ba3aa3
--- /dev/null
@@ -0,0 +1,647 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AArch64 code
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#include <linux/compiler.h>
+#include <assert.h>
+
+#include "guest_modes.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "ucall_common.h"
+
+#include <linux/bitfield.h>
+#include <linux/sizes.h>
+
+#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN    0xac0000
+
+static vm_vaddr_t exception_handlers;
+
+static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
+{
+       return (v + vm->page_size) & ~(vm->page_size - 1);
+}
+
+static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+       unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+       uint64_t mask = (1UL << (vm->va_bits - shift)) - 1;
+
+       return (gva >> shift) & mask;
+}
+
+static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+       unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift;
+       uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+
+       TEST_ASSERT(vm->pgtable_levels == 4,
+               "Mode %d does not have 4 page table levels", vm->mode);
+
+       return (gva >> shift) & mask;
+}
+
+static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+       unsigned int shift = (vm->page_shift - 3) + vm->page_shift;
+       uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+
+       TEST_ASSERT(vm->pgtable_levels >= 3,
+               "Mode %d does not have >= 3 page table levels", vm->mode);
+
+       return (gva >> shift) & mask;
+}
+
+static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+       uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+       return (gva >> vm->page_shift) & mask;
+}
+
+static inline bool use_lpa2_pte_format(struct kvm_vm *vm)
+{
+       return (vm->page_size == SZ_4K || vm->page_size == SZ_16K) &&
+           (vm->pa_bits > 48 || vm->va_bits > 48);
+}
+
+static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs)
+{
+       uint64_t pte;
+
+       if (use_lpa2_pte_format(vm)) {
+               pte = pa & GENMASK(49, vm->page_shift);
+               pte |= FIELD_GET(GENMASK(51, 50), pa) << 8;
+               attrs &= ~GENMASK(9, 8);
+       } else {
+               pte = pa & GENMASK(47, vm->page_shift);
+               if (vm->page_shift == 16)
+                       pte |= FIELD_GET(GENMASK(51, 48), pa) << 12;
+       }
+       pte |= attrs;
+
+       return pte;
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
+{
+       uint64_t pa;
+
+       if (use_lpa2_pte_format(vm)) {
+               pa = pte & GENMASK(49, vm->page_shift);
+               pa |= FIELD_GET(GENMASK(9, 8), pte) << 50;
+       } else {
+               pa = pte & GENMASK(47, vm->page_shift);
+               if (vm->page_shift == 16)
+                       pa |= FIELD_GET(GENMASK(15, 12), pte) << 48;
+       }
+
+       return pa;
+}
+
+static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
+{
+       unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+       return 1 << (vm->va_bits - shift);
+}
+
+static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
+{
+       return 1 << (vm->page_shift - 3);
+}
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
+{
+       size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;
+
+       if (vm->pgd_created)
+               return;
+
+       vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
+                                    KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+                                    vm->memslots[MEM_REGION_PT]);
+       vm->pgd_created = true;
+}
+
+static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+                        uint64_t flags)
+{
+       uint8_t attr_idx = flags & 7;
+       uint64_t *ptep;
+
+       TEST_ASSERT((vaddr % vm->page_size) == 0,
+               "Virtual address not on page boundary,\n"
+               "  vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
+       TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+               (vaddr >> vm->page_shift)),
+               "Invalid virtual address, vaddr: 0x%lx", vaddr);
+       TEST_ASSERT((paddr % vm->page_size) == 0,
+               "Physical address not on page boundary,\n"
+               "  paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
+       TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+               "Physical address beyond maximum supported,\n"
+               "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+               paddr, vm->max_gfn, vm->page_size);
+
+       ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
+       if (!*ptep)
+               *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+
+       switch (vm->pgtable_levels) {
+       case 4:
+               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
+               if (!*ptep)
+                       *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+               /* fall through */
+       case 3:
+               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
+               if (!*ptep)
+                       *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+               /* fall through */
+       case 2:
+               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
+               break;
+       default:
+               TEST_FAIL("Page table levels must be 2, 3, or 4");
+       }
+
+       *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3);  /* AF */
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+       uint64_t attr_idx = MT_NORMAL;
+
+       _virt_pg_map(vm, vaddr, paddr, attr_idx);
+}
+
+uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+       uint64_t *ptep;
+
+       if (!vm->pgd_created)
+               goto unmapped_gva;
+
+       ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
+       if (!ptep)
+               goto unmapped_gva;
+
+       switch (vm->pgtable_levels) {
+       case 4:
+               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
+               if (!ptep)
+                       goto unmapped_gva;
+               /* fall through */
+       case 3:
+               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
+               if (!ptep)
+                       goto unmapped_gva;
+               /* fall through */
+       case 2:
+               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
+               if (!ptep)
+                       goto unmapped_gva;
+               break;
+       default:
+               TEST_FAIL("Page table levels must be 2, 3, or 4");
+       }
+
+       return ptep;
+
+unmapped_gva:
+       TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
+       exit(EXIT_FAILURE);
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+       uint64_t *ptep = virt_get_pte_hva(vm, gva);
+
+       return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
+{
+#ifdef DEBUG
+       static const char * const type[] = { "", "pud", "pmd", "pte" };
+       uint64_t pte, *ptep;
+
+       if (level == 4)
+               return;
+
+       for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
+               ptep = addr_gpa2hva(vm, pte);
+               if (!*ptep)
+                       continue;
+               fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep);
+               pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level + 1);
+       }
+#endif
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+       int level = 4 - (vm->pgtable_levels - 1);
+       uint64_t pgd, *ptep;
+
+       if (!vm->pgd_created)
+               return;
+
+       for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {
+               ptep = addr_gpa2hva(vm, pgd);
+               if (!*ptep)
+                       continue;
+               fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep);
+               pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level);
+       }
+}
+
+void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
+{
+       struct kvm_vcpu_init default_init = { .target = -1, };
+       struct kvm_vm *vm = vcpu->vm;
+       uint64_t sctlr_el1, tcr_el1, ttbr0_el1;
+
+       if (!init)
+               init = &default_init;
+
+       if (init->target == -1) {
+               struct kvm_vcpu_init preferred;
+               vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred);
+               init->target = preferred.target;
+       }
+
+       vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init);
+
+       /*
+        * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
+        * registers, which the variable argument list macros do.
+        */
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20);
+
+       sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1));
+       tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1));
+
+       /* Configure base granule size */
+       switch (vm->mode) {
+       case VM_MODE_PXXV48_4K:
+               TEST_FAIL("AArch64 does not support 4K sized pages "
+                         "with ANY-bit physical address ranges");
+       case VM_MODE_P52V48_64K:
+       case VM_MODE_P48V48_64K:
+       case VM_MODE_P40V48_64K:
+       case VM_MODE_P36V48_64K:
+               tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+               break;
+       case VM_MODE_P52V48_16K:
+       case VM_MODE_P48V48_16K:
+       case VM_MODE_P40V48_16K:
+       case VM_MODE_P36V48_16K:
+       case VM_MODE_P36V47_16K:
+               tcr_el1 |= 2ul << 14; /* TG0 = 16KB */
+               break;
+       case VM_MODE_P52V48_4K:
+       case VM_MODE_P48V48_4K:
+       case VM_MODE_P40V48_4K:
+       case VM_MODE_P36V48_4K:
+               tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
+               break;
+       default:
+               TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+       }
+
+       ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift);
+
+       /* Configure output size */
+       switch (vm->mode) {
+       case VM_MODE_P52V48_4K:
+       case VM_MODE_P52V48_16K:
+       case VM_MODE_P52V48_64K:
+               tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+               ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
+               break;
+       case VM_MODE_P48V48_4K:
+       case VM_MODE_P48V48_16K:
+       case VM_MODE_P48V48_64K:
+               tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
+               break;
+       case VM_MODE_P40V48_4K:
+       case VM_MODE_P40V48_16K:
+       case VM_MODE_P40V48_64K:
+               tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
+               break;
+       case VM_MODE_P36V48_4K:
+       case VM_MODE_P36V48_16K:
+       case VM_MODE_P36V48_64K:
+       case VM_MODE_P36V47_16K:
+               tcr_el1 |= 1ul << 32; /* IPS = 36 bits */
+               break;
+       default:
+               TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+       }
+
+       sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */;
+       /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
+       tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
+       tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
+       if (use_lpa2_pte_format(vm))
+               tcr_el1 |= (1ul << 59) /* DS */;
+
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1);
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id);
+}
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+{
+       uint64_t pstate, pc;
+
+       pstate = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate));
+       pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
+
+       fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n",
+               indent, "", pstate, pc);
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+       vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+}
+
+static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+                                          struct kvm_vcpu_init *init)
+{
+       size_t stack_size;
+       uint64_t stack_vaddr;
+       struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
+
+       stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
+                                            vm->page_size;
+       stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+                                      DEFAULT_ARM64_GUEST_STACK_VADDR_MIN,
+                                      MEM_REGION_DATA);
+
+       aarch64_vcpu_setup(vcpu, init);
+
+       vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
+       return vcpu;
+}
+
+struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+                                 struct kvm_vcpu_init *init, void *guest_code)
+{
+       struct kvm_vcpu *vcpu = __aarch64_vcpu_add(vm, vcpu_id, init);
+
+       vcpu_arch_set_entry_point(vcpu, guest_code);
+
+       return vcpu;
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+       return __aarch64_vcpu_add(vm, vcpu_id, NULL);
+}
+
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
+{
+       va_list ap;
+       int i;
+
+       TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
+                   "  num: %u", num);
+
+       va_start(ap, num);
+
+       for (i = 0; i < num; i++) {
+               vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.regs[i]),
+                            va_arg(ap, uint64_t));
+       }
+
+       va_end(ap);
+}
+
+void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec)
+{
+       ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec);
+       while (1)
+               ;
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED)
+               return;
+
+       if (uc.args[2]) /* valid_ec */ {
+               assert(VECTOR_IS_SYNC(uc.args[0]));
+               TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
+                         uc.args[0], uc.args[1]);
+       } else {
+               assert(!VECTOR_IS_SYNC(uc.args[0]));
+               TEST_FAIL("Unexpected exception (vector:0x%lx)",
+                         uc.args[0]);
+       }
+}
+
+struct handlers {
+       handler_fn exception_handlers[VECTOR_NUM][ESR_ELx_EC_MAX + 1];
+};
+
+void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
+{
+       extern char vectors;
+
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors);
+}
+
+void route_exception(struct ex_regs *regs, int vector)
+{
+       struct handlers *handlers = (struct handlers *)exception_handlers;
+       bool valid_ec;
+       int ec = 0;
+
+       switch (vector) {
+       case VECTOR_SYNC_CURRENT:
+       case VECTOR_SYNC_LOWER_64:
+               ec = ESR_ELx_EC(read_sysreg(esr_el1));
+               valid_ec = true;
+               break;
+       case VECTOR_IRQ_CURRENT:
+       case VECTOR_IRQ_LOWER_64:
+       case VECTOR_FIQ_CURRENT:
+       case VECTOR_FIQ_LOWER_64:
+       case VECTOR_ERROR_CURRENT:
+       case VECTOR_ERROR_LOWER_64:
+               ec = 0;
+               valid_ec = false;
+               break;
+       default:
+               valid_ec = false;
+               goto unexpected_exception;
+       }
+
+       if (handlers && handlers->exception_handlers[vector][ec])
+               return handlers->exception_handlers[vector][ec](regs);
+
+unexpected_exception:
+       kvm_exit_unexpected_exception(vector, ec, valid_ec);
+}
+
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+       vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
+                                       vm->page_size, MEM_REGION_DATA);
+
+       *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec,
+                        void (*handler)(struct ex_regs *))
+{
+       struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+       assert(VECTOR_IS_SYNC(vector));
+       assert(vector < VECTOR_NUM);
+       assert(ec <= ESR_ELx_EC_MAX);
+       handlers->exception_handlers[vector][ec] = handler;
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+                        void (*handler)(struct ex_regs *))
+{
+       struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+       assert(!VECTOR_IS_SYNC(vector));
+       assert(vector < VECTOR_NUM);
+       handlers->exception_handlers[vector][0] = handler;
+}
+
+uint32_t guest_get_vcpuid(void)
+{
+       return read_sysreg(tpidr_el1);
+}
+
+static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran,
+                               uint32_t not_sup_val, uint32_t ipa52_min_val)
+{
+       if (gran == not_sup_val)
+               return 0;
+       else if (gran >= ipa52_min_val && vm_ipa >= 52)
+               return 52;
+       else
+               return min(vm_ipa, 48U);
+}
+
+void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
+                                       uint32_t *ipa16k, uint32_t *ipa64k)
+{
+       struct kvm_vcpu_init preferred_init;
+       int kvm_fd, vm_fd, vcpu_fd, err;
+       uint64_t val;
+       uint32_t gran;
+       struct kvm_one_reg reg = {
+               .id     = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1),
+               .addr   = (uint64_t)&val,
+       };
+
+       kvm_fd = open_kvm_dev_path_or_exit();
+       vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, (void *)(unsigned long)ipa);
+       TEST_ASSERT(vm_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm_fd));
+
+       vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
+       TEST_ASSERT(vcpu_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu_fd));
+
+       err = ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init);
+       TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_PREFERRED_TARGET, err));
+       err = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &preferred_init);
+       TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_VCPU_INIT, err));
+
+       err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
+       TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, err));
+
+       gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val);
+       *ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI,
+                                       ID_AA64MMFR0_EL1_TGRAN4_52_BIT);
+
+       gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val);
+       *ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI,
+                                       ID_AA64MMFR0_EL1_TGRAN64_IMP);
+
+       gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val);
+       *ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI,
+                                       ID_AA64MMFR0_EL1_TGRAN16_52_BIT);
+
+       close(vcpu_fd);
+       close(vm_fd);
+       close(kvm_fd);
+}
+
+#define __smccc_call(insn, function_id, arg0, arg1, arg2, arg3, arg4, arg5,    \
+                    arg6, res)                                                 \
+       asm volatile("mov   w0, %w[function_id]\n"                              \
+                    "mov   x1, %[arg0]\n"                                      \
+                    "mov   x2, %[arg1]\n"                                      \
+                    "mov   x3, %[arg2]\n"                                      \
+                    "mov   x4, %[arg3]\n"                                      \
+                    "mov   x5, %[arg4]\n"                                      \
+                    "mov   x6, %[arg5]\n"                                      \
+                    "mov   x7, %[arg6]\n"                                      \
+                    #insn  "#0\n"                                              \
+                    "mov   %[res0], x0\n"                                      \
+                    "mov   %[res1], x1\n"                                      \
+                    "mov   %[res2], x2\n"                                      \
+                    "mov   %[res3], x3\n"                                      \
+                    : [res0] "=r"(res->a0), [res1] "=r"(res->a1),              \
+                      [res2] "=r"(res->a2), [res3] "=r"(res->a3)               \
+                    : [function_id] "r"(function_id), [arg0] "r"(arg0),        \
+                      [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3),    \
+                      [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6)     \
+                    : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7")
+
+
+void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+              uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+              uint64_t arg6, struct arm_smccc_res *res)
+{
+       __smccc_call(hvc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
+                    arg6, res);
+}
+
+void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+              uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+              uint64_t arg6, struct arm_smccc_res *res)
+{
+       __smccc_call(smc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
+                    arg6, res);
+}
+
+void kvm_selftest_arch_init(void)
+{
+       /*
+        * arm64 doesn't have a true default mode, so start by computing the
+        * available IPA space and page sizes early.
+        */
+       guest_modes_append_default();
+}
+
+void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
+{
+       /*
+        * arm64 selftests use only TTBR0_EL1, meaning that the valid VA space
+        * is [0, 2^(64 - TCR_EL1.T0SZ)).
+        */
+       sparsebit_set_num(vm->vpages_valid, 0,
+                         (1ULL << vm->va_bits) >> vm->page_shift);
+}
+
+/* Helper to call wfi instruction. */
+void wfi(void)
+{
+       asm volatile("wfi");
+}
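A minimal host-side sketch of how a test wires together the pieces above (vCPU creation plus exception routing); guest_code, the choice of ESR_ELx_EC_BRK64, and stepping over the faulting instruction by 4 bytes are illustrative assumptions:

        static void guest_brk_handler(struct ex_regs *regs)
        {
                /* Step over the brk instruction and resume the guest. */
                regs->pc += 4;
        }

        static void example_setup(void)
        {
                struct kvm_vcpu *vcpu;
                struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, guest_code);

                vm_init_descriptor_tables(vm);
                vcpu_init_descriptor_tables(vcpu);
                vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_BRK64,
                                        guest_brk_handler);
        }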
diff --git a/tools/testing/selftests/kvm/lib/arm64/spinlock.c b/tools/testing/selftests/kvm/lib/arm64/spinlock.c
new file mode 100644 (file)
index 0000000..a076e78
--- /dev/null
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM64 Spinlock support
+ */
+#include <stdint.h>
+
+#include "spinlock.h"
+
+void spin_lock(struct spinlock *lock)
+{
+       int val, res;
+
+       asm volatile(
+       "1:     ldaxr   %w0, [%2]\n"
+       "       cbnz    %w0, 1b\n"
+       "       mov     %w0, #1\n"
+       "       stxr    %w1, %w0, [%2]\n"
+       "       cbnz    %w1, 1b\n"
+       : "=&r" (val), "=&r" (res)
+       : "r" (&lock->v)
+       : "memory");
+}
+
+void spin_unlock(struct spinlock *lock)
+{
+       asm volatile("stlr wzr, [%0]\n" : : "r" (&lock->v) : "memory");
+}
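A small usage sketch, assuming a test-defined counter shared between guest vCPUs; the lock lives in guest memory and is zero-initialized:

        static struct spinlock counter_lock;
        static uint64_t shared_counter;

        static void guest_bump_counter(void)
        {
                spin_lock(&counter_lock);
                shared_counter++;
                spin_unlock(&counter_lock);
        }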
diff --git a/tools/testing/selftests/kvm/lib/arm64/ucall.c b/tools/testing/selftests/kvm/lib/arm64/ucall.c
new file mode 100644 (file)
index 0000000..ddab0ce
--- /dev/null
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include "kvm_util.h"
+
+vm_vaddr_t *ucall_exit_mmio_addr;
+
+void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+       vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR);
+
+       virt_map(vm, mmio_gva, mmio_gpa, 1);
+
+       vm->ucall_mmio_addr = mmio_gpa;
+
+       write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
+}
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+
+       if (run->exit_reason == KVM_EXIT_MMIO &&
+           run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) {
+               TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t),
+                           "Unexpected ucall exit mmio address access");
+               return (void *)(*((uint64_t *)run->mmio.data));
+       }
+
+       return NULL;
+}
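On the host side, the MMIO exit decoded above typically surfaces through get_ucall(); a hedged sketch of the run loop tests commonly build on top of it (the surrounding test and its use of UCALL_SYNC/DONE/ABORT are assumptions):

        static void example_run_to_completion(struct kvm_vcpu *vcpu)
        {
                struct ucall uc;

                for (;;) {
                        vcpu_run(vcpu);

                        switch (get_ucall(vcpu, &uc)) {
                        case UCALL_SYNC:
                                break;          /* guest checkpoint, keep running */
                        case UCALL_DONE:
                                return;
                        case UCALL_ABORT:
                                REPORT_GUEST_ASSERT(uc);
                        default:
                                TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
                        }
                }
        }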
diff --git a/tools/testing/selftests/kvm/lib/arm64/vgic.c b/tools/testing/selftests/kvm/lib/arm64/vgic.c
new file mode 100644 (file)
index 0000000..4427f43
--- /dev/null
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) v3 host support
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm.h>
+#include <linux/sizes.h>
+#include <asm/cputype.h>
+#include <asm/kvm_para.h>
+#include <asm/kvm.h>
+
+#include "kvm_util.h"
+#include "vgic.h"
+#include "gic.h"
+#include "gic_v3.h"
+
+/*
+ * vGIC-v3 default host setup
+ *
+ * Input args:
+ *     vm - KVM VM
+ *     nr_vcpus - Number of vCPUs supported by this VM
+ *
+ * Output args: None
+ *
+ * Return: GIC file-descriptor or negative error code upon failure
+ *
+ * The function creates a vGIC-v3 device and maps the distributor and
+ * redistributor regions of the guest. Since it depends on the number of
+ * vCPUs for the VM, it must be called after all the vCPUs have been created.
+ */
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
+{
+       int gic_fd;
+       uint64_t attr;
+       struct list_head *iter;
+       unsigned int nr_gic_pages, nr_vcpus_created = 0;
+
+       TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be zero");
+
+       /*
+        * Make sure that the caller is in fact calling this
+        * function after all the vCPUs are added.
+        */
+       list_for_each(iter, &vm->vcpus)
+               nr_vcpus_created++;
+       TEST_ASSERT(nr_vcpus == nr_vcpus_created,
+                       "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)",
+                       nr_vcpus, nr_vcpus_created);
+
+       /* Distributor setup */
+       gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+       if (gic_fd < 0)
+               return gic_fd;
+
+       kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs);
+
+       kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+       attr = GICD_BASE_GPA;
+       kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_DIST, &attr);
+       nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE);
+       virt_map(vm, GICD_BASE_GPA, GICD_BASE_GPA, nr_gic_pages);
+
+       /* Redistributor setup */
+       attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, GICR_BASE_GPA, 0, 0);
+       kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &attr);
+       nr_gic_pages = vm_calc_num_guest_pages(vm->mode,
+                                               KVM_VGIC_V3_REDIST_SIZE * nr_vcpus);
+       virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages);
+
+       kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+       return gic_fd;
+}
+
+/* should only work for level sensitive interrupts */
+int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
+{
+       uint64_t attr = 32 * (intid / 32);
+       uint64_t index = intid % 32;
+       uint64_t val;
+       int ret;
+
+       ret = __kvm_device_attr_get(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+                                   attr, &val);
+       if (ret != 0)
+               return ret;
+
+       val |= 1U << index;
+       ret = __kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+                                   attr, &val);
+       return ret;
+}
+
+void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
+{
+       int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
+
+       TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, ret));
+}
+
+int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
+{
+       uint32_t irq = intid & KVM_ARM_IRQ_NUM_MASK;
+
+       TEST_ASSERT(!INTID_IS_SGI(intid), "KVM_IRQ_LINE's interface itself "
+               "doesn't allow injecting SGIs. There's no mask for it.");
+
+       if (INTID_IS_PPI(intid))
+               irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT;
+       else
+               irq |= KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT;
+
+       return _kvm_irq_line(vm, irq, level);
+}
+
+void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
+{
+       int ret = _kvm_arm_irq_line(vm, intid, level);
+
+       TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
+}
+
+static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu,
+                         uint64_t reg_off)
+{
+       uint64_t reg = intid / 32;
+       uint64_t index = intid % 32;
+       uint64_t attr = reg_off + reg * 4;
+       uint64_t val;
+       bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid);
+
+       uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
+                                         : KVM_DEV_ARM_VGIC_GRP_DIST_REGS;
+
+       if (intid_is_private) {
+               /* TODO: only vcpu 0 implemented for now. */
+               assert(vcpu->id == 0);
+               attr += SZ_64K;
+       }
+
+       /* Check that the addr part of the attr is within 32 bits. */
+       assert((attr & ~KVM_DEV_ARM_VGIC_OFFSET_MASK) == 0);
+
+       /*
+        * All calls will succeed, even with invalid intid's, as long as the
+        * addr part of the attr is within 32 bits (checked above). An invalid
+        * intid will just make the read/writes point to above the intended
+        * register space (i.e., ICPENDR after ISPENDR).
+        */
+       kvm_device_attr_get(gic_fd, group, attr, &val);
+       val |= 1ULL << index;
+       kvm_device_attr_set(gic_fd, group, attr, &val);
+}
+
+void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
+{
+       vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR);
+}
+
+void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
+{
+       vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER);
+}
+
+int vgic_its_setup(struct kvm_vm *vm)
+{
+       int its_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_ITS);
+       u64 attr;
+
+       attr = GITS_BASE_GPA;
+       kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_ITS_ADDR_TYPE, &attr);
+
+       kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+       virt_map(vm, GITS_BASE_GPA, GITS_BASE_GPA,
+                vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_ITS_SIZE));
+
+       return its_fd;
+}
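A hedged host-side sketch tying the setup helpers above together: the vGICv3 is created only after every vCPU exists, then an SPI is injected; the SPI number, IRQ count, and two-vCPU VM are assumptions for illustration:

        #define EXAMPLE_NR_VCPUS        2
        #define EXAMPLE_SPI             (32 + 5)        /* first SPI is INTID 32 */

        static void example_vgic_usage(struct kvm_vm *vm)
        {
                int gic_fd = vgic_v3_setup(vm, EXAMPLE_NR_VCPUS, 64);

                TEST_ASSERT(gic_fd >= 0, "Failed to create the vGICv3 device");

                /* Assert, then deassert, the example SPI line. */
                kvm_arm_irq_line(vm, EXAMPLE_SPI, 1);
                kvm_arm_irq_line(vm, EXAMPLE_SPI, 0);

                close(gic_fd);
        }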
diff --git a/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c
new file mode 100644 (file)
index 0000000..2c432fa
--- /dev/null
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test handler for the s390x DIAGNOSE 0x0318 instruction.
+ *
+ * Copyright (C) 2020, IBM
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define ICPT_INSTRUCTION       0x04
+#define IPA0_DIAG              0x8300
+
+static void guest_code(void)
+{
+       uint64_t diag318_info = 0x12345678;
+
+       asm volatile ("diag %0,0,0x318\n" : : "d" (diag318_info));
+}
+
+/*
+ * The DIAGNOSE 0x0318 instruction call must be handled via userspace. As such,
+ * we create an ad-hoc VM here to handle the instruction then extract the
+ * necessary data. It is up to the caller to decide what to do with that data.
+ */
+static uint64_t diag318_handler(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct kvm_run *run;
+       uint64_t reg;
+       uint64_t diag318_info;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       vcpu_run(vcpu);
+       run = vcpu->run;
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+       TEST_ASSERT(run->s390_sieic.icptcode == ICPT_INSTRUCTION,
+                   "Unexpected intercept code: 0x%x", run->s390_sieic.icptcode);
+       TEST_ASSERT((run->s390_sieic.ipa & 0xff00) == IPA0_DIAG,
+                   "Unexpected IPA0 code: 0x%x", (run->s390_sieic.ipa & 0xff00));
+
+       reg = (run->s390_sieic.ipa & 0x00f0) >> 4;
+       diag318_info = run->s.regs.gprs[reg];
+
+       TEST_ASSERT(diag318_info != 0, "DIAGNOSE 0x0318 info not set");
+
+       kvm_vm_free(vm);
+
+       return diag318_info;
+}
+
+uint64_t get_diag318_info(void)
+{
+       static uint64_t diag318_info;
+       static bool printed_skip;
+
+       /*
+        * If KVM does not support diag318, then return 0 to
+        * ensure tests do not break.
+        */
+       if (!kvm_has_cap(KVM_CAP_S390_DIAG318)) {
+               if (!printed_skip) {
+                       fprintf(stdout, "KVM_CAP_S390_DIAG318 not supported. "
+                               "Skipping diag318 test.\n");
+                       printed_skip = true;
+               }
+               return 0;
+       }
+
+       /*
+        * If a test has previously requested the diag318 info,
+        * then don't bother spinning up a temporary VM again.
+        */
+       if (!diag318_info)
+               diag318_info = diag318_handler();
+
+       return diag318_info;
+}
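As the comment above notes, callers decide what to do with the value; a rough sketch of the common pattern, mirroring it into the vCPU's sync regs, where the diag318 sync-reg field and the KVM_SYNC_DIAG318 flag are assumed to be available:

        static void example_use_diag318(struct kvm_vcpu *vcpu)
        {
                uint64_t diag318_info = get_diag318_info();

                /* Zero means KVM_CAP_S390_DIAG318 is unsupported; nothing to do. */
                if (!diag318_info)
                        return;

                vcpu->run->s.regs.diag318 = diag318_info;
                vcpu->run->kvm_dirty_regs |= KVM_SYNC_DIAG318;
        }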
diff --git a/tools/testing/selftests/kvm/lib/s390/facility.c b/tools/testing/selftests/kvm/lib/s390/facility.c
new file mode 100644 (file)
index 0000000..d540812
--- /dev/null
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ *  Hariharan Mari <hari55@linux.ibm.com>
+ *
+ * Contains the definitions of the global variables for the test facility feature.
+ */
+
+#include "facility.h"
+
+uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS];
+bool stfle_flag;
diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c
new file mode 100644 (file)
index 0000000..20cfe97
--- /dev/null
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM selftest s390x library code - CPU-related functions (page tables...)
+ *
+ * Copyright (C) 2019, Red Hat, Inc.
+ */
+
+#include "processor.h"
+#include "kvm_util.h"
+
+#define PAGES_PER_REGION 4
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
+{
+       vm_paddr_t paddr;
+
+       TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
+                   vm->page_size);
+
+       if (vm->pgd_created)
+               return;
+
+       paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
+                                  KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+                                  vm->memslots[MEM_REGION_PT]);
+       memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
+
+       vm->pgd = paddr;
+       vm->pgd_created = true;
+}
+
+/*
+ * Allocate 4 pages for a region/segment table (ri < 4), or one page for
+ * a page table (ri == 4). Returns a suitable region/segment table entry
+ * which points to the freshly allocated pages.
+ */
+static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri)
+{
+       uint64_t taddr;
+
+       taddr = vm_phy_pages_alloc(vm,  ri < 4 ? PAGES_PER_REGION : 1,
+                                  KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+       memset(addr_gpa2hva(vm, taddr), 0xff, PAGES_PER_REGION * vm->page_size);
+
+       return (taddr & REGION_ENTRY_ORIGIN)
+               | (((4 - ri) << 2) & REGION_ENTRY_TYPE)
+               | ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
+{
+       int ri, idx;
+       uint64_t *entry;
+
+       TEST_ASSERT((gva % vm->page_size) == 0,
+               "Virtual address not on page boundary,\n"
+               "  vaddr: 0x%lx vm->page_size: 0x%x",
+               gva, vm->page_size);
+       TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+               (gva >> vm->page_shift)),
+               "Invalid virtual address, vaddr: 0x%lx",
+               gva);
+       TEST_ASSERT((gpa % vm->page_size) == 0,
+               "Physical address not on page boundary,\n"
+               "  paddr: 0x%lx vm->page_size: 0x%x",
+               gpa, vm->page_size);
+       TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn,
+               "Physical address beyond maximum supported,\n"
+               "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+               gpa, vm->max_gfn, vm->page_size);
+
+       /* Walk through region and segment tables */
+       entry = addr_gpa2hva(vm, vm->pgd);
+       for (ri = 1; ri <= 4; ri++) {
+               idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
+               if (entry[idx] & REGION_ENTRY_INVALID)
+                       entry[idx] = virt_alloc_region(vm, ri);
+               entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
+       }
+
+       /* Fill in page table entry */
+       idx = (gva >> PAGE_SHIFT) & 0x0ffu;             /* page index */
+       if (!(entry[idx] & PAGE_INVALID))
+               fprintf(stderr,
+                       "WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa);
+       entry[idx] = gpa;
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+       int ri, idx;
+       uint64_t *entry;
+
+       TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
+                   vm->page_size);
+
+       entry = addr_gpa2hva(vm, vm->pgd);
+       for (ri = 1; ri <= 4; ri++) {
+               idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
+               TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID),
+                           "No region mapping for vm virtual address 0x%lx",
+                           gva);
+               entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
+       }
+
+       idx = (gva >> PAGE_SHIFT) & 0x0ffu;             /* page index */
+
+       TEST_ASSERT(!(entry[idx] & PAGE_INVALID),
+                   "No page mapping for vm virtual address 0x%lx", gva);
+
+       return (entry[idx] & ~0xffful) + (gva & 0xffful);
+}
+
+static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent,
+                          uint64_t ptea_start)
+{
+       uint64_t *pte, ptea;
+
+       for (ptea = ptea_start; ptea < ptea_start + 0x100 * 8; ptea += 8) {
+               pte = addr_gpa2hva(vm, ptea);
+               if (*pte & PAGE_INVALID)
+                       continue;
+               fprintf(stream, "%*spte @ 0x%lx: 0x%016lx\n",
+                       indent, "", ptea, *pte);
+       }
+}
+
+static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent,
+                            uint64_t reg_tab_addr)
+{
+       uint64_t addr, *entry;
+
+       for (addr = reg_tab_addr; addr < reg_tab_addr + 0x400 * 8; addr += 8) {
+               entry = addr_gpa2hva(vm, addr);
+               if (*entry & REGION_ENTRY_INVALID)
+                       continue;
+               fprintf(stream, "%*srt%lde @ 0x%lx: 0x%016lx\n",
+                       indent, "", 4 - ((*entry & REGION_ENTRY_TYPE) >> 2),
+                       addr, *entry);
+               if (*entry & REGION_ENTRY_TYPE) {
+                       virt_dump_region(stream, vm, indent + 2,
+                                        *entry & REGION_ENTRY_ORIGIN);
+               } else {
+                       virt_dump_ptes(stream, vm, indent + 2,
+                                      *entry & REGION_ENTRY_ORIGIN);
+               }
+       }
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+       if (!vm->pgd_created)
+               return;
+
+       virt_dump_region(stream, vm, indent, vm->pgd);
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+       vcpu->run->psw_addr = (uintptr_t)guest_code;
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+       size_t stack_size =  DEFAULT_STACK_PGS * getpagesize();
+       uint64_t stack_vaddr;
+       struct kvm_regs regs;
+       struct kvm_sregs sregs;
+       struct kvm_vcpu *vcpu;
+
+       TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
+                   vm->page_size);
+
+       stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+                                      DEFAULT_GUEST_STACK_VADDR_MIN,
+                                      MEM_REGION_DATA);
+
+       vcpu = __vm_vcpu_add(vm, vcpu_id);
+
+       /* Setup guest registers */
+       vcpu_regs_get(vcpu, &regs);
+       regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160;
+       vcpu_regs_set(vcpu, &regs);
+
+       vcpu_sregs_get(vcpu, &sregs);
+       sregs.crs[0] |= 0x00040000;             /* Enable floating point regs */
+       sregs.crs[1] = vm->pgd | 0xf;           /* Primary region table */
+       vcpu_sregs_set(vcpu, &sregs);
+
+       vcpu->run->psw_mask = 0x0400000180000000ULL;  /* DAT enabled + 64 bit mode */
+
+       return vcpu;
+}
+
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
+{
+       va_list ap;
+       struct kvm_regs regs;
+       int i;
+
+       TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n"
+                   "  num: %u",
+                   num);
+
+       va_start(ap, num);
+       vcpu_regs_get(vcpu, &regs);
+
+       for (i = 0; i < num; i++)
+               regs.gprs[i + 2] = va_arg(ap, uint64_t);
+
+       vcpu_regs_set(vcpu, &regs);
+       va_end(ap);
+}
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+{
+       fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
+               indent, "", vcpu->run->psw_mask, vcpu->run->psw_addr);
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+}
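To make the table walk in virt_arch_pg_map()/addr_arch_gva2gpa() concrete, a worked example under the file's assumptions (4KiB pages, 11 index bits per region/segment level, 8 page-index bits) for an arbitrary gva of 0x0000008000402000:

        ri=1 (region first) : (gva >> 53) & 0x7ff = 0x000
        ri=2 (region second): (gva >> 42) & 0x7ff = 0x000
        ri=3 (region third) : (gva >> 31) & 0x7ff = 0x100
        ri=4 (segment)      : (gva >> 20) & 0x7ff = 0x004
        page index          : (gva >> 12) & 0x0ff = 0x002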
diff --git a/tools/testing/selftests/kvm/lib/s390/ucall.c b/tools/testing/selftests/kvm/lib/s390/ucall.c
new file mode 100644 (file)
index 0000000..cca9873
--- /dev/null
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ */
+#include "kvm_util.h"
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+
+       if (run->exit_reason == KVM_EXIT_S390_SIEIC &&
+           run->s390_sieic.icptcode == 4 &&
+           (run->s390_sieic.ipa >> 8) == 0x83 &&    /* 0x83 means DIAGNOSE */
+           (run->s390_sieic.ipb >> 16) == 0x501) {
+               int reg = run->s390_sieic.ipa & 0xf;
+
+               return (void *)run->s.regs.gprs[reg];
+       }
+       return NULL;
+}
diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c
deleted file mode 100644 (file)
index 2c432fa..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Test handler for the s390x DIAGNOSE 0x0318 instruction.
- *
- * Copyright (C) 2020, IBM
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-
-#define ICPT_INSTRUCTION       0x04
-#define IPA0_DIAG              0x8300
-
-static void guest_code(void)
-{
-       uint64_t diag318_info = 0x12345678;
-
-       asm volatile ("diag %0,0,0x318\n" : : "d" (diag318_info));
-}
-
-/*
- * The DIAGNOSE 0x0318 instruction call must be handled via userspace. As such,
- * we create an ad-hoc VM here to handle the instruction then extract the
- * necessary data. It is up to the caller to decide what to do with that data.
- */
-static uint64_t diag318_handler(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct kvm_run *run;
-       uint64_t reg;
-       uint64_t diag318_info;
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       vcpu_run(vcpu);
-       run = vcpu->run;
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
-       TEST_ASSERT(run->s390_sieic.icptcode == ICPT_INSTRUCTION,
-                   "Unexpected intercept code: 0x%x", run->s390_sieic.icptcode);
-       TEST_ASSERT((run->s390_sieic.ipa & 0xff00) == IPA0_DIAG,
-                   "Unexpected IPA0 code: 0x%x", (run->s390_sieic.ipa & 0xff00));
-
-       reg = (run->s390_sieic.ipa & 0x00f0) >> 4;
-       diag318_info = run->s.regs.gprs[reg];
-
-       TEST_ASSERT(diag318_info != 0, "DIAGNOSE 0x0318 info not set");
-
-       kvm_vm_free(vm);
-
-       return diag318_info;
-}
-
-uint64_t get_diag318_info(void)
-{
-       static uint64_t diag318_info;
-       static bool printed_skip;
-
-       /*
-        * If KVM does not support diag318, then return 0 to
-        * ensure tests do not break.
-        */
-       if (!kvm_has_cap(KVM_CAP_S390_DIAG318)) {
-               if (!printed_skip) {
-                       fprintf(stdout, "KVM_CAP_S390_DIAG318 not supported. "
-                               "Skipping diag318 test.\n");
-                       printed_skip = true;
-               }
-               return 0;
-       }
-
-       /*
-        * If a test has previously requested the diag318 info,
-        * then don't bother spinning up a temporary VM again.
-        */
-       if (!diag318_info)
-               diag318_info = diag318_handler();
-
-       return diag318_info;
-}
diff --git a/tools/testing/selftests/kvm/lib/s390x/facility.c b/tools/testing/selftests/kvm/lib/s390x/facility.c
deleted file mode 100644 (file)
index d540812..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright IBM Corp. 2024
- *
- * Authors:
- *  Hariharan Mari <hari55@linux.ibm.com>
- *
- * Contains the definition for the global variables to have the test facitlity feature.
- */
-
-#include "facility.h"
-
-uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS];
-bool stfle_flag;
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
deleted file mode 100644 (file)
index 20cfe97..0000000
+++ /dev/null
@@ -1,223 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * KVM selftest s390x library code - CPU-related functions (page tables...)
- *
- * Copyright (C) 2019, Red Hat, Inc.
- */
-
-#include "processor.h"
-#include "kvm_util.h"
-
-#define PAGES_PER_REGION 4
-
-void virt_arch_pgd_alloc(struct kvm_vm *vm)
-{
-       vm_paddr_t paddr;
-
-       TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
-                   vm->page_size);
-
-       if (vm->pgd_created)
-               return;
-
-       paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
-                                  KVM_GUEST_PAGE_TABLE_MIN_PADDR,
-                                  vm->memslots[MEM_REGION_PT]);
-       memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
-
-       vm->pgd = paddr;
-       vm->pgd_created = true;
-}
-
-/*
- * Allocate 4 pages for a region/segment table (ri < 4), or one page for
- * a page table (ri == 4). Returns a suitable region/segment table entry
- * which points to the freshly allocated pages.
- */
-static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri)
-{
-       uint64_t taddr;
-
-       taddr = vm_phy_pages_alloc(vm,  ri < 4 ? PAGES_PER_REGION : 1,
-                                  KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
-       memset(addr_gpa2hva(vm, taddr), 0xff, PAGES_PER_REGION * vm->page_size);
-
-       return (taddr & REGION_ENTRY_ORIGIN)
-               | (((4 - ri) << 2) & REGION_ENTRY_TYPE)
-               | ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
-}
-
-void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
-{
-       int ri, idx;
-       uint64_t *entry;
-
-       TEST_ASSERT((gva % vm->page_size) == 0,
-               "Virtual address not on page boundary,\n"
-               "  vaddr: 0x%lx vm->page_size: 0x%x",
-               gva, vm->page_size);
-       TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
-               (gva >> vm->page_shift)),
-               "Invalid virtual address, vaddr: 0x%lx",
-               gva);
-       TEST_ASSERT((gpa % vm->page_size) == 0,
-               "Physical address not on page boundary,\n"
-               "  paddr: 0x%lx vm->page_size: 0x%x",
-               gva, vm->page_size);
-       TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn,
-               "Physical address beyond beyond maximum supported,\n"
-               "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
-               gva, vm->max_gfn, vm->page_size);
-
-       /* Walk through region and segment tables */
-       entry = addr_gpa2hva(vm, vm->pgd);
-       for (ri = 1; ri <= 4; ri++) {
-               idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
-               if (entry[idx] & REGION_ENTRY_INVALID)
-                       entry[idx] = virt_alloc_region(vm, ri);
-               entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
-       }
-
-       /* Fill in page table entry */
-       idx = (gva >> PAGE_SHIFT) & 0x0ffu;             /* page index */
-       if (!(entry[idx] & PAGE_INVALID))
-               fprintf(stderr,
-                       "WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa);
-       entry[idx] = gpa;
-}
-
-vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
-{
-       int ri, idx;
-       uint64_t *entry;
-
-       TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
-                   vm->page_size);
-
-       entry = addr_gpa2hva(vm, vm->pgd);
-       for (ri = 1; ri <= 4; ri++) {
-               idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
-               TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID),
-                           "No region mapping for vm virtual address 0x%lx",
-                           gva);
-               entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
-       }
-
-       idx = (gva >> PAGE_SHIFT) & 0x0ffu;             /* page index */
-
-       TEST_ASSERT(!(entry[idx] & PAGE_INVALID),
-                   "No page mapping for vm virtual address 0x%lx", gva);
-
-       return (entry[idx] & ~0xffful) + (gva & 0xffful);
-}
-
-static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent,
-                          uint64_t ptea_start)
-{
-       uint64_t *pte, ptea;
-
-       for (ptea = ptea_start; ptea < ptea_start + 0x100 * 8; ptea += 8) {
-               pte = addr_gpa2hva(vm, ptea);
-               if (*pte & PAGE_INVALID)
-                       continue;
-               fprintf(stream, "%*spte @ 0x%lx: 0x%016lx\n",
-                       indent, "", ptea, *pte);
-       }
-}
-
-static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent,
-                            uint64_t reg_tab_addr)
-{
-       uint64_t addr, *entry;
-
-       for (addr = reg_tab_addr; addr < reg_tab_addr + 0x400 * 8; addr += 8) {
-               entry = addr_gpa2hva(vm, addr);
-               if (*entry & REGION_ENTRY_INVALID)
-                       continue;
-               fprintf(stream, "%*srt%lde @ 0x%lx: 0x%016lx\n",
-                       indent, "", 4 - ((*entry & REGION_ENTRY_TYPE) >> 2),
-                       addr, *entry);
-               if (*entry & REGION_ENTRY_TYPE) {
-                       virt_dump_region(stream, vm, indent + 2,
-                                        *entry & REGION_ENTRY_ORIGIN);
-               } else {
-                       virt_dump_ptes(stream, vm, indent + 2,
-                                      *entry & REGION_ENTRY_ORIGIN);
-               }
-       }
-}
-
-void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
-{
-       if (!vm->pgd_created)
-               return;
-
-       virt_dump_region(stream, vm, indent, vm->pgd);
-}
-
-void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
-{
-       vcpu->run->psw_addr = (uintptr_t)guest_code;
-}
-
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
-{
-       size_t stack_size =  DEFAULT_STACK_PGS * getpagesize();
-       uint64_t stack_vaddr;
-       struct kvm_regs regs;
-       struct kvm_sregs sregs;
-       struct kvm_vcpu *vcpu;
-
-       TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
-                   vm->page_size);
-
-       stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
-                                      DEFAULT_GUEST_STACK_VADDR_MIN,
-                                      MEM_REGION_DATA);
-
-       vcpu = __vm_vcpu_add(vm, vcpu_id);
-
-       /* Setup guest registers */
-       vcpu_regs_get(vcpu, &regs);
-       regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160;
-       vcpu_regs_set(vcpu, &regs);
-
-       vcpu_sregs_get(vcpu, &sregs);
-       sregs.crs[0] |= 0x00040000;             /* Enable floating point regs */
-       sregs.crs[1] = vm->pgd | 0xf;           /* Primary region table */
-       vcpu_sregs_set(vcpu, &sregs);
-
-       vcpu->run->psw_mask = 0x0400000180000000ULL;  /* DAT enabled + 64 bit mode */
-
-       return vcpu;
-}
-
-void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
-{
-       va_list ap;
-       struct kvm_regs regs;
-       int i;
-
-       TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n"
-                   "  num: %u",
-                   num);
-
-       va_start(ap, num);
-       vcpu_regs_get(vcpu, &regs);
-
-       for (i = 0; i < num; i++)
-               regs.gprs[i + 2] = va_arg(ap, uint64_t);
-
-       vcpu_regs_set(vcpu, &regs);
-       va_end(ap);
-}
-
-void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
-{
-       fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
-               indent, "", vcpu->run->psw_mask, vcpu->run->psw_addr);
-}
-
-void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
-{
-}
diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c
deleted file mode 100644 (file)
index cca9873..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ucall support. A ucall is a "hypercall to userspace".
- *
- * Copyright (C) 2019 Red Hat, Inc.
- */
-#include "kvm_util.h"
-
-void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-
-       if (run->exit_reason == KVM_EXIT_S390_SIEIC &&
-           run->s390_sieic.icptcode == 4 &&
-           (run->s390_sieic.ipa >> 8) == 0x83 &&    /* 0x83 means DIAGNOSE */
-           (run->s390_sieic.ipb >> 16) == 0x501) {
-               int reg = run->s390_sieic.ipa & 0xf;
-
-               return (void *)run->s.regs.gprs[reg];
-       }
-       return NULL;
-}
diff --git a/tools/testing/selftests/kvm/lib/x86/apic.c b/tools/testing/selftests/kvm/lib/x86/apic.c
new file mode 100644 (file)
index 0000000..89153a3
--- /dev/null
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#include "apic.h"
+
+void apic_disable(void)
+{
+       wrmsr(MSR_IA32_APICBASE,
+             rdmsr(MSR_IA32_APICBASE) &
+               ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
+}
+
+void xapic_enable(void)
+{
+       uint64_t val = rdmsr(MSR_IA32_APICBASE);
+
+       /* Per the SDM: to enable xAPIC while in x2APIC mode, the APIC must first be disabled. */
+       if (val & MSR_IA32_APICBASE_EXTD) {
+               apic_disable();
+               wrmsr(MSR_IA32_APICBASE,
+                     rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
+       } else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
+               wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
+       }
+
+       /*
+        * Per SDM: reset value of spurious interrupt vector register has the
+        * APIC software enabled bit=0. It must be enabled in addition to the
+        * enable bit in the MSR.
+        */
+       val = xapic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
+       xapic_write_reg(APIC_SPIV, val);
+}
+
+void x2apic_enable(void)
+{
+       wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
+             MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
+       x2apic_write_reg(APIC_SPIV,
+                        x2apic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED);
+}
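xapic_enable() and x2apic_enable() are meant to be called from guest code before any other APIC access. A minimal sketch of a guest routine using them (guest_code and the GUEST_* reporting follow the usual selftests conventions; the routine itself is purely illustrative):

    static void guest_code(void)
    {
            xapic_enable();

            /* In xAPIC mode the APIC ID lives in bits 31:24 of APIC_ID. */
            GUEST_SYNC(xapic_read_reg(APIC_ID) >> 24);
            GUEST_DONE();
    }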
diff --git a/tools/testing/selftests/kvm/lib/x86/handlers.S b/tools/testing/selftests/kvm/lib/x86/handlers.S
new file mode 100644 (file)
index 0000000..7629819
--- /dev/null
@@ -0,0 +1,81 @@
+handle_exception:
+       push %r15
+       push %r14
+       push %r13
+       push %r12
+       push %r11
+       push %r10
+       push %r9
+       push %r8
+
+       push %rdi
+       push %rsi
+       push %rbp
+       push %rbx
+       push %rdx
+       push %rcx
+       push %rax
+       mov %rsp, %rdi
+
+       call route_exception
+
+       pop %rax
+       pop %rcx
+       pop %rdx
+       pop %rbx
+       pop %rbp
+       pop %rsi
+       pop %rdi
+       pop %r8
+       pop %r9
+       pop %r10
+       pop %r11
+       pop %r12
+       pop %r13
+       pop %r14
+       pop %r15
+
+       /* Discard vector and error code. */
+       add $16, %rsp
+       iretq
+
+/*
+ * Build the handle_exception wrappers which push the vector/error code on the
+ * stack and an array of pointers to those wrappers.
+ */
+.pushsection .rodata
+.globl idt_handlers
+idt_handlers:
+.popsection
+
+.macro HANDLERS has_error from to
+       vector = \from
+       .rept \to - \from + 1
+       .align 8
+
+       /* Fetch current address and append it to idt_handlers. */
+666 :
+.pushsection .rodata
+       .quad 666b
+.popsection
+
+       .if ! \has_error
+       pushq $0
+       .endif
+       pushq $vector
+       jmp handle_exception
+       vector = vector + 1
+       .endr
+.endm
+
+.global idt_handler_code
+idt_handler_code:
+       HANDLERS has_error=0 from=0  to=7
+       HANDLERS has_error=1 from=8  to=8
+       HANDLERS has_error=0 from=9  to=9
+       HANDLERS has_error=1 from=10 to=14
+       HANDLERS has_error=0 from=15 to=16
+       HANDLERS has_error=1 from=17 to=17
+       HANDLERS has_error=0 from=18 to=255
+
+.section        .note.GNU-stack, "", %progbits
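Each HANDLERS stub pushes its vector (plus a dummy error code for vectors that lack one) so handle_exception() always sees the same frame layout before handing the saved registers to route_exception(). Tests attach C handlers to individual vectors via vm_install_exception_handler() from processor.c below; an illustrative sketch with a hypothetical handler:

    static void guest_ud_handler(struct ex_regs *regs)
    {
            /* UD2 is two bytes; skip it and let the guest continue. */
            regs->rip += 2;
    }

    /* ... after creating the VM and vCPU ... */
    vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);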
diff --git a/tools/testing/selftests/kvm/lib/x86/hyperv.c b/tools/testing/selftests/kvm/lib/x86/hyperv.c
new file mode 100644 (file)
index 0000000..15bc8cd
--- /dev/null
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Hyper-V specific functions.
+ *
+ * Copyright (C) 2021, Red Hat Inc.
+ */
+#include <stdint.h>
+#include "processor.h"
+#include "hyperv.h"
+
+const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
+{
+       static struct kvm_cpuid2 *cpuid;
+       int kvm_fd;
+
+       if (cpuid)
+               return cpuid;
+
+       cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+       kvm_fd = open_kvm_dev_path_or_exit();
+
+       kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+
+       close(kvm_fd);
+       return cpuid;
+}
+
+void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+       static struct kvm_cpuid2 *cpuid_full;
+       const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv;
+       int i, nent = 0;
+
+       if (!cpuid_full) {
+               cpuid_sys = kvm_get_supported_cpuid();
+               cpuid_hv = kvm_get_supported_hv_cpuid();
+
+               cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent);
+               if (!cpuid_full) {
+                       perror("malloc");
+                       abort();
+               }
+
+               /* Need to skip KVM CPUID leaves 0x400000xx */
+               for (i = 0; i < cpuid_sys->nent; i++) {
+                       if (cpuid_sys->entries[i].function >= 0x40000000 &&
+                           cpuid_sys->entries[i].function < 0x40000100)
+                               continue;
+                       cpuid_full->entries[nent] = cpuid_sys->entries[i];
+                       nent++;
+               }
+
+               memcpy(&cpuid_full->entries[nent], cpuid_hv->entries,
+                      cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2));
+               cpuid_full->nent = nent + cpuid_hv->nent;
+       }
+
+       vcpu_init_cpuid(vcpu, cpuid_full);
+}
+
+const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+       struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+
+       vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+
+       return cpuid;
+}
+
+bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature)
+{
+       if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID))
+               return false;
+
+       return kvm_cpuid_has(kvm_get_supported_hv_cpuid(), feature);
+}
+
+struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
+                                                      vm_vaddr_t *p_hv_pages_gva)
+{
+       vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm);
+       struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva);
+
+       /* Setup of a region of guest memory for the VP Assist page. */
+       hv->vp_assist = (void *)vm_vaddr_alloc_page(vm);
+       hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist);
+       hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist);
+
+       /* Setup of a region of guest memory for the partition assist page. */
+       hv->partition_assist = (void *)vm_vaddr_alloc_page(vm);
+       hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist);
+       hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist);
+
+       /* Setup of a region of guest memory for the enlightened VMCS. */
+       hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm);
+       hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs);
+       hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs);
+
+       *p_hv_pages_gva = hv_pages_gva;
+       return hv;
+}
+
+int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
+{
+       uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
+               HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+       wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val);
+
+       current_vp_assist = vp_assist;
+
+       return 0;
+}
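vcpu_set_hv_cpuid() drops KVM's own paravirt leaves (0x40000000-0x400000FF) before appending the Hyper-V leaves, as Hyper-V enlightenments claim that same CPUID range. A minimal usage sketch (guest_code is a placeholder):

    struct kvm_vcpu *vcpu;
    struct kvm_vm *vm;

    vm = vm_create_with_one_vcpu(&vcpu, guest_code);

    /* Expose the merged KVM + Hyper-V CPUID to the vCPU before it runs. */
    vcpu_set_hv_cpuid(vcpu);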
diff --git a/tools/testing/selftests/kvm/lib/x86/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c
new file mode 100644 (file)
index 0000000..7f5d62a
--- /dev/null
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * x86-specific extensions to memstress.c.
+ *
+ * Copyright (C) 2022, Google, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "memstress.h"
+#include "processor.h"
+#include "vmx.h"
+
+void memstress_l2_guest_code(uint64_t vcpu_id)
+{
+       memstress_guest_code(vcpu_id);
+       vmcall();
+}
+
+extern char memstress_l2_guest_entry[];
+__asm__(
+"memstress_l2_guest_entry:"
+"      mov (%rsp), %rdi;"
+"      call memstress_l2_guest_code;"
+"      ud2;"
+);
+
+static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
+{
+#define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       unsigned long *rsp;
+
+       GUEST_ASSERT(vmx->vmcs_gpa);
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+       GUEST_ASSERT(load_vmcs(vmx));
+       GUEST_ASSERT(ept_1g_pages_supported());
+
+       rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1];
+       *rsp = vcpu_id;
+       prepare_vmcs(vmx, memstress_l2_guest_entry, rsp);
+
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+       GUEST_DONE();
+}
+
+uint64_t memstress_nested_pages(int nr_vcpus)
+{
+       /*
+        * 513 page tables is enough to identity-map 256 TiB of L2 with 1G
+        * pages and 4-level paging, plus a few pages per-vCPU for data
+        * structures such as the VMCS.
+        */
+       return 513 + 10 * nr_vcpus;
+}
+
+void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
+{
+       uint64_t start, end;
+
+       prepare_eptp(vmx, vm, 0);
+
+       /*
+        * Identity map the first 4G and the test region with 1G pages so that
+        * KVM can shadow the EPT12 with the maximum huge page size supported
+        * by the backing source.
+        */
+       nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL);
+
+       start = align_down(memstress_args.gpa, PG_SIZE_1G);
+       end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G);
+       nested_identity_map_1g(vmx, vm, start, end - start);
+}
+
+void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[])
+{
+       struct vmx_pages *vmx, *vmx0 = NULL;
+       struct kvm_regs regs;
+       vm_vaddr_t vmx_gva;
+       int vcpu_id;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+       TEST_REQUIRE(kvm_cpu_has_ept());
+
+       for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+               vmx = vcpu_alloc_vmx(vm, &vmx_gva);
+
+               if (vcpu_id == 0) {
+                       memstress_setup_ept(vmx, vm);
+                       vmx0 = vmx;
+               } else {
+                       /* Share the same EPT table across all vCPUs. */
+                       vmx->eptp = vmx0->eptp;
+                       vmx->eptp_hva = vmx0->eptp_hva;
+                       vmx->eptp_gpa = vmx0->eptp_gpa;
+               }
+
+               /*
+                * Override the vCPU to run memstress_l1_guest_code() which will
+                * bounce it into L2 before calling memstress_guest_code().
+                */
+               vcpu_regs_get(vcpus[vcpu_id], &regs);
+               regs.rip = (unsigned long) memstress_l1_guest_code;
+               vcpu_regs_set(vcpus[vcpu_id], &regs);
+               vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id);
+       }
+}
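The 513 figure in memstress_nested_pages() follows directly from the paging geometry: one PML4 covers 256 TiB, and each of its 512 entries points to a PDPT whose 512 entries are 1 GiB leaves, so no PD or PT pages are needed. A compile-time restatement of that arithmetic (illustrative only, not part of the patch):

    #define SZ_1G   (1ULL << 30)
    #define SZ_512G (512 * SZ_1G)   /* coverage of one PDPT of 1 GiB leaves */
    #define SZ_256T (1ULL << 48)    /* coverage of one PML4 */

    _Static_assert(SZ_256T / SZ_512G + 1 == 513, "1 PML4 + 512 PDPTs");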
diff --git a/tools/testing/selftests/kvm/lib/x86/pmu.c b/tools/testing/selftests/kvm/lib/x86/pmu.c
new file mode 100644 (file)
index 0000000..f31f042
--- /dev/null
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+
+#include <stdint.h>
+
+#include <linux/kernel.h>
+
+#include "kvm_util.h"
+#include "pmu.h"
+
+const uint64_t intel_pmu_arch_events[] = {
+       INTEL_ARCH_CPU_CYCLES,
+       INTEL_ARCH_INSTRUCTIONS_RETIRED,
+       INTEL_ARCH_REFERENCE_CYCLES,
+       INTEL_ARCH_LLC_REFERENCES,
+       INTEL_ARCH_LLC_MISSES,
+       INTEL_ARCH_BRANCHES_RETIRED,
+       INTEL_ARCH_BRANCHES_MISPREDICTED,
+       INTEL_ARCH_TOPDOWN_SLOTS,
+};
+kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS);
+
+const uint64_t amd_pmu_zen_events[] = {
+       AMD_ZEN_CORE_CYCLES,
+       AMD_ZEN_INSTRUCTIONS_RETIRED,
+       AMD_ZEN_BRANCHES_RETIRED,
+       AMD_ZEN_BRANCHES_MISPREDICTED,
+};
+kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS);
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
new file mode 100644 (file)
index 0000000..bd5a802
--- /dev/null
@@ -0,0 +1,1293 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#include "linux/bitmap.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "sev.h"
+
+#ifndef NUM_INTERRUPTS
+#define NUM_INTERRUPTS 256
+#endif
+
+#define KERNEL_CS      0x8
+#define KERNEL_DS      0x10
+#define KERNEL_TSS     0x18
+
+vm_vaddr_t exception_handlers;
+bool host_cpu_is_amd;
+bool host_cpu_is_intel;
+bool is_forced_emulation_enabled;
+uint64_t guest_tsc_khz;
+
+static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent)
+{
+       fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
+               "rcx: 0x%.16llx rdx: 0x%.16llx\n",
+               indent, "",
+               regs->rax, regs->rbx, regs->rcx, regs->rdx);
+       fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
+               "rsp: 0x%.16llx rbp: 0x%.16llx\n",
+               indent, "",
+               regs->rsi, regs->rdi, regs->rsp, regs->rbp);
+       fprintf(stream, "%*sr8:  0x%.16llx r9:  0x%.16llx "
+               "r10: 0x%.16llx r11: 0x%.16llx\n",
+               indent, "",
+               regs->r8, regs->r9, regs->r10, regs->r11);
+       fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
+               "r14: 0x%.16llx r15: 0x%.16llx\n",
+               indent, "",
+               regs->r12, regs->r13, regs->r14, regs->r15);
+       fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
+               indent, "",
+               regs->rip, regs->rflags);
+}
+
+static void segment_dump(FILE *stream, struct kvm_segment *segment,
+                        uint8_t indent)
+{
+       fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
+               "selector: 0x%.4x type: 0x%.2x\n",
+               indent, "", segment->base, segment->limit,
+               segment->selector, segment->type);
+       fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
+               "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
+               indent, "", segment->present, segment->dpl,
+               segment->db, segment->s, segment->l);
+       fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
+               "unusable: 0x%.2x padding: 0x%.2x\n",
+               indent, "", segment->g, segment->avl,
+               segment->unusable, segment->padding);
+}
+
+static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
+                       uint8_t indent)
+{
+       fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
+               "padding: 0x%.4x 0x%.4x 0x%.4x\n",
+               indent, "", dtable->base, dtable->limit,
+               dtable->padding[0], dtable->padding[1], dtable->padding[2]);
+}
+
+static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent)
+{
+       unsigned int i;
+
+       fprintf(stream, "%*scs:\n", indent, "");
+       segment_dump(stream, &sregs->cs, indent + 2);
+       fprintf(stream, "%*sds:\n", indent, "");
+       segment_dump(stream, &sregs->ds, indent + 2);
+       fprintf(stream, "%*ses:\n", indent, "");
+       segment_dump(stream, &sregs->es, indent + 2);
+       fprintf(stream, "%*sfs:\n", indent, "");
+       segment_dump(stream, &sregs->fs, indent + 2);
+       fprintf(stream, "%*sgs:\n", indent, "");
+       segment_dump(stream, &sregs->gs, indent + 2);
+       fprintf(stream, "%*sss:\n", indent, "");
+       segment_dump(stream, &sregs->ss, indent + 2);
+       fprintf(stream, "%*str:\n", indent, "");
+       segment_dump(stream, &sregs->tr, indent + 2);
+       fprintf(stream, "%*sldt:\n", indent, "");
+       segment_dump(stream, &sregs->ldt, indent + 2);
+
+       fprintf(stream, "%*sgdt:\n", indent, "");
+       dtable_dump(stream, &sregs->gdt, indent + 2);
+       fprintf(stream, "%*sidt:\n", indent, "");
+       dtable_dump(stream, &sregs->idt, indent + 2);
+
+       fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
+               "cr3: 0x%.16llx cr4: 0x%.16llx\n",
+               indent, "",
+               sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
+       fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
+               "apic_base: 0x%.16llx\n",
+               indent, "",
+               sregs->cr8, sregs->efer, sregs->apic_base);
+
+       fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
+       for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
+               fprintf(stream, "%*s%.16llx\n", indent + 2, "",
+                       sregs->interrupt_bitmap[i]);
+       }
+}
+
+bool kvm_is_tdp_enabled(void)
+{
+       if (host_cpu_is_intel)
+               return get_kvm_intel_param_bool("ept");
+       else
+               return get_kvm_amd_param_bool("npt");
+}
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
+{
+       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
+               "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+       /* If needed, create page map l4 table. */
+       if (!vm->pgd_created) {
+               vm->pgd = vm_alloc_page_table(vm);
+               vm->pgd_created = true;
+       }
+}
+
+static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte,
+                         uint64_t vaddr, int level)
+{
+       uint64_t pt_gpa = PTE_GET_PA(*parent_pte);
+       uint64_t *page_table = addr_gpa2hva(vm, pt_gpa);
+       int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
+
+       TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd,
+                   "Parent PTE (level %d) not PRESENT for gva: 0x%08lx",
+                   level + 1, vaddr);
+
+       return &page_table[index];
+}
+
+static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
+                                      uint64_t *parent_pte,
+                                      uint64_t vaddr,
+                                      uint64_t paddr,
+                                      int current_level,
+                                      int target_level)
+{
+       uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level);
+
+       paddr = vm_untag_gpa(vm, paddr);
+
+       if (!(*pte & PTE_PRESENT_MASK)) {
+               *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
+               if (current_level == target_level)
+                       *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+               else
+                       *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
+       } else {
+               /*
+                * Entry already present.  Assert that the caller doesn't want
+                * a hugepage at this level, and that there isn't a hugepage at
+                * this level.
+                */
+               TEST_ASSERT(current_level != target_level,
+                           "Cannot create hugepage at level: %u, vaddr: 0x%lx",
+                           current_level, vaddr);
+               TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
+                           "Cannot create page table at level: %u, vaddr: 0x%lx",
+                           current_level, vaddr);
+       }
+       return pte;
+}
+
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
+{
+       const uint64_t pg_size = PG_LEVEL_SIZE(level);
+       uint64_t *pml4e, *pdpe, *pde;
+       uint64_t *pte;
+
+       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
+                   "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+       TEST_ASSERT((vaddr % pg_size) == 0,
+                   "Virtual address not aligned,\n"
+                   "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size);
+       TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)),
+                   "Invalid virtual address, vaddr: 0x%lx", vaddr);
+       TEST_ASSERT((paddr % pg_size) == 0,
+                   "Physical address not aligned,\n"
+                   "  paddr: 0x%lx page size: 0x%lx", paddr, pg_size);
+       TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+                   "Physical address beyond maximum supported,\n"
+                   "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+                   paddr, vm->max_gfn, vm->page_size);
+       TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr,
+                   "Unexpected bits in paddr: %lx", paddr);
+
+       /*
+        * Allocate upper level page tables, if not already present.  Return
+        * early if a hugepage was created.
+        */
+       pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level);
+       if (*pml4e & PTE_LARGE_MASK)
+               return;
+
+       pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level);
+       if (*pdpe & PTE_LARGE_MASK)
+               return;
+
+       pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level);
+       if (*pde & PTE_LARGE_MASK)
+               return;
+
+       /* Fill in page table entry. */
+       pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
+       TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
+                   "PTE already present for 4k page at vaddr: 0x%lx", vaddr);
+       *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+
+       /*
+        * Neither SEV nor TDX supports shared page tables, so only the final
+        * leaf PTE needs to have the C/S-bit set manually.
+        */
+       if (vm_is_gpa_protected(vm, paddr))
+               *pte |= vm->arch.c_bit;
+       else
+               *pte |= vm->arch.s_bit;
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+       __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
+}
+
+void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+                   uint64_t nr_bytes, int level)
+{
+       uint64_t pg_size = PG_LEVEL_SIZE(level);
+       uint64_t nr_pages = nr_bytes / pg_size;
+       int i;
+
+       TEST_ASSERT(nr_bytes % pg_size == 0,
+                   "Region size not aligned: nr_bytes: 0x%lx, page size: 0x%lx",
+                   nr_bytes, pg_size);
+
+       for (i = 0; i < nr_pages; i++) {
+               __virt_pg_map(vm, vaddr, paddr, level);
+
+               vaddr += pg_size;
+               paddr += pg_size;
+       }
+}
+
+static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
+{
+       if (*pte & PTE_LARGE_MASK) {
+               TEST_ASSERT(*level == PG_LEVEL_NONE ||
+                           *level == current_level,
+                           "Unexpected hugepage at level %d", current_level);
+               *level = current_level;
+       }
+
+       return *level == current_level;
+}
+
+uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
+                                   int *level)
+{
+       uint64_t *pml4e, *pdpe, *pde;
+
+       TEST_ASSERT(!vm->arch.is_pt_protected,
+                   "Walking page tables of protected guests is impossible");
+
+       TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM,
+                   "Invalid PG_LEVEL_* '%d'", *level);
+
+       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
+               "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+       TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+               (vaddr >> vm->page_shift)),
+               "Invalid virtual address, vaddr: 0x%lx",
+               vaddr);
+       /*
+        * Based on the mode check above there are 48 bits in the vaddr, so
+        * shift 16 to sign extend the last bit (bit 47).
+        */
+       TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
+               "Canonical check failed.  The virtual address is invalid.");
+
+       pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G);
+       if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G))
+               return pml4e;
+
+       pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G);
+       if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G))
+               return pdpe;
+
+       pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M);
+       if (vm_is_target_pte(pde, level, PG_LEVEL_2M))
+               return pde;
+
+       return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
+}
+
+uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
+{
+       int level = PG_LEVEL_4K;
+
+       return __vm_get_page_table_entry(vm, vaddr, &level);
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+       uint64_t *pml4e, *pml4e_start;
+       uint64_t *pdpe, *pdpe_start;
+       uint64_t *pde, *pde_start;
+       uint64_t *pte, *pte_start;
+
+       if (!vm->pgd_created)
+               return;
+
+       fprintf(stream, "%*s                                          "
+               "                no\n", indent, "");
+       fprintf(stream, "%*s      index hvaddr         gpaddr         "
+               "addr         w exec dirty\n",
+               indent, "");
+       pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
+       for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
+               pml4e = &pml4e_start[n1];
+               if (!(*pml4e & PTE_PRESENT_MASK))
+                       continue;
+               fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
+                       " %u\n",
+                       indent, "",
+                       pml4e - pml4e_start, pml4e,
+                       addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
+                       !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
+
+               pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
+               for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
+                       pdpe = &pdpe_start[n2];
+                       if (!(*pdpe & PTE_PRESENT_MASK))
+                               continue;
+                       fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10llx "
+                               "%u  %u\n",
+                               indent, "",
+                               pdpe - pdpe_start, pdpe,
+                               addr_hva2gpa(vm, pdpe),
+                               PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
+                               !!(*pdpe & PTE_NX_MASK));
+
+                       pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
+                       for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
+                               pde = &pde_start[n3];
+                               if (!(*pde & PTE_PRESENT_MASK))
+                                       continue;
+                               fprintf(stream, "%*spde   0x%-3zx %p "
+                                       "0x%-12lx 0x%-10llx %u  %u\n",
+                                       indent, "", pde - pde_start, pde,
+                                       addr_hva2gpa(vm, pde),
+                                       PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
+                                       !!(*pde & PTE_NX_MASK));
+
+                               pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
+                               for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
+                                       pte = &pte_start[n4];
+                                       if (!(*pte & PTE_PRESENT_MASK))
+                                               continue;
+                                       fprintf(stream, "%*spte   0x%-3zx %p "
+                                               "0x%-12lx 0x%-10llx %u  %u "
+                                               "    %u    0x%-10lx\n",
+                                               indent, "",
+                                               pte - pte_start, pte,
+                                               addr_hva2gpa(vm, pte),
+                                               PTE_GET_PFN(*pte),
+                                               !!(*pte & PTE_WRITABLE_MASK),
+                                               !!(*pte & PTE_NX_MASK),
+                                               !!(*pte & PTE_DIRTY_MASK),
+                                               ((uint64_t) n1 << 27)
+                                                       | ((uint64_t) n2 << 18)
+                                                       | ((uint64_t) n3 << 9)
+                                                       | ((uint64_t) n4));
+                               }
+                       }
+               }
+       }
+}
+
+/*
+ * Set Unusable Segment
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ *   segp - Pointer to segment register
+ *
+ * Return: None
+ *
+ * Sets the segment register pointed to by @segp to an unusable state.
+ */
+static void kvm_seg_set_unusable(struct kvm_segment *segp)
+{
+       memset(segp, 0, sizeof(*segp));
+       segp->unusable = true;
+}
+
+static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
+{
+       void *gdt = addr_gva2hva(vm, vm->arch.gdt);
+       struct desc64 *desc = gdt + (segp->selector >> 3) * 8;
+
+       desc->limit0 = segp->limit & 0xFFFF;
+       desc->base0 = segp->base & 0xFFFF;
+       desc->base1 = segp->base >> 16;
+       desc->type = segp->type;
+       desc->s = segp->s;
+       desc->dpl = segp->dpl;
+       desc->p = segp->present;
+       desc->limit1 = segp->limit >> 16;
+       desc->avl = segp->avl;
+       desc->l = segp->l;
+       desc->db = segp->db;
+       desc->g = segp->g;
+       desc->base2 = segp->base >> 24;
+       if (!segp->s)
+               desc->base3 = segp->base >> 32;
+}
+
+static void kvm_seg_set_kernel_code_64bit(struct kvm_segment *segp)
+{
+       memset(segp, 0, sizeof(*segp));
+       segp->selector = KERNEL_CS;
+       segp->limit = 0xFFFFFFFFu;
+       segp->s = 0x1; /* kTypeCodeData */
+       segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
+                                         * | kFlagCodeReadable
+                                         */
+       segp->g = true;
+       segp->l = true;
+       segp->present = 1;
+}
+
+static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp)
+{
+       memset(segp, 0, sizeof(*segp));
+       segp->selector = KERNEL_DS;
+       segp->limit = 0xFFFFFFFFu;
+       segp->s = 0x1; /* kTypeCodeData */
+       segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
+                                         * | kFlagDataWritable
+                                         */
+       segp->g = true;
+       segp->present = true;
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+       int level = PG_LEVEL_NONE;
+       uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level);
+
+       TEST_ASSERT(*pte & PTE_PRESENT_MASK,
+                   "Leaf PTE not PRESENT for gva: 0x%08lx", gva);
+
+       /*
+        * No need for a hugepage mask on the PTE, x86-64 requires the "unused"
+        * address bits to be zero.
+        */
+       return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level));
+}
+
+static void kvm_seg_set_tss_64bit(vm_vaddr_t base, struct kvm_segment *segp)
+{
+       memset(segp, 0, sizeof(*segp));
+       segp->base = base;
+       segp->limit = 0x67;
+       segp->selector = KERNEL_TSS;
+       segp->type = 0xb;
+       segp->present = 1;
+}
+
+static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+       struct kvm_sregs sregs;
+
+       TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K);
+
+       /* Set mode specific system register values. */
+       vcpu_sregs_get(vcpu, &sregs);
+
+       sregs.idt.base = vm->arch.idt;
+       sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
+       sregs.gdt.base = vm->arch.gdt;
+       sregs.gdt.limit = getpagesize() - 1;
+
+       sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
+       sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
+       if (kvm_cpu_has(X86_FEATURE_XSAVE))
+               sregs.cr4 |= X86_CR4_OSXSAVE;
+       sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
+
+       kvm_seg_set_unusable(&sregs.ldt);
+       kvm_seg_set_kernel_code_64bit(&sregs.cs);
+       kvm_seg_set_kernel_data_64bit(&sregs.ds);
+       kvm_seg_set_kernel_data_64bit(&sregs.es);
+       kvm_seg_set_kernel_data_64bit(&sregs.gs);
+       kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr);
+
+       sregs.cr3 = vm->pgd;
+       vcpu_sregs_set(vcpu, &sregs);
+}
+
+static void vcpu_init_xcrs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+       struct kvm_xcrs xcrs = {
+               .nr_xcrs = 1,
+               .xcrs[0].xcr = 0,
+               .xcrs[0].value = kvm_cpu_supported_xcr0(),
+       };
+
+       if (!kvm_cpu_has(X86_FEATURE_XSAVE))
+               return;
+
+       vcpu_xcrs_set(vcpu, &xcrs);
+}
+
+static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
+                         int dpl, unsigned short selector)
+{
+       struct idt_entry *base =
+               (struct idt_entry *)addr_gva2hva(vm, vm->arch.idt);
+       struct idt_entry *e = &base[vector];
+
+       memset(e, 0, sizeof(*e));
+       e->offset0 = addr;
+       e->selector = selector;
+       e->ist = 0;
+       e->type = 14;
+       e->dpl = dpl;
+       e->p = 1;
+       e->offset1 = addr >> 16;
+       e->offset2 = addr >> 32;
+}
+
+static bool kvm_fixup_exception(struct ex_regs *regs)
+{
+       if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10)
+               return false;
+
+       if (regs->vector == DE_VECTOR)
+               return false;
+
+       regs->rip = regs->r11;
+       regs->r9 = regs->vector;
+       regs->r10 = regs->error_code;
+       return true;
+}
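kvm_fixup_exception() is the handler side of the "safe" instruction wrappers in processor.h: those wrappers load KVM_EXCEPTION_MAGIC into r9, the address of the guarded instruction into r10 and the fixup target into r11, so a fault resumes at the fixup point with the vector and error code reported back in r9/r10 instead of failing the test. An illustrative guest-side sketch (the MSR index is deliberately bogus):

    uint64_t val;
    uint8_t vector = rdmsr_safe(0xdeadbeef /* hypothetical MSR */, &val);

    GUEST_ASSERT(vector == GP_VECTOR);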
+
+void route_exception(struct ex_regs *regs)
+{
+       typedef void(*handler)(struct ex_regs *);
+       handler *handlers = (handler *)exception_handlers;
+
+       if (handlers && handlers[regs->vector]) {
+               handlers[regs->vector](regs);
+               return;
+       }
+
+       if (kvm_fixup_exception(regs))
+               return;
+
+       GUEST_FAIL("Unhandled exception '0x%lx' at guest RIP '0x%lx'",
+                  regs->vector, regs->rip);
+}
+
+static void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+       extern void *idt_handlers;
+       struct kvm_segment seg;
+       int i;
+
+       vm->arch.gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+       vm->arch.idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+       vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+       vm->arch.tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+
+       /* Handlers have the same address in both address spaces. */
+       for (i = 0; i < NUM_INTERRUPTS; i++)
+               set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, KERNEL_CS);
+
+       *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+
+       kvm_seg_set_kernel_code_64bit(&seg);
+       kvm_seg_fill_gdt_64bit(vm, &seg);
+
+       kvm_seg_set_kernel_data_64bit(&seg);
+       kvm_seg_fill_gdt_64bit(vm, &seg);
+
+       kvm_seg_set_tss_64bit(vm->arch.tss, &seg);
+       kvm_seg_fill_gdt_64bit(vm, &seg);
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+                              void (*handler)(struct ex_regs *))
+{
+       vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
+
+       handlers[vector] = (vm_vaddr_t)handler;
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       if (get_ucall(vcpu, &uc) == UCALL_ABORT)
+               REPORT_GUEST_ASSERT(uc);
+}
+
+void kvm_arch_vm_post_create(struct kvm_vm *vm)
+{
+       int r;
+
+       TEST_ASSERT(kvm_has_cap(KVM_CAP_GET_TSC_KHZ),
+                   "Require KVM_GET_TSC_KHZ to provide udelay() to guest.");
+
+       vm_create_irqchip(vm);
+       vm_init_descriptor_tables(vm);
+
+       sync_global_to_guest(vm, host_cpu_is_intel);
+       sync_global_to_guest(vm, host_cpu_is_amd);
+       sync_global_to_guest(vm, is_forced_emulation_enabled);
+
+       if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
+               struct kvm_sev_init init = { 0 };
+
+               vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+       }
+
+       r = __vm_ioctl(vm, KVM_GET_TSC_KHZ, NULL);
+       TEST_ASSERT(r > 0, "KVM_GET_TSC_KHZ did not provide a valid TSC frequency.");
+       guest_tsc_khz = r;
+       sync_global_to_guest(vm, guest_tsc_khz);
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+       struct kvm_regs regs;
+
+       vcpu_regs_get(vcpu, &regs);
+       regs.rip = (unsigned long) guest_code;
+       vcpu_regs_set(vcpu, &regs);
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+       struct kvm_mp_state mp_state;
+       struct kvm_regs regs;
+       vm_vaddr_t stack_vaddr;
+       struct kvm_vcpu *vcpu;
+
+       stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
+                                      DEFAULT_GUEST_STACK_VADDR_MIN,
+                                      MEM_REGION_DATA);
+
+       stack_vaddr += DEFAULT_STACK_PGS * getpagesize();
+
+       /*
+        * Align stack to match calling sequence requirements in section "The
+        * Stack Frame" of the System V ABI AMD64 Architecture Processor
+        * Supplement, which requires the value (%rsp + 8) to be a multiple of
+        * 16 when control is transferred to the function entry point.
+        *
+        * If this code is ever used to launch a vCPU with a 32-bit entry
+        * point, it may need to subtract 4 bytes instead of 8 bytes.
+        */
+       TEST_ASSERT(IS_ALIGNED(stack_vaddr, PAGE_SIZE),
+                   "__vm_vaddr_alloc() did not provide a page-aligned address");
+       stack_vaddr -= 8;
+
+       vcpu = __vm_vcpu_add(vm, vcpu_id);
+       vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
+       vcpu_init_sregs(vm, vcpu);
+       vcpu_init_xcrs(vm, vcpu);
+
+       /* Setup guest general purpose registers */
+       vcpu_regs_get(vcpu, &regs);
+       regs.rflags = regs.rflags | 0x2;
+       regs.rsp = stack_vaddr;
+       vcpu_regs_set(vcpu, &regs);
+
+       /* Setup the MP state */
+       mp_state.mp_state = 0;
+       vcpu_mp_state_set(vcpu, &mp_state);
+
+       /*
+        * Refresh CPUID after setting SREGS and XCR0, so that KVM's "runtime"
+        * updates to guest CPUID, e.g. for OSXSAVE and XSAVE state size, are
+        * reflected into selftests' vCPU CPUID cache, i.e. so that the cache
+        * is consistent with vCPU state.
+        */
+       vcpu_get_cpuid(vcpu);
+       return vcpu;
+}
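The stack math above holds because __vm_vaddr_alloc() returns a page-aligned region: the computed top of stack is a multiple of 4096 (and therefore of 16), so subtracting 8 leaves %rsp congruent to 8 mod 16, i.e. the post-CALL state the ABI expects at a function entry point. Restated as a tiny illustrative check (names are placeholders):

    uint64_t top = stack_base + DEFAULT_STACK_PGS * getpagesize(); /* page-aligned */
    uint64_t rsp = top - 8;

    assert((rsp + 8) % 16 == 0);    /* SysV ABI: 16-byte aligned at entry */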
+
+struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+       struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
+
+       vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
+
+       return vcpu;
+}
+
+void vcpu_arch_free(struct kvm_vcpu *vcpu)
+{
+       if (vcpu->cpuid)
+               free(vcpu->cpuid);
+}
+
+/* Do not use kvm_supported_cpuid directly except for validity checks. */
+static void *kvm_supported_cpuid;
+
+const struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
+{
+       int kvm_fd;
+
+       if (kvm_supported_cpuid)
+               return kvm_supported_cpuid;
+
+       kvm_supported_cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+       kvm_fd = open_kvm_dev_path_or_exit();
+
+       kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID,
+                 (struct kvm_cpuid2 *)kvm_supported_cpuid);
+
+       close(kvm_fd);
+       return kvm_supported_cpuid;
+}
+
+static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid,
+                             uint32_t function, uint32_t index,
+                             uint8_t reg, uint8_t lo, uint8_t hi)
+{
+       const struct kvm_cpuid_entry2 *entry;
+       int i;
+
+       for (i = 0; i < cpuid->nent; i++) {
+               entry = &cpuid->entries[i];
+
+               /*
+                * The output registers in kvm_cpuid_entry2 are in alphabetical
+                * order, but kvm_x86_cpu_feature matches that mess, so yay
+                * pointer shenanigans!
+                */
+               if (entry->function == function && entry->index == index)
+                       return ((&entry->eax)[reg] & GENMASK(hi, lo)) >> lo;
+       }
+
+       return 0;
+}
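__kvm_cpu_has() relies on struct kvm_cpuid_entry2 declaring eax, ebx, ecx and edx as consecutive __u32 fields, so (&entry->eax)[reg] indexes them like a four-element array. An equivalent, more explicit helper (hypothetical, shown only to spell out the trick):

    static uint32_t cpuid_output_reg(const struct kvm_cpuid_entry2 *entry, uint8_t reg)
    {
            /* 0 = EAX, 1 = EBX, 2 = ECX, 3 = EDX, matching the struct layout. */
            return (&entry->eax)[reg];
    }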
+
+bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
+                  struct kvm_x86_cpu_feature feature)
+{
+       return __kvm_cpu_has(cpuid, feature.function, feature.index,
+                            feature.reg, feature.bit, feature.bit);
+}
+
+uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
+                           struct kvm_x86_cpu_property property)
+{
+       return __kvm_cpu_has(cpuid, property.function, property.index,
+                            property.reg, property.lo_bit, property.hi_bit);
+}
+
+uint64_t kvm_get_feature_msr(uint64_t msr_index)
+{
+       struct {
+               struct kvm_msrs header;
+               struct kvm_msr_entry entry;
+       } buffer = {};
+       int r, kvm_fd;
+
+       buffer.header.nmsrs = 1;
+       buffer.entry.index = msr_index;
+       kvm_fd = open_kvm_dev_path_or_exit();
+
+       r = __kvm_ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
+       TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_GET_MSRS, r));
+
+       close(kvm_fd);
+       return buffer.entry.data;
+}
+
+void __vm_xsave_require_permission(uint64_t xfeature, const char *name)
+{
+       int kvm_fd;
+       u64 bitmask;
+       long rc;
+       struct kvm_device_attr attr = {
+               .group = 0,
+               .attr = KVM_X86_XCOMP_GUEST_SUPP,
+               .addr = (unsigned long) &bitmask,
+       };
+
+       TEST_ASSERT(!kvm_supported_cpuid,
+                   "kvm_get_supported_cpuid() cannot be used before ARCH_REQ_XCOMP_GUEST_PERM");
+
+       TEST_ASSERT(is_power_of_2(xfeature),
+                   "Dynamic XFeatures must be enabled one at a time");
+
+       kvm_fd = open_kvm_dev_path_or_exit();
+       rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
+       close(kvm_fd);
+
+       if (rc == -1 && (errno == ENXIO || errno == EINVAL))
+               __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported");
+
+       TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);
+
+       __TEST_REQUIRE(bitmask & xfeature,
+                      "Required XSAVE feature '%s' not supported", name);
+
+       TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, ilog2(xfeature)));
+
+       rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
+       TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
+       TEST_ASSERT(bitmask & xfeature,
+                   "'%s' (0x%lx) not permitted after prctl(ARCH_REQ_XCOMP_GUEST_PERM) permitted=0x%lx",
+                   name, xfeature, bitmask);
+}
+
+void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid)
+{
+       TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID");
+
+       /* Allow overriding the default CPUID. */
+       if (vcpu->cpuid && vcpu->cpuid->nent < cpuid->nent) {
+               free(vcpu->cpuid);
+               vcpu->cpuid = NULL;
+       }
+
+       if (!vcpu->cpuid)
+               vcpu->cpuid = allocate_kvm_cpuid2(cpuid->nent);
+
+       memcpy(vcpu->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent));
+       vcpu_set_cpuid(vcpu);
+}
+
+void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
+                            struct kvm_x86_cpu_property property,
+                            uint32_t value)
+{
+       struct kvm_cpuid_entry2 *entry;
+
+       entry = __vcpu_get_cpuid_entry(vcpu, property.function, property.index);
+
+       (&entry->eax)[property.reg] &= ~GENMASK(property.hi_bit, property.lo_bit);
+       (&entry->eax)[property.reg] |= value << property.lo_bit;
+
+       vcpu_set_cpuid(vcpu);
+
+       /* Sanity check that @value doesn't exceed the bounds in any way. */
+       TEST_ASSERT_EQ(kvm_cpuid_property(vcpu->cpuid, property), value);
+}
+
+void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function)
+{
+       struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function);
+
+       entry->eax = 0;
+       entry->ebx = 0;
+       entry->ecx = 0;
+       entry->edx = 0;
+       vcpu_set_cpuid(vcpu);
+}
+
+void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
+                                    struct kvm_x86_cpu_feature feature,
+                                    bool set)
+{
+       struct kvm_cpuid_entry2 *entry;
+       u32 *reg;
+
+       entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
+       reg = (&entry->eax) + feature.reg;
+
+       if (set)
+               *reg |= BIT(feature.bit);
+       else
+               *reg &= ~BIT(feature.bit);
+
+       vcpu_set_cpuid(vcpu);
+}
+
+uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index)
+{
+       struct {
+               struct kvm_msrs header;
+               struct kvm_msr_entry entry;
+       } buffer = {};
+
+       buffer.header.nmsrs = 1;
+       buffer.entry.index = msr_index;
+
+       vcpu_msrs_get(vcpu, &buffer.header);
+
+       return buffer.entry.data;
+}
+
+int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value)
+{
+       struct {
+               struct kvm_msrs header;
+               struct kvm_msr_entry entry;
+       } buffer = {};
+
+       memset(&buffer, 0, sizeof(buffer));
+       buffer.header.nmsrs = 1;
+       buffer.entry.index = msr_index;
+       buffer.entry.data = msr_value;
+
+       return __vcpu_ioctl(vcpu, KVM_SET_MSRS, &buffer.header);
+}
+
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
+{
+       va_list ap;
+       struct kvm_regs regs;
+
+       TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
+                   "  num: %u",
+                   num);
+
+       va_start(ap, num);
+       vcpu_regs_get(vcpu, &regs);
+
+       if (num >= 1)
+               regs.rdi = va_arg(ap, uint64_t);
+
+       if (num >= 2)
+               regs.rsi = va_arg(ap, uint64_t);
+
+       if (num >= 3)
+               regs.rdx = va_arg(ap, uint64_t);
+
+       if (num >= 4)
+               regs.rcx = va_arg(ap, uint64_t);
+
+       if (num >= 5)
+               regs.r8 = va_arg(ap, uint64_t);
+
+       if (num >= 6)
+               regs.r9 = va_arg(ap, uint64_t);
+
+       vcpu_regs_set(vcpu, &regs);
+       va_end(ap);
+}
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+{
+       struct kvm_regs regs;
+       struct kvm_sregs sregs;
+
+       fprintf(stream, "%*svCPU ID: %u\n", indent, "", vcpu->id);
+
+       fprintf(stream, "%*sregs:\n", indent + 2, "");
+       vcpu_regs_get(vcpu, &regs);
+       regs_dump(stream, &regs, indent + 4);
+
+       fprintf(stream, "%*ssregs:\n", indent + 2, "");
+       vcpu_sregs_get(vcpu, &sregs);
+       sregs_dump(stream, &sregs, indent + 4);
+}
+
+static struct kvm_msr_list *__kvm_get_msr_index_list(bool feature_msrs)
+{
+       struct kvm_msr_list *list;
+       struct kvm_msr_list nmsrs;
+       int kvm_fd, r;
+
+       kvm_fd = open_kvm_dev_path_or_exit();
+
+       nmsrs.nmsrs = 0;
+       if (!feature_msrs)
+               r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
+       else
+               r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, &nmsrs);
+
+       TEST_ASSERT(r == -1 && errno == E2BIG,
+                   "Expected -E2BIG, got rc: %i errno: %i (%s)",
+                   r, errno, strerror(errno));
+
+       list = malloc(sizeof(*list) + nmsrs.nmsrs * sizeof(list->indices[0]));
+       TEST_ASSERT(list, "-ENOMEM when allocating MSR index list");
+       list->nmsrs = nmsrs.nmsrs;
+
+       if (!feature_msrs)
+               kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+       else
+               kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
+       close(kvm_fd);
+
+       TEST_ASSERT(list->nmsrs == nmsrs.nmsrs,
+                   "Number of MSRs in list changed, was %d, now %d",
+                   nmsrs.nmsrs, list->nmsrs);
+       return list;
+}
+
+const struct kvm_msr_list *kvm_get_msr_index_list(void)
+{
+       static const struct kvm_msr_list *list;
+
+       if (!list)
+               list = __kvm_get_msr_index_list(false);
+       return list;
+}
+
+const struct kvm_msr_list *kvm_get_feature_msr_index_list(void)
+{
+       static const struct kvm_msr_list *list;
+
+       if (!list)
+               list = __kvm_get_msr_index_list(true);
+       return list;
+}
+
+bool kvm_msr_is_in_save_restore_list(uint32_t msr_index)
+{
+       const struct kvm_msr_list *list = kvm_get_msr_index_list();
+       int i;
+
+       for (i = 0; i < list->nmsrs; ++i) {
+               if (list->indices[i] == msr_index)
+                       return true;
+       }
+
+       return false;
+}
+
+static void vcpu_save_xsave_state(struct kvm_vcpu *vcpu,
+                                 struct kvm_x86_state *state)
+{
+       int size = vm_check_cap(vcpu->vm, KVM_CAP_XSAVE2);
+
+       if (size) {
+               state->xsave = malloc(size);
+               vcpu_xsave2_get(vcpu, state->xsave);
+       } else {
+               state->xsave = malloc(sizeof(struct kvm_xsave));
+               vcpu_xsave_get(vcpu, state->xsave);
+       }
+}
+
+struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu)
+{
+       const struct kvm_msr_list *msr_list = kvm_get_msr_index_list();
+       struct kvm_x86_state *state;
+       int i;
+
+       static int nested_size = -1;
+
+       if (nested_size == -1) {
+               nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
+               TEST_ASSERT(nested_size <= sizeof(state->nested_),
+                           "Nested state size too big, %i > %zi",
+                           nested_size, sizeof(state->nested_));
+       }
+
+       /*
+        * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
+        * guest state is consistent only after userspace re-enters the
+        * kernel with KVM_RUN.  Complete IO prior to migrating state
+        * to a new VM.
+        */
+       vcpu_run_complete_io(vcpu);
+
+       state = malloc(sizeof(*state) + msr_list->nmsrs * sizeof(state->msrs.entries[0]));
+       TEST_ASSERT(state, "-ENOMEM when allocating kvm state");
+
+       vcpu_events_get(vcpu, &state->events);
+       vcpu_mp_state_get(vcpu, &state->mp_state);
+       vcpu_regs_get(vcpu, &state->regs);
+       vcpu_save_xsave_state(vcpu, state);
+
+       if (kvm_has_cap(KVM_CAP_XCRS))
+               vcpu_xcrs_get(vcpu, &state->xcrs);
+
+       vcpu_sregs_get(vcpu, &state->sregs);
+
+       if (nested_size) {
+               state->nested.size = sizeof(state->nested_);
+
+               vcpu_nested_state_get(vcpu, &state->nested);
+               TEST_ASSERT(state->nested.size <= nested_size,
+                           "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
+                           state->nested.size, nested_size);
+       } else {
+               state->nested.size = 0;
+       }
+
+       state->msrs.nmsrs = msr_list->nmsrs;
+       for (i = 0; i < msr_list->nmsrs; i++)
+               state->msrs.entries[i].index = msr_list->indices[i];
+       vcpu_msrs_get(vcpu, &state->msrs);
+
+       vcpu_debugregs_get(vcpu, &state->debugregs);
+
+       return state;
+}
+
+void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state)
+{
+       vcpu_sregs_set(vcpu, &state->sregs);
+       vcpu_msrs_set(vcpu, &state->msrs);
+
+       if (kvm_has_cap(KVM_CAP_XCRS))
+               vcpu_xcrs_set(vcpu, &state->xcrs);
+
+       vcpu_xsave_set(vcpu,  state->xsave);
+       vcpu_events_set(vcpu, &state->events);
+       vcpu_mp_state_set(vcpu, &state->mp_state);
+       vcpu_debugregs_set(vcpu, &state->debugregs);
+       vcpu_regs_set(vcpu, &state->regs);
+
+       if (state->nested.size)
+               vcpu_nested_state_set(vcpu, &state->nested);
+}
+
+void kvm_x86_state_cleanup(struct kvm_x86_state *state)
+{
+       free(state->xsave);
+       free(state);
+}
+
+void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
+{
+       if (!kvm_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) {
+               *pa_bits = kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32;
+               *va_bits = 32;
+       } else {
+               *pa_bits = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
+               *va_bits = kvm_cpu_property(X86_PROPERTY_MAX_VIRT_ADDR);
+       }
+}
+
+void kvm_init_vm_address_properties(struct kvm_vm *vm)
+{
+       if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
+               vm->arch.sev_fd = open_sev_dev_path_or_exit();
+               vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT));
+               vm->gpa_tag_mask = vm->arch.c_bit;
+       } else {
+               vm->arch.sev_fd = -1;
+       }
+}
+
+const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
+                                              uint32_t function, uint32_t index)
+{
+       int i;
+
+       for (i = 0; i < cpuid->nent; i++) {
+               if (cpuid->entries[i].function == function &&
+                   cpuid->entries[i].index == index)
+                       return &cpuid->entries[i];
+       }
+
+       TEST_FAIL("CPUID function 0x%x index 0x%x not found ", function, index);
+
+       return NULL;
+}
+
+#define X86_HYPERCALL(inputs...)                                       \
+({                                                                     \
+       uint64_t r;                                                     \
+                                                                       \
+       asm volatile("test %[use_vmmcall], %[use_vmmcall]\n\t"          \
+                    "jnz 1f\n\t"                                       \
+                    "vmcall\n\t"                                       \
+                    "jmp 2f\n\t"                                       \
+                    "1: vmmcall\n\t"                                   \
+                    "2:"                                               \
+                    : "=a"(r)                                          \
+                    : [use_vmmcall] "r" (host_cpu_is_amd), inputs);    \
+                                                                       \
+       r;                                                              \
+})
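+
+/*
+ * Note: X86_HYPERCALL picks the hypercall instruction at runtime, VMCALL on
+ * Intel hosts and VMMCALL on AMD hosts, so the same guest code runs on either
+ * vendor without needing separate builds.
+ */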
+
+uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
+                      uint64_t a3)
+{
+       return X86_HYPERCALL("a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3));
+}
+
+uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
+{
+       return X86_HYPERCALL("a"(nr), "D"(a0), "S"(a1));
+}
+
+void xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
+{
+       GUEST_ASSERT(!__xen_hypercall(nr, a0, a1));
+}
+
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
+{
+       const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
+       unsigned long ht_gfn, max_gfn, max_pfn;
+       uint8_t maxphyaddr, guest_maxphyaddr;
+
+       /*
+        * Use "guest MAXPHYADDR" from KVM if it's available.  Guest MAXPHYADDR
+        * enumerates the max _mappable_ GPA, which can be less than the raw
+        * MAXPHYADDR, e.g. if MAXPHYADDR=52, KVM is using TDP, and the CPU
+        * doesn't support 5-level TDP.
+        */
+       guest_maxphyaddr = kvm_cpu_property(X86_PROPERTY_GUEST_MAX_PHY_ADDR);
+       guest_maxphyaddr = guest_maxphyaddr ?: vm->pa_bits;
+       TEST_ASSERT(guest_maxphyaddr <= vm->pa_bits,
+                   "Guest MAXPHYADDR should never be greater than raw MAXPHYADDR");
+
+       max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1;
+
+       /* Avoid reserved HyperTransport region on AMD processors.  */
+       if (!host_cpu_is_amd)
+               return max_gfn;
+
+       /* On parts with <40 physical address bits, the area is fully hidden */
+       if (vm->pa_bits < 40)
+               return max_gfn;
+
+       /* Before family 17h, the HyperTransport area is just below 1T.  */
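+       /* (1 << 28) GFNs is 1 TiB worth of pages, assuming 4 KiB x86 pages. */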
+       ht_gfn = (1 << 28) - num_ht_pages;
+       if (this_cpu_family() < 0x17)
+               goto done;
+
+       /*
+        * Otherwise it's at the top of the physical address space, possibly
+        * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX.  Use
+        * the old conservative value if MAXPHYADDR is not enumerated.
+        */
+       if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR))
+               goto done;
+
+       maxphyaddr = this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
+       max_pfn = (1ULL << (maxphyaddr - vm->page_shift)) - 1;
+
+       if (this_cpu_has_p(X86_PROPERTY_PHYS_ADDR_REDUCTION))
+               max_pfn >>= this_cpu_property(X86_PROPERTY_PHYS_ADDR_REDUCTION);
+
+       ht_gfn = max_pfn - num_ht_pages;
+done:
+       return min(max_gfn, ht_gfn - 1);
+}
+
+/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */
+bool vm_is_unrestricted_guest(struct kvm_vm *vm)
+{
+       /* Ensure that a KVM vendor-specific module is loaded. */
+       if (vm == NULL)
+               close(open_kvm_dev_path_or_exit());
+
+       return get_kvm_intel_param_bool("unrestricted_guest");
+}
+
+void kvm_selftest_arch_init(void)
+{
+       host_cpu_is_intel = this_cpu_is_intel();
+       host_cpu_is_amd = this_cpu_is_amd();
+       is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
+}
+
+bool sys_clocksource_is_based_on_tsc(void)
+{
+       char *clk_name = sys_get_cur_clocksource();
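+       /*
+        * Note: the trailing newlines in the comparisons below are deliberate;
+        * the clocksource name is presumably returned verbatim from sysfs,
+        * '\n' included.
+        */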
+       bool ret = !strcmp(clk_name, "tsc\n") ||
+                  !strcmp(clk_name, "hyperv_clocksource_tsc_page\n");
+
+       free(clk_name);
+
+       return ret;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c
new file mode 100644 (file)
index 0000000..e9535ee
--- /dev/null
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "sev.h"
+
+/*
+ * sparsebit_next_clear() can return 0 if [x, 2**64-1] are all set, and the
+ * -1 would then cause an underflow back to 2**64 - 1. This is expected and
+ * correct.
+ *
+ * If the last range in the sparsebit is [x, y] and we try to iterate,
+ * sparsebit_next_set() will return 0, and sparsebit_next_clear() will try
+ * and find the first range, but that's correct because the condition
+ * expression would cause us to quit the loop.
+ */
+static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region)
+{
+       const struct sparsebit *protected_phy_pages = region->protected_phy_pages;
+       const vm_paddr_t gpa_base = region->region.guest_phys_addr;
+       const sparsebit_idx_t lowest_page_in_region = gpa_base >> vm->page_shift;
+       sparsebit_idx_t i, j;
+
+       if (!sparsebit_any_set(protected_phy_pages))
+               return;
+
+       sev_register_encrypted_memory(vm, region);
+
+       sparsebit_for_each_set_range(protected_phy_pages, i, j) {
+               const uint64_t size = (j - i + 1) * vm->page_size;
+               const uint64_t offset = (i - lowest_page_in_region) * vm->page_size;
+
+               sev_launch_update_data(vm, gpa_base + offset, size);
+       }
+}
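+
+/*
+ * SEV(-ES) initialization below supports two flows: the legacy KVM_SEV_INIT /
+ * KVM_SEV_ES_INIT ioctls when the test created a plain KVM_X86_DEFAULT_VM,
+ * and KVM_SEV_INIT2 when a dedicated SEV/SEV-ES VM type is used.
+ */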
+
+void sev_vm_init(struct kvm_vm *vm)
+{
+       if (vm->type == KVM_X86_DEFAULT_VM) {
+               assert(vm->arch.sev_fd == -1);
+               vm->arch.sev_fd = open_sev_dev_path_or_exit();
+               vm_sev_ioctl(vm, KVM_SEV_INIT, NULL);
+       } else {
+               struct kvm_sev_init init = { 0 };
+               assert(vm->type == KVM_X86_SEV_VM);
+               vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+       }
+}
+
+void sev_es_vm_init(struct kvm_vm *vm)
+{
+       if (vm->type == KVM_X86_DEFAULT_VM) {
+               assert(vm->arch.sev_fd == -1);
+               vm->arch.sev_fd = open_sev_dev_path_or_exit();
+               vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL);
+       } else {
+               struct kvm_sev_init init = { 0 };
+               assert(vm->type == KVM_X86_SEV_ES_VM);
+               vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+       }
+}
+
+void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
+{
+       struct kvm_sev_launch_start launch_start = {
+               .policy = policy,
+       };
+       struct userspace_mem_region *region;
+       struct kvm_sev_guest_status status;
+       int ctr;
+
+       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_START, &launch_start);
+       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+
+       TEST_ASSERT_EQ(status.policy, policy);
+       TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE);
+
+       hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
+               encrypt_region(vm, region);
+
+       if (policy & SEV_POLICY_ES)
+               vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+
+       vm->arch.is_pt_protected = true;
+}
+
+void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement)
+{
+       struct kvm_sev_launch_measure launch_measure;
+       struct kvm_sev_guest_status guest_status;
+
+       launch_measure.len = 256;
+       launch_measure.uaddr = (__u64)measurement;
+       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_MEASURE, &launch_measure);
+
+       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &guest_status);
+       TEST_ASSERT_EQ(guest_status.state, SEV_GUEST_STATE_LAUNCH_SECRET);
+}
+
+void sev_vm_launch_finish(struct kvm_vm *vm)
+{
+       struct kvm_sev_guest_status status;
+
+       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+       TEST_ASSERT(status.state == SEV_GUEST_STATE_LAUNCH_UPDATE ||
+                   status.state == SEV_GUEST_STATE_LAUNCH_SECRET,
+                   "Unexpected guest state: %d", status.state);
+
+       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_FINISH, NULL);
+
+       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+       TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING);
+}
+
+struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
+                                          struct kvm_vcpu **cpu)
+{
+       struct vm_shape shape = {
+               .mode = VM_MODE_DEFAULT,
+               .type = type,
+       };
+       struct kvm_vm *vm;
+       struct kvm_vcpu *cpus[1];
+
+       vm = __vm_create_with_vcpus(shape, 1, 0, guest_code, cpus);
+       *cpu = cpus[0];
+
+       return vm;
+}
+
+void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement)
+{
+       sev_vm_launch(vm, policy);
+
+       if (!measurement)
+               measurement = alloca(256);
+
+       sev_vm_launch_measure(vm, measurement);
+
+       sev_vm_launch_finish(vm);
+}
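+
+/*
+ * Typical usage (sketch only; guest_code is a placeholder and the policy flag
+ * is assumed to come from sev.h):
+ *
+ *     struct kvm_vcpu *vcpu;
+ *     struct kvm_vm *vm;
+ *
+ *     vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_VM, guest_code, &vcpu);
+ *     vm_sev_launch(vm, SEV_POLICY_NO_DBG, NULL);
+ *     vcpu_run(vcpu);
+ */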
diff --git a/tools/testing/selftests/kvm/lib/x86/svm.c b/tools/testing/selftests/kvm/lib/x86/svm.c
new file mode 100644 (file)
index 0000000..d239c20
--- /dev/null
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Helpers used for nested SVM testing
+ * Largely inspired from KVM unit test svm.c
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+
+#define SEV_DEV_PATH "/dev/sev"
+
+struct gpr64_regs guest_regs;
+u64 rflags;
+
+/* Allocate memory regions for nested SVM tests.
+ *
+ * Input Args:
+ *   vm - The VM to allocate guest-virtual addresses in.
+ *
+ * Output Args:
+ *   p_svm_gva - The guest virtual address for the struct svm_test_data.
+ *
+ * Return:
+ *   Pointer to structure with the addresses of the SVM areas.
+ */
+struct svm_test_data *
+vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva)
+{
+       vm_vaddr_t svm_gva = vm_vaddr_alloc_page(vm);
+       struct svm_test_data *svm = addr_gva2hva(vm, svm_gva);
+
+       svm->vmcb = (void *)vm_vaddr_alloc_page(vm);
+       svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb);
+       svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb);
+
+       svm->save_area = (void *)vm_vaddr_alloc_page(vm);
+       svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area);
+       svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area);
+
+       svm->msr = (void *)vm_vaddr_alloc_page(vm);
+       svm->msr_hva = addr_gva2hva(vm, (uintptr_t)svm->msr);
+       svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr);
+       memset(svm->msr_hva, 0, getpagesize());
+
+       *p_svm_gva = svm_gva;
+       return svm;
+}
+
+static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
+                        u64 base, u32 limit, u32 attr)
+{
+       seg->selector = selector;
+       seg->attrib = attr;
+       seg->limit = limit;
+       seg->base = base;
+}
+
+void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp)
+{
+       struct vmcb *vmcb = svm->vmcb;
+       uint64_t vmcb_gpa = svm->vmcb_gpa;
+       struct vmcb_save_area *save = &vmcb->save;
+       struct vmcb_control_area *ctrl = &vmcb->control;
+       u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
+             | SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK;
+       u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
+               | SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK;
+       uint64_t efer;
+
+       efer = rdmsr(MSR_EFER);
+       wrmsr(MSR_EFER, efer | EFER_SVME);
+       wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa);
+
+       memset(vmcb, 0, sizeof(*vmcb));
+       asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory");
+       vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr);
+       vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr);
+       vmcb_set_seg(&save->ss, get_ss(), 0, -1U, data_seg_attr);
+       vmcb_set_seg(&save->ds, get_ds(), 0, -1U, data_seg_attr);
+       vmcb_set_seg(&save->gdtr, 0, get_gdt().address, get_gdt().size, 0);
+       vmcb_set_seg(&save->idtr, 0, get_idt().address, get_idt().size, 0);
+
+       ctrl->asid = 1;
+       save->cpl = 0;
+       save->efer = rdmsr(MSR_EFER);
+       asm volatile ("mov %%cr4, %0" : "=r"(save->cr4) : : "memory");
+       asm volatile ("mov %%cr3, %0" : "=r"(save->cr3) : : "memory");
+       asm volatile ("mov %%cr0, %0" : "=r"(save->cr0) : : "memory");
+       asm volatile ("mov %%dr7, %0" : "=r"(save->dr7) : : "memory");
+       asm volatile ("mov %%dr6, %0" : "=r"(save->dr6) : : "memory");
+       asm volatile ("mov %%cr2, %0" : "=r"(save->cr2) : : "memory");
+       save->g_pat = rdmsr(MSR_IA32_CR_PAT);
+       save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
+       ctrl->intercept = (1ULL << INTERCEPT_VMRUN) |
+                               (1ULL << INTERCEPT_VMMCALL);
+       ctrl->msrpm_base_pa = svm->msr_gpa;
+
+       vmcb->save.rip = (u64)guest_rip;
+       vmcb->save.rsp = (u64)guest_rsp;
+       guest_regs.rdi = (u64)svm;
+}
+
+/*
+ * save/restore 64-bit general registers except rax, rip, rsp
+ * which are handled directly via the VMCB guest processor state
+ */
+#define SAVE_GPR_C                             \
+       "xchg %%rbx, guest_regs+0x20\n\t"       \
+       "xchg %%rcx, guest_regs+0x10\n\t"       \
+       "xchg %%rdx, guest_regs+0x18\n\t"       \
+       "xchg %%rbp, guest_regs+0x30\n\t"       \
+       "xchg %%rsi, guest_regs+0x38\n\t"       \
+       "xchg %%rdi, guest_regs+0x40\n\t"       \
+       "xchg %%r8,  guest_regs+0x48\n\t"       \
+       "xchg %%r9,  guest_regs+0x50\n\t"       \
+       "xchg %%r10, guest_regs+0x58\n\t"       \
+       "xchg %%r11, guest_regs+0x60\n\t"       \
+       "xchg %%r12, guest_regs+0x68\n\t"       \
+       "xchg %%r13, guest_regs+0x70\n\t"       \
+       "xchg %%r14, guest_regs+0x78\n\t"       \
+       "xchg %%r15, guest_regs+0x80\n\t"
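+
+/*
+ * The hardcoded offsets above must match the field layout of
+ * struct gpr64_regs; rax (offset 0) is handled separately in run_guest()
+ * by bouncing it through the VMCB.
+ */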
+
+#define LOAD_GPR_C      SAVE_GPR_C
+
+/*
+ * Selftests do not use interrupts, so clgi/sti/cli/stgi are omitted for now.
+ * The registers touched by LOAD/SAVE_GPR_C end up unmodified (each is xchg'd
+ * twice), so they do not need to be in the clobber list.
+ */
+void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa)
+{
+       asm volatile (
+               "vmload %[vmcb_gpa]\n\t"
+               "mov rflags, %%r15\n\t" // rflags
+               "mov %%r15, 0x170(%[vmcb])\n\t"
+               "mov guest_regs, %%r15\n\t"     // rax
+               "mov %%r15, 0x1f8(%[vmcb])\n\t"
+               LOAD_GPR_C
+               "vmrun %[vmcb_gpa]\n\t"
+               SAVE_GPR_C
+               "mov 0x170(%[vmcb]), %%r15\n\t" // rflags
+               "mov %%r15, rflags\n\t"
+               "mov 0x1f8(%[vmcb]), %%r15\n\t" // rax
+               "mov %%r15, guest_regs\n\t"
+               "vmsave %[vmcb_gpa]\n\t"
+               : : [vmcb] "r" (vmcb), [vmcb_gpa] "a" (vmcb_gpa)
+               : "r15", "memory");
+}
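+
+/*
+ * Typical L1 usage (sketch only; l2_guest_code and l2_stack_top are
+ * placeholders):
+ *
+ *     generic_svm_setup(svm, l2_guest_code, l2_stack_top);
+ *     run_guest(svm->vmcb, svm->vmcb_gpa);
+ *     GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ */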
+
+/*
+ * Open SEV_DEV_PATH if available, otherwise exit the entire program.
+ *
+ * Return:
+ *   The opened file descriptor of /dev/sev.
+ */
+int open_sev_dev_path_or_exit(void)
+{
+       return open_path_or_exit(SEV_DEV_PATH, 0);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86/ucall.c b/tools/testing/selftests/kvm/lib/x86/ucall.c
new file mode 100644 (file)
index 0000000..1265cec
--- /dev/null
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include "kvm_util.h"
+
+#define UCALL_PIO_PORT ((uint16_t)0x1000)
+
+void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+       /*
+        * FIXME: Revert this hack (the entire commit that added it) once nVMX
+        * preserves L2 GPRs across a nested VM-Exit.  If a ucall from L2, e.g.
+        * to do a GUEST_SYNC(), lands the vCPU in L1, any and all GPRs can be
+        * clobbered by L1.  Save and restore non-volatile GPRs (clobbering RBP
+        * in particular is problematic) along with RDX and RDI (which are
+        * inputs), and clobber volatile GPRs. *sigh*
+        */
+#define HORRIFIC_L2_UCALL_CLOBBER_HACK \
+       "rcx", "rsi", "r8", "r9", "r10", "r11"
+
+       asm volatile("push %%rbp\n\t"
+                    "push %%r15\n\t"
+                    "push %%r14\n\t"
+                    "push %%r13\n\t"
+                    "push %%r12\n\t"
+                    "push %%rbx\n\t"
+                    "push %%rdx\n\t"
+                    "push %%rdi\n\t"
+                    "in %[port], %%al\n\t"
+                    "pop %%rdi\n\t"
+                    "pop %%rdx\n\t"
+                    "pop %%rbx\n\t"
+                    "pop %%r12\n\t"
+                    "pop %%r13\n\t"
+                    "pop %%r14\n\t"
+                    "pop %%r15\n\t"
+                    "pop %%rbp\n\t"
+               : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory",
+                    HORRIFIC_L2_UCALL_CLOBBER_HACK);
+}
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+
+       if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) {
+               struct kvm_regs regs;
+
+               vcpu_regs_get(vcpu, &regs);
+               return (void *)regs.rdi;
+       }
+       return NULL;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c
new file mode 100644 (file)
index 0000000..d4d1208
--- /dev/null
@@ -0,0 +1,552 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#include <asm/msr-index.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define PAGE_SHIFT_4K  12
+
+#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000
+
+bool enable_evmcs;
+
+struct hv_enlightened_vmcs *current_evmcs;
+struct hv_vp_assist_page *current_vp_assist;
+
+struct eptPageTableEntry {
+       uint64_t readable:1;
+       uint64_t writable:1;
+       uint64_t executable:1;
+       uint64_t memory_type:3;
+       uint64_t ignore_pat:1;
+       uint64_t page_size:1;
+       uint64_t accessed:1;
+       uint64_t dirty:1;
+       uint64_t ignored_11_10:2;
+       uint64_t address:40;
+       uint64_t ignored_62_52:11;
+       uint64_t suppress_ve:1;
+};
+
+struct eptPageTablePointer {
+       uint64_t memory_type:3;
+       uint64_t page_walk_length:3;
+       uint64_t ad_enabled:1;
+       uint64_t reserved_11_07:5;
+       uint64_t address:40;
+       uint64_t reserved_63_52:12;
+};
+int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
+{
+       uint16_t evmcs_ver;
+
+       vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
+                       (unsigned long)&evmcs_ver);
+
+       /* KVM should return supported EVMCS version range */
+       TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) &&
+                   (evmcs_ver & 0xff) > 0,
+                   "Incorrect EVMCS version range: %x:%x",
+                   evmcs_ver & 0xff, evmcs_ver >> 8);
+
+       return evmcs_ver;
+}
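+
+/*
+ * Note: KVM packs the supported Enlightened VMCS versions into 16 bits, with
+ * the lowest supported version in bits 7:0 and the highest in bits 15:8,
+ * which is what the assertion above checks.
+ */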
+
+/* Allocate memory regions for nested VMX tests.
+ *
+ * Input Args:
+ *   vm - The VM to allocate guest-virtual addresses in.
+ *
+ * Output Args:
+ *   p_vmx_gva - The guest virtual address for the struct vmx_pages.
+ *
+ * Return:
+ *   Pointer to structure with the addresses of the VMX areas.
+ */
+struct vmx_pages *
+vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
+{
+       vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm);
+       struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);
+
+       /* Setup of a region of guest memory for the vmxon region. */
+       vmx->vmxon = (void *)vm_vaddr_alloc_page(vm);
+       vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
+       vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);
+
+       /* Setup of a region of guest memory for a vmcs. */
+       vmx->vmcs = (void *)vm_vaddr_alloc_page(vm);
+       vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
+       vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);
+
+       /* Setup of a region of guest memory for the MSR bitmap. */
+       vmx->msr = (void *)vm_vaddr_alloc_page(vm);
+       vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
+       vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
+       memset(vmx->msr_hva, 0, getpagesize());
+
+       /* Setup of a region of guest memory for the shadow VMCS. */
+       vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm);
+       vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
+       vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);
+
+       /* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
+       vmx->vmread = (void *)vm_vaddr_alloc_page(vm);
+       vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
+       vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
+       memset(vmx->vmread_hva, 0, getpagesize());
+
+       vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm);
+       vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
+       vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
+       memset(vmx->vmwrite_hva, 0, getpagesize());
+
+       *p_vmx_gva = vmx_gva;
+       return vmx;
+}
+
+bool prepare_for_vmx_operation(struct vmx_pages *vmx)
+{
+       uint64_t feature_control;
+       uint64_t required;
+       unsigned long cr0;
+       unsigned long cr4;
+
+       /*
+        * Ensure bits in CR0 and CR4 are valid in VMX operation:
+        * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx.
+        * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx.
+        */
+       __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory");
+       cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1);
+       cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0);
+       __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory");
+
+       __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory");
+       cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1);
+       cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0);
+       /* Enable VMX operation */
+       cr4 |= X86_CR4_VMXE;
+       __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory");
+
+       /*
+        * Configure IA32_FEATURE_CONTROL MSR to allow VMXON:
+        *  Bit 0: Lock bit. If clear, VMXON causes a #GP.
+        *  Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON
+        *    outside of SMX causes a #GP.
+        */
+       required = FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
+       required |= FEAT_CTL_LOCKED;
+       feature_control = rdmsr(MSR_IA32_FEAT_CTL);
+       if ((feature_control & required) != required)
+               wrmsr(MSR_IA32_FEAT_CTL, feature_control | required);
+
+       /* Enter VMX root operation. */
+       *(uint32_t *)(vmx->vmxon) = vmcs_revision();
+       if (vmxon(vmx->vmxon_gpa))
+               return false;
+
+       return true;
+}
+
+bool load_vmcs(struct vmx_pages *vmx)
+{
+       /* Load a VMCS. */
+       *(uint32_t *)(vmx->vmcs) = vmcs_revision();
+       if (vmclear(vmx->vmcs_gpa))
+               return false;
+
+       if (vmptrld(vmx->vmcs_gpa))
+               return false;
+
+       /* Setup shadow VMCS, do not load it yet. */
+       *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
+       if (vmclear(vmx->shadow_vmcs_gpa))
+               return false;
+
+       return true;
+}
+
+static bool ept_vpid_cap_supported(uint64_t mask)
+{
+       return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask;
+}
+
+bool ept_1g_pages_supported(void)
+{
+       return ept_vpid_cap_supported(VMX_EPT_VPID_CAP_1G_PAGES);
+}
+
+/*
+ * Initialize the control fields to the most basic settings possible.
+ */
+static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
+{
+       uint32_t sec_exec_ctl = 0;
+
+       vmwrite(VIRTUAL_PROCESSOR_ID, 0);
+       vmwrite(POSTED_INTR_NV, 0);
+
+       vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS));
+
+       if (vmx->eptp_gpa) {
+               uint64_t ept_paddr;
+               struct eptPageTablePointer eptp = {
+                       .memory_type = X86_MEMTYPE_WB,
+                       .page_walk_length = 3, /* 4-level walk; the field holds length - 1 */
+                       .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
+                       .address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
+               };
+
+               memcpy(&ept_paddr, &eptp, sizeof(ept_paddr));
+               vmwrite(EPT_POINTER, ept_paddr);
+               sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT;
+       }
+
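+       /*
+        * Write the secondary controls first; if the vmwrite fails, the CPU
+        * presumably doesn't support secondary execution controls, which is
+        * tolerable only if none were requested (i.e. no EPT).
+        */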
+       if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, sec_exec_ctl))
+               vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+                       rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
+       else {
+               vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS));
+               GUEST_ASSERT(!sec_exec_ctl);
+       }
+
+       vmwrite(EXCEPTION_BITMAP, 0);
+       vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
+       vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */
+       vmwrite(CR3_TARGET_COUNT, 0);
+       vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) |
+               VM_EXIT_HOST_ADDR_SPACE_SIZE);    /* 64-bit host */
+       vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
+       vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+       vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) |
+               VM_ENTRY_IA32E_MODE);             /* 64-bit guest */
+       vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
+       vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
+       vmwrite(TPR_THRESHOLD, 0);
+
+       vmwrite(CR0_GUEST_HOST_MASK, 0);
+       vmwrite(CR4_GUEST_HOST_MASK, 0);
+       vmwrite(CR0_READ_SHADOW, get_cr0());
+       vmwrite(CR4_READ_SHADOW, get_cr4());
+
+       vmwrite(MSR_BITMAP, vmx->msr_gpa);
+       vmwrite(VMREAD_BITMAP, vmx->vmread_gpa);
+       vmwrite(VMWRITE_BITMAP, vmx->vmwrite_gpa);
+}
+
+/*
+ * Initialize the host state fields based on the current host state, with
+ * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch
+ * or vmresume.
+ */
+static inline void init_vmcs_host_state(void)
+{
+       uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS);
+
+       vmwrite(HOST_ES_SELECTOR, get_es());
+       vmwrite(HOST_CS_SELECTOR, get_cs());
+       vmwrite(HOST_SS_SELECTOR, get_ss());
+       vmwrite(HOST_DS_SELECTOR, get_ds());
+       vmwrite(HOST_FS_SELECTOR, get_fs());
+       vmwrite(HOST_GS_SELECTOR, get_gs());
+       vmwrite(HOST_TR_SELECTOR, get_tr());
+
+       if (exit_controls & VM_EXIT_LOAD_IA32_PAT)
+               vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT));
+       if (exit_controls & VM_EXIT_LOAD_IA32_EFER)
+               vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER));
+       if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+               vmwrite(HOST_IA32_PERF_GLOBAL_CTRL,
+                       rdmsr(MSR_CORE_PERF_GLOBAL_CTRL));
+
+       vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS));
+
+       vmwrite(HOST_CR0, get_cr0());
+       vmwrite(HOST_CR3, get_cr3());
+       vmwrite(HOST_CR4, get_cr4());
+       vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE));
+       vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE));
+       vmwrite(HOST_TR_BASE,
+               get_desc64_base((struct desc64 *)(get_gdt().address + get_tr())));
+       vmwrite(HOST_GDTR_BASE, get_gdt().address);
+       vmwrite(HOST_IDTR_BASE, get_idt().address);
+       vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP));
+       vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP));
+}
+
+/*
+ * Initialize the guest state fields essentially as a clone of
+ * the host state fields. Some host state fields have fixed
+ * values, and we set the corresponding guest state fields accordingly.
+ */
+static inline void init_vmcs_guest_state(void *rip, void *rsp)
+{
+       vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR));
+       vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR));
+       vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR));
+       vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR));
+       vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR));
+       vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR));
+       vmwrite(GUEST_LDTR_SELECTOR, 0);
+       vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR));
+       vmwrite(GUEST_INTR_STATUS, 0);
+       vmwrite(GUEST_PML_INDEX, 0);
+
+       vmwrite(VMCS_LINK_POINTER, -1ll);
+       vmwrite(GUEST_IA32_DEBUGCTL, 0);
+       vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT));
+       vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER));
+       vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL,
+               vmreadz(HOST_IA32_PERF_GLOBAL_CTRL));
+
+       vmwrite(GUEST_ES_LIMIT, -1);
+       vmwrite(GUEST_CS_LIMIT, -1);
+       vmwrite(GUEST_SS_LIMIT, -1);
+       vmwrite(GUEST_DS_LIMIT, -1);
+       vmwrite(GUEST_FS_LIMIT, -1);
+       vmwrite(GUEST_GS_LIMIT, -1);
+       vmwrite(GUEST_LDTR_LIMIT, -1);
+       vmwrite(GUEST_TR_LIMIT, 0x67);
+       vmwrite(GUEST_GDTR_LIMIT, 0xffff);
+       vmwrite(GUEST_IDTR_LIMIT, 0xffff);
+       vmwrite(GUEST_ES_AR_BYTES,
+               vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093);
+       vmwrite(GUEST_CS_AR_BYTES, 0xa09b);
+       vmwrite(GUEST_SS_AR_BYTES, 0xc093);
+       vmwrite(GUEST_DS_AR_BYTES,
+               vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+       vmwrite(GUEST_FS_AR_BYTES,
+               vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+       vmwrite(GUEST_GS_AR_BYTES,
+               vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+       vmwrite(GUEST_LDTR_AR_BYTES, 0x10000);
+       vmwrite(GUEST_TR_AR_BYTES, 0x8b);
+       vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
+       vmwrite(GUEST_ACTIVITY_STATE, 0);
+       vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS));
+       vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0);
+
+       vmwrite(GUEST_CR0, vmreadz(HOST_CR0));
+       vmwrite(GUEST_CR3, vmreadz(HOST_CR3));
+       vmwrite(GUEST_CR4, vmreadz(HOST_CR4));
+       vmwrite(GUEST_ES_BASE, 0);
+       vmwrite(GUEST_CS_BASE, 0);
+       vmwrite(GUEST_SS_BASE, 0);
+       vmwrite(GUEST_DS_BASE, 0);
+       vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE));
+       vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE));
+       vmwrite(GUEST_LDTR_BASE, 0);
+       vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE));
+       vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE));
+       vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE));
+       vmwrite(GUEST_DR7, 0x400);
+       vmwrite(GUEST_RSP, (uint64_t)rsp);
+       vmwrite(GUEST_RIP, (uint64_t)rip);
+       vmwrite(GUEST_RFLAGS, 2);
+       vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+       vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP));
+       vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP));
+}
+
+void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
+{
+       init_vmcs_control_fields(vmx);
+       init_vmcs_host_state();
+       init_vmcs_guest_state(guest_rip, guest_rsp);
+}
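+
+/*
+ * Typical L1 usage (sketch only; l2_guest_code and l2_stack_top are
+ * placeholders), mirroring the nested memstress setup:
+ *
+ *     GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ *     GUEST_ASSERT(load_vmcs(vmx));
+ *     prepare_vmcs(vmx, l2_guest_code, l2_stack_top);
+ *     GUEST_ASSERT(!vmlaunch());
+ */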
+
+static void nested_create_pte(struct kvm_vm *vm,
+                             struct eptPageTableEntry *pte,
+                             uint64_t nested_paddr,
+                             uint64_t paddr,
+                             int current_level,
+                             int target_level)
+{
+       if (!pte->readable) {
+               pte->writable = true;
+               pte->readable = true;
+               pte->executable = true;
+               pte->page_size = (current_level == target_level);
+               if (pte->page_size)
+                       pte->address = paddr >> vm->page_shift;
+               else
+                       pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
+       } else {
+               /*
+                * Entry already present.  Assert that the caller doesn't want
+                * a hugepage at this level, and that there isn't a hugepage at
+                * this level.
+                */
+               TEST_ASSERT(current_level != target_level,
+                           "Cannot create hugepage at level: %u, nested_paddr: 0x%lx",
+                           current_level, nested_paddr);
+               TEST_ASSERT(!pte->page_size,
+                           "Cannot create page table at level: %u, nested_paddr: 0x%lx",
+                           current_level, nested_paddr);
+       }
+}
+
+
+void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+                    uint64_t nested_paddr, uint64_t paddr, int target_level)
+{
+       const uint64_t page_size = PG_LEVEL_SIZE(target_level);
+       struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
+       uint16_t index;
+
+       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
+                   "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+       TEST_ASSERT((nested_paddr >> 48) == 0,
+                   "Nested physical address 0x%lx requires 5-level paging",
+                   nested_paddr);
+       TEST_ASSERT((nested_paddr % page_size) == 0,
+                   "Nested physical address not on page boundary,\n"
+                   "  nested_paddr: 0x%lx page_size: 0x%lx",
+                   nested_paddr, page_size);
+       TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
+                   "Nested physical address beyond maximum supported,\n"
+                   "  nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+                   nested_paddr, vm->max_gfn, vm->page_size);
+       TEST_ASSERT((paddr % page_size) == 0,
+                   "Physical address not on page boundary,\n"
+                   "  paddr: 0x%lx page_size: 0x%lx",
+                   paddr, page_size);
+       TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+                   "Physical address beyond maximum supported,\n"
+                   "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+                   paddr, vm->max_gfn, vm->page_size);
+
+       for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
+               index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
+               pte = &pt[index];
+
+               nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
+
+               if (pte->page_size)
+                       break;
+
+               pt = addr_gpa2hva(vm, pte->address * vm->page_size);
+       }
+
+       /*
+        * For now mark these as accessed and dirty because the only
+        * testcase we have needs that.  Can be reconsidered later.
+        */
+       pte->accessed = true;
+       pte->dirty = true;
+
+}
+
+void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+                  uint64_t nested_paddr, uint64_t paddr)
+{
+       __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K);
+}
+
+/*
+ * Map a range of EPT guest physical addresses to the VM's physical address
+ * space.
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   nested_paddr - Nested guest physical address to map
+ *   paddr - VM Physical Address
+ *   size - The size of the range to map
+ *   level - The level at which to map the range
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the VM given by vm, creates a nested guest translation for the
+ * page range starting at nested_paddr to the page range starting at paddr.
+ */
+void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+                 uint64_t nested_paddr, uint64_t paddr, uint64_t size,
+                 int level)
+{
+       size_t page_size = PG_LEVEL_SIZE(level);
+       size_t npages = size / page_size;
+
+       TEST_ASSERT(nested_paddr + size > nested_paddr, "Nested paddr overflow");
+       TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
+
+       while (npages--) {
+               __nested_pg_map(vmx, vm, nested_paddr, paddr, level);
+               nested_paddr += page_size;
+               paddr += page_size;
+       }
+}
+
+void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+               uint64_t nested_paddr, uint64_t paddr, uint64_t size)
+{
+       __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K);
+}
+
+/*
+ * Prepare an identity extended page table that maps all the physical pages
+ * in the VM.
+ */
+void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
+                       uint32_t memslot)
+{
+       sparsebit_idx_t i, last;
+       struct userspace_mem_region *region =
+               memslot2region(vm, memslot);
+
+       i = (region->region.guest_phys_addr >> vm->page_shift) - 1;
+       last = i + (region->region.memory_size >> vm->page_shift);
+       for (;;) {
+               i = sparsebit_next_clear(region->unused_phy_pages, i);
+               if (i > last)
+                       break;
+
+               nested_map(vmx, vm,
+                          (uint64_t)i << vm->page_shift,
+                          (uint64_t)i << vm->page_shift,
+                          1 << vm->page_shift);
+       }
+}
+
+/* Identity map a region with 1GiB Pages. */
+void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
+                           uint64_t addr, uint64_t size)
+{
+       __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
+}
+
+bool kvm_cpu_has_ept(void)
+{
+       uint64_t ctrl;
+
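+       /*
+        * Bits 63:32 of the VMX control MSRs enumerate the "allowed-1"
+        * settings, i.e. which controls are permitted to be set.
+        */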
+       ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32;
+       if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
+               return false;
+
+       ctrl = kvm_get_feature_msr(MSR_IA32_VMX_PROCBASED_CTLS2) >> 32;
+       return ctrl & SECONDARY_EXEC_ENABLE_EPT;
+}
+
+void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
+                 uint32_t eptp_memslot)
+{
+       TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
+
+       vmx->eptp = (void *)vm_vaddr_alloc_page(vm);
+       vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
+       vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
+}
+
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm)
+{
+       vmx->apic_access = (void *)vm_vaddr_alloc_page(vm);
+       vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access);
+       vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86_64/apic.c
deleted file mode 100644 (file)
index 89153a3..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2021, Google LLC.
- */
-
-#include "apic.h"
-
-void apic_disable(void)
-{
-       wrmsr(MSR_IA32_APICBASE,
-             rdmsr(MSR_IA32_APICBASE) &
-               ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
-}
-
-void xapic_enable(void)
-{
-       uint64_t val = rdmsr(MSR_IA32_APICBASE);
-
-       /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */
-       if (val & MSR_IA32_APICBASE_EXTD) {
-               apic_disable();
-               wrmsr(MSR_IA32_APICBASE,
-                     rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
-       } else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
-               wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
-       }
-
-       /*
-        * Per SDM: reset value of spurious interrupt vector register has the
-        * APIC software enabled bit=0. It must be enabled in addition to the
-        * enable bit in the MSR.
-        */
-       val = xapic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
-       xapic_write_reg(APIC_SPIV, val);
-}
-
-void x2apic_enable(void)
-{
-       wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
-             MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
-       x2apic_write_reg(APIC_SPIV,
-                        x2apic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED);
-}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S
deleted file mode 100644 (file)
index 7629819..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-handle_exception:
-       push %r15
-       push %r14
-       push %r13
-       push %r12
-       push %r11
-       push %r10
-       push %r9
-       push %r8
-
-       push %rdi
-       push %rsi
-       push %rbp
-       push %rbx
-       push %rdx
-       push %rcx
-       push %rax
-       mov %rsp, %rdi
-
-       call route_exception
-
-       pop %rax
-       pop %rcx
-       pop %rdx
-       pop %rbx
-       pop %rbp
-       pop %rsi
-       pop %rdi
-       pop %r8
-       pop %r9
-       pop %r10
-       pop %r11
-       pop %r12
-       pop %r13
-       pop %r14
-       pop %r15
-
-       /* Discard vector and error code. */
-       add $16, %rsp
-       iretq
-
-/*
- * Build the handle_exception wrappers which push the vector/error code on the
- * stack and an array of pointers to those wrappers.
- */
-.pushsection .rodata
-.globl idt_handlers
-idt_handlers:
-.popsection
-
-.macro HANDLERS has_error from to
-       vector = \from
-       .rept \to - \from + 1
-       .align 8
-
-       /* Fetch current address and append it to idt_handlers. */
-666 :
-.pushsection .rodata
-       .quad 666b
-.popsection
-
-       .if ! \has_error
-       pushq $0
-       .endif
-       pushq $vector
-       jmp handle_exception
-       vector = vector + 1
-       .endr
-.endm
-
-.global idt_handler_code
-idt_handler_code:
-       HANDLERS has_error=0 from=0  to=7
-       HANDLERS has_error=1 from=8  to=8
-       HANDLERS has_error=0 from=9  to=9
-       HANDLERS has_error=1 from=10 to=14
-       HANDLERS has_error=0 from=15 to=16
-       HANDLERS has_error=1 from=17 to=17
-       HANDLERS has_error=0 from=18 to=255
-
-.section        .note.GNU-stack, "", %progbits
diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c
deleted file mode 100644 (file)
index 15bc8cd..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Hyper-V specific functions.
- *
- * Copyright (C) 2021, Red Hat Inc.
- */
-#include <stdint.h>
-#include "processor.h"
-#include "hyperv.h"
-
-const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
-{
-       static struct kvm_cpuid2 *cpuid;
-       int kvm_fd;
-
-       if (cpuid)
-               return cpuid;
-
-       cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
-       kvm_fd = open_kvm_dev_path_or_exit();
-
-       kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
-
-       close(kvm_fd);
-       return cpuid;
-}
-
-void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu)
-{
-       static struct kvm_cpuid2 *cpuid_full;
-       const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv;
-       int i, nent = 0;
-
-       if (!cpuid_full) {
-               cpuid_sys = kvm_get_supported_cpuid();
-               cpuid_hv = kvm_get_supported_hv_cpuid();
-
-               cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent);
-               if (!cpuid_full) {
-                       perror("malloc");
-                       abort();
-               }
-
-               /* Need to skip KVM CPUID leaves 0x400000xx */
-               for (i = 0; i < cpuid_sys->nent; i++) {
-                       if (cpuid_sys->entries[i].function >= 0x40000000 &&
-                           cpuid_sys->entries[i].function < 0x40000100)
-                               continue;
-                       cpuid_full->entries[nent] = cpuid_sys->entries[i];
-                       nent++;
-               }
-
-               memcpy(&cpuid_full->entries[nent], cpuid_hv->entries,
-                      cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2));
-               cpuid_full->nent = nent + cpuid_hv->nent;
-       }
-
-       vcpu_init_cpuid(vcpu, cpuid_full);
-}
-
-const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu)
-{
-       struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
-
-       vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
-
-       return cpuid;
-}
-
-bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature)
-{
-       if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID))
-               return false;
-
-       return kvm_cpuid_has(kvm_get_supported_hv_cpuid(), feature);
-}
-
-struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
-                                                      vm_vaddr_t *p_hv_pages_gva)
-{
-       vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm);
-       struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva);
-
-       /* Setup of a region of guest memory for the VP Assist page. */
-       hv->vp_assist = (void *)vm_vaddr_alloc_page(vm);
-       hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist);
-       hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist);
-
-       /* Setup of a region of guest memory for the partition assist page. */
-       hv->partition_assist = (void *)vm_vaddr_alloc_page(vm);
-       hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist);
-       hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist);
-
-       /* Setup of a region of guest memory for the enlightened VMCS. */
-       hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm);
-       hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs);
-       hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs);
-
-       *p_hv_pages_gva = hv_pages_gva;
-       return hv;
-}
-
-int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
-{
-       uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
-               HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
-
-       wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val);
-
-       current_vp_assist = vp_assist;
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/memstress.c b/tools/testing/selftests/kvm/lib/x86_64/memstress.c
deleted file mode 100644 (file)
index d61e623..0000000
+++ /dev/null
@@ -1,112 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * x86_64-specific extensions to memstress.c.
- *
- * Copyright (C) 2022, Google, Inc.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <linux/bitmap.h>
-#include <linux/bitops.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "memstress.h"
-#include "processor.h"
-#include "vmx.h"
-
-void memstress_l2_guest_code(uint64_t vcpu_id)
-{
-       memstress_guest_code(vcpu_id);
-       vmcall();
-}
-
-extern char memstress_l2_guest_entry[];
-__asm__(
-"memstress_l2_guest_entry:"
-"      mov (%rsp), %rdi;"
-"      call memstress_l2_guest_code;"
-"      ud2;"
-);
-
-static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
-{
-#define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       unsigned long *rsp;
-
-       GUEST_ASSERT(vmx->vmcs_gpa);
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx));
-       GUEST_ASSERT(load_vmcs(vmx));
-       GUEST_ASSERT(ept_1g_pages_supported());
-
-       rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1];
-       *rsp = vcpu_id;
-       prepare_vmcs(vmx, memstress_l2_guest_entry, rsp);
-
-       GUEST_ASSERT(!vmlaunch());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-       GUEST_DONE();
-}
-
-uint64_t memstress_nested_pages(int nr_vcpus)
-{
-       /*
-        * 513 page tables is enough to identity-map 256 TiB of L2 with 1G
-        * pages and 4-level paging, plus a few pages per-vCPU for data
-        * structures such as the VMCS.
-        */
-       return 513 + 10 * nr_vcpus;
-}
-
-void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
-{
-       uint64_t start, end;
-
-       prepare_eptp(vmx, vm, 0);
-
-       /*
-        * Identity map the first 4G and the test region with 1G pages so that
-        * KVM can shadow the EPT12 with the maximum huge page size supported
-        * by the backing source.
-        */
-       nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL);
-
-       start = align_down(memstress_args.gpa, PG_SIZE_1G);
-       end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G);
-       nested_identity_map_1g(vmx, vm, start, end - start);
-}
-
-void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[])
-{
-       struct vmx_pages *vmx, *vmx0 = NULL;
-       struct kvm_regs regs;
-       vm_vaddr_t vmx_gva;
-       int vcpu_id;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-       TEST_REQUIRE(kvm_cpu_has_ept());
-
-       for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
-               vmx = vcpu_alloc_vmx(vm, &vmx_gva);
-
-               if (vcpu_id == 0) {
-                       memstress_setup_ept(vmx, vm);
-                       vmx0 = vmx;
-               } else {
-                       /* Share the same EPT table across all vCPUs. */
-                       vmx->eptp = vmx0->eptp;
-                       vmx->eptp_hva = vmx0->eptp_hva;
-                       vmx->eptp_gpa = vmx0->eptp_gpa;
-               }
-
-               /*
-                * Override the vCPU to run memstress_l1_guest_code() which will
-                * bounce it into L2 before calling memstress_guest_code().
-                */
-               vcpu_regs_get(vcpus[vcpu_id], &regs);
-               regs.rip = (unsigned long) memstress_l1_guest_code;
-               vcpu_regs_set(vcpus[vcpu_id], &regs);
-               vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id);
-       }
-}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/pmu.c b/tools/testing/selftests/kvm/lib/x86_64/pmu.c
deleted file mode 100644 (file)
index f31f042..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2023, Tencent, Inc.
- */
-
-#include <stdint.h>
-
-#include <linux/kernel.h>
-
-#include "kvm_util.h"
-#include "pmu.h"
-
-const uint64_t intel_pmu_arch_events[] = {
-       INTEL_ARCH_CPU_CYCLES,
-       INTEL_ARCH_INSTRUCTIONS_RETIRED,
-       INTEL_ARCH_REFERENCE_CYCLES,
-       INTEL_ARCH_LLC_REFERENCES,
-       INTEL_ARCH_LLC_MISSES,
-       INTEL_ARCH_BRANCHES_RETIRED,
-       INTEL_ARCH_BRANCHES_MISPREDICTED,
-       INTEL_ARCH_TOPDOWN_SLOTS,
-};
-kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS);
-
-const uint64_t amd_pmu_zen_events[] = {
-       AMD_ZEN_CORE_CYCLES,
-       AMD_ZEN_INSTRUCTIONS_RETIRED,
-       AMD_ZEN_BRANCHES_RETIRED,
-       AMD_ZEN_BRANCHES_MISPREDICTED,
-};
-kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
deleted file mode 100644 (file)
index 636b29b..0000000
+++ /dev/null
@@ -1,1295 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * tools/testing/selftests/kvm/lib/x86_64/processor.c
- *
- * Copyright (C) 2018, Google LLC.
- */
-
-#include "linux/bitmap.h"
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "sev.h"
-
-#ifndef NUM_INTERRUPTS
-#define NUM_INTERRUPTS 256
-#endif
-
-#define KERNEL_CS      0x8
-#define KERNEL_DS      0x10
-#define KERNEL_TSS     0x18
-
-vm_vaddr_t exception_handlers;
-bool host_cpu_is_amd;
-bool host_cpu_is_intel;
-bool is_forced_emulation_enabled;
-uint64_t guest_tsc_khz;
-
-static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent)
-{
-       fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
-               "rcx: 0x%.16llx rdx: 0x%.16llx\n",
-               indent, "",
-               regs->rax, regs->rbx, regs->rcx, regs->rdx);
-       fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
-               "rsp: 0x%.16llx rbp: 0x%.16llx\n",
-               indent, "",
-               regs->rsi, regs->rdi, regs->rsp, regs->rbp);
-       fprintf(stream, "%*sr8:  0x%.16llx r9:  0x%.16llx "
-               "r10: 0x%.16llx r11: 0x%.16llx\n",
-               indent, "",
-               regs->r8, regs->r9, regs->r10, regs->r11);
-       fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
-               "r14: 0x%.16llx r15: 0x%.16llx\n",
-               indent, "",
-               regs->r12, regs->r13, regs->r14, regs->r15);
-       fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
-               indent, "",
-               regs->rip, regs->rflags);
-}
-
-static void segment_dump(FILE *stream, struct kvm_segment *segment,
-                        uint8_t indent)
-{
-       fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
-               "selector: 0x%.4x type: 0x%.2x\n",
-               indent, "", segment->base, segment->limit,
-               segment->selector, segment->type);
-       fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
-               "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
-               indent, "", segment->present, segment->dpl,
-               segment->db, segment->s, segment->l);
-       fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
-               "unusable: 0x%.2x padding: 0x%.2x\n",
-               indent, "", segment->g, segment->avl,
-               segment->unusable, segment->padding);
-}
-
-static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
-                       uint8_t indent)
-{
-       fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
-               "padding: 0x%.4x 0x%.4x 0x%.4x\n",
-               indent, "", dtable->base, dtable->limit,
-               dtable->padding[0], dtable->padding[1], dtable->padding[2]);
-}
-
-static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent)
-{
-       unsigned int i;
-
-       fprintf(stream, "%*scs:\n", indent, "");
-       segment_dump(stream, &sregs->cs, indent + 2);
-       fprintf(stream, "%*sds:\n", indent, "");
-       segment_dump(stream, &sregs->ds, indent + 2);
-       fprintf(stream, "%*ses:\n", indent, "");
-       segment_dump(stream, &sregs->es, indent + 2);
-       fprintf(stream, "%*sfs:\n", indent, "");
-       segment_dump(stream, &sregs->fs, indent + 2);
-       fprintf(stream, "%*sgs:\n", indent, "");
-       segment_dump(stream, &sregs->gs, indent + 2);
-       fprintf(stream, "%*sss:\n", indent, "");
-       segment_dump(stream, &sregs->ss, indent + 2);
-       fprintf(stream, "%*str:\n", indent, "");
-       segment_dump(stream, &sregs->tr, indent + 2);
-       fprintf(stream, "%*sldt:\n", indent, "");
-       segment_dump(stream, &sregs->ldt, indent + 2);
-
-       fprintf(stream, "%*sgdt:\n", indent, "");
-       dtable_dump(stream, &sregs->gdt, indent + 2);
-       fprintf(stream, "%*sidt:\n", indent, "");
-       dtable_dump(stream, &sregs->idt, indent + 2);
-
-       fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
-               "cr3: 0x%.16llx cr4: 0x%.16llx\n",
-               indent, "",
-               sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
-       fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
-               "apic_base: 0x%.16llx\n",
-               indent, "",
-               sregs->cr8, sregs->efer, sregs->apic_base);
-
-       fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
-       for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
-               fprintf(stream, "%*s%.16llx\n", indent + 2, "",
-                       sregs->interrupt_bitmap[i]);
-       }
-}
-
-bool kvm_is_tdp_enabled(void)
-{
-       if (host_cpu_is_intel)
-               return get_kvm_intel_param_bool("ept");
-       else
-               return get_kvm_amd_param_bool("npt");
-}
-
-void virt_arch_pgd_alloc(struct kvm_vm *vm)
-{
-       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
-               "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
-       /* If needed, create page map l4 table. */
-       if (!vm->pgd_created) {
-               vm->pgd = vm_alloc_page_table(vm);
-               vm->pgd_created = true;
-       }
-}
-
-static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte,
-                         uint64_t vaddr, int level)
-{
-       uint64_t pt_gpa = PTE_GET_PA(*parent_pte);
-       uint64_t *page_table = addr_gpa2hva(vm, pt_gpa);
-       int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
-
-       TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd,
-                   "Parent PTE (level %d) not PRESENT for gva: 0x%08lx",
-                   level + 1, vaddr);
-
-       return &page_table[index];
-}
-
-static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
-                                      uint64_t *parent_pte,
-                                      uint64_t vaddr,
-                                      uint64_t paddr,
-                                      int current_level,
-                                      int target_level)
-{
-       uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level);
-
-       paddr = vm_untag_gpa(vm, paddr);
-
-       if (!(*pte & PTE_PRESENT_MASK)) {
-               *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
-               if (current_level == target_level)
-                       *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
-               else
-                       *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
-       } else {
-               /*
-                * Entry already present.  Assert that the caller doesn't want
-                * a hugepage at this level, and that there isn't a hugepage at
-                * this level.
-                */
-               TEST_ASSERT(current_level != target_level,
-                           "Cannot create hugepage at level: %u, vaddr: 0x%lx",
-                           current_level, vaddr);
-               TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
-                           "Cannot create page table at level: %u, vaddr: 0x%lx",
-                           current_level, vaddr);
-       }
-       return pte;
-}
-
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
-{
-       const uint64_t pg_size = PG_LEVEL_SIZE(level);
-       uint64_t *pml4e, *pdpe, *pde;
-       uint64_t *pte;
-
-       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
-                   "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
-       TEST_ASSERT((vaddr % pg_size) == 0,
-                   "Virtual address not aligned,\n"
-                   "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size);
-       TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)),
-                   "Invalid virtual address, vaddr: 0x%lx", vaddr);
-       TEST_ASSERT((paddr % pg_size) == 0,
-                   "Physical address not aligned,\n"
-                   "  paddr: 0x%lx page size: 0x%lx", paddr, pg_size);
-       TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
-                   "Physical address beyond maximum supported,\n"
-                   "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
-                   paddr, vm->max_gfn, vm->page_size);
-       TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr,
-                   "Unexpected bits in paddr: %lx", paddr);
-
-       /*
-        * Allocate upper level page tables, if not already present.  Return
-        * early if a hugepage was created.
-        */
-       pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level);
-       if (*pml4e & PTE_LARGE_MASK)
-               return;
-
-       pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level);
-       if (*pdpe & PTE_LARGE_MASK)
-               return;
-
-       pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level);
-       if (*pde & PTE_LARGE_MASK)
-               return;
-
-       /* Fill in page table entry. */
-       pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
-       TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
-                   "PTE already present for 4k page at vaddr: 0x%lx", vaddr);
-       *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
-
-       /*
-        * Neither SEV nor TDX supports shared page tables, so only the final
-        * leaf PTE needs to have the C/S-bit set manually.
-        */
-       if (vm_is_gpa_protected(vm, paddr))
-               *pte |= vm->arch.c_bit;
-       else
-               *pte |= vm->arch.s_bit;
-}
-
-void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
-{
-       __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
-}
-
-void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-                   uint64_t nr_bytes, int level)
-{
-       uint64_t pg_size = PG_LEVEL_SIZE(level);
-       uint64_t nr_pages = nr_bytes / pg_size;
-       int i;
-
-       TEST_ASSERT(nr_bytes % pg_size == 0,
-                   "Region size not aligned: nr_bytes: 0x%lx, page size: 0x%lx",
-                   nr_bytes, pg_size);
-
-       for (i = 0; i < nr_pages; i++) {
-               __virt_pg_map(vm, vaddr, paddr, level);
-
-               vaddr += pg_size;
-               paddr += pg_size;
-       }
-}
-
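For reference, a hedged sketch of how a test might use virt_map_level() above to back a range with 2M hugepages. The guest addresses are made up for illustration; a real test would use a GPA range it actually allocated or owns via a memslot.

static void map_2m_hugepages_example(struct kvm_vm *vm)
{
        /* Hypothetical, 2M-aligned guest virtual/physical addresses. */
        const uint64_t gva = 0x40000000;
        const uint64_t gpa = 0x40000000;
        const uint64_t size = 2 * PG_LEVEL_SIZE(PG_LEVEL_2M);

        /* Creates two 2M leaf PTEs; asserts if the range isn't 2M-aligned. */
        virt_map_level(vm, gva, gpa, size, PG_LEVEL_2M);
}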
-static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
-{
-       if (*pte & PTE_LARGE_MASK) {
-               TEST_ASSERT(*level == PG_LEVEL_NONE ||
-                           *level == current_level,
-                           "Unexpected hugepage at level %d", current_level);
-               *level = current_level;
-       }
-
-       return *level == current_level;
-}
-
-uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
-                                   int *level)
-{
-       uint64_t *pml4e, *pdpe, *pde;
-
-       TEST_ASSERT(!vm->arch.is_pt_protected,
-                   "Walking page tables of protected guests is impossible");
-
-       TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM,
-                   "Invalid PG_LEVEL_* '%d'", *level);
-
-       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
-               "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-       TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
-               (vaddr >> vm->page_shift)),
-               "Invalid virtual address, vaddr: 0x%lx",
-               vaddr);
-       /*
-        * Based on the mode check above there are 48 bits in the vaddr, so
-        * shift 16 to sign extend the last bit (bit-47),
-        * shift 16 to sign-extend the last bit (bit-47).
-       TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
-               "Canonical check failed.  The virtual address is invalid.");
-
-       pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G);
-       if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G))
-               return pml4e;
-
-       pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G);
-       if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G))
-               return pdpe;
-
-       pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M);
-       if (vm_is_target_pte(pde, level, PG_LEVEL_2M))
-               return pde;
-
-       return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
-}
-
-uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
-{
-       int level = PG_LEVEL_4K;
-
-       return __vm_get_page_table_entry(vm, vaddr, &level);
-}
-
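A hedged example of what the walker above is typically used for: fetching the leaf PTE of an already-mapped 4K page and clearing its writable bit, e.g. to provoke a write-protection fault. The helper name is hypothetical, and the guest may also need to flush its TLB before the change is observed.

static void make_gva_read_only_example(struct kvm_vm *vm, uint64_t gva)
{
        uint64_t *pte = vm_get_page_table_entry(vm, gva);

        /* Host-side edit of the guest's own page tables. */
        *pte &= ~PTE_WRITABLE_MASK;
}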
-void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
-{
-       uint64_t *pml4e, *pml4e_start;
-       uint64_t *pdpe, *pdpe_start;
-       uint64_t *pde, *pde_start;
-       uint64_t *pte, *pte_start;
-
-       if (!vm->pgd_created)
-               return;
-
-       fprintf(stream, "%*s                                          "
-               "                no\n", indent, "");
-       fprintf(stream, "%*s      index hvaddr         gpaddr         "
-               "addr         w exec dirty\n",
-               indent, "");
-       pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
-       for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
-               pml4e = &pml4e_start[n1];
-               if (!(*pml4e & PTE_PRESENT_MASK))
-                       continue;
-               fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
-                       " %u\n",
-                       indent, "",
-                       pml4e - pml4e_start, pml4e,
-                       addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
-                       !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
-
-               pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
-               for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
-                       pdpe = &pdpe_start[n2];
-                       if (!(*pdpe & PTE_PRESENT_MASK))
-                               continue;
-                       fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10llx "
-                               "%u  %u\n",
-                               indent, "",
-                               pdpe - pdpe_start, pdpe,
-                               addr_hva2gpa(vm, pdpe),
-                               PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
-                               !!(*pdpe & PTE_NX_MASK));
-
-                       pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
-                       for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
-                               pde = &pde_start[n3];
-                               if (!(*pde & PTE_PRESENT_MASK))
-                                       continue;
-                               fprintf(stream, "%*spde   0x%-3zx %p "
-                                       "0x%-12lx 0x%-10llx %u  %u\n",
-                                       indent, "", pde - pde_start, pde,
-                                       addr_hva2gpa(vm, pde),
-                                       PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
-                                       !!(*pde & PTE_NX_MASK));
-
-                               pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
-                               for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
-                                       pte = &pte_start[n4];
-                                       if (!(*pte & PTE_PRESENT_MASK))
-                                               continue;
-                                       fprintf(stream, "%*spte   0x%-3zx %p "
-                                               "0x%-12lx 0x%-10llx %u  %u "
-                                               "    %u    0x%-10lx\n",
-                                               indent, "",
-                                               pte - pte_start, pte,
-                                               addr_hva2gpa(vm, pte),
-                                               PTE_GET_PFN(*pte),
-                                               !!(*pte & PTE_WRITABLE_MASK),
-                                               !!(*pte & PTE_NX_MASK),
-                                               !!(*pte & PTE_DIRTY_MASK),
-                                               ((uint64_t) n1 << 27)
-                                                       | ((uint64_t) n2 << 18)
-                                                       | ((uint64_t) n3 << 9)
-                                                       | ((uint64_t) n4));
-                               }
-                       }
-               }
-       }
-}
-
-/*
- * Set Unusable Segment
- *
- * Input Args: None
- *
- * Output Args:
- *   segp - Pointer to segment register
- *
- * Return: None
- *
- * Sets the segment register pointed to by @segp to an unusable state.
- */
-static void kvm_seg_set_unusable(struct kvm_segment *segp)
-{
-       memset(segp, 0, sizeof(*segp));
-       segp->unusable = true;
-}
-
-static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
-{
-       void *gdt = addr_gva2hva(vm, vm->arch.gdt);
-       struct desc64 *desc = gdt + (segp->selector >> 3) * 8;
-
-       desc->limit0 = segp->limit & 0xFFFF;
-       desc->base0 = segp->base & 0xFFFF;
-       desc->base1 = segp->base >> 16;
-       desc->type = segp->type;
-       desc->s = segp->s;
-       desc->dpl = segp->dpl;
-       desc->p = segp->present;
-       desc->limit1 = segp->limit >> 16;
-       desc->avl = segp->avl;
-       desc->l = segp->l;
-       desc->db = segp->db;
-       desc->g = segp->g;
-       desc->base2 = segp->base >> 24;
-       if (!segp->s)
-               desc->base3 = segp->base >> 32;
-}
-
-static void kvm_seg_set_kernel_code_64bit(struct kvm_segment *segp)
-{
-       memset(segp, 0, sizeof(*segp));
-       segp->selector = KERNEL_CS;
-       segp->limit = 0xFFFFFFFFu;
-       segp->s = 0x1; /* kTypeCodeData */
-       segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
-                                         * | kFlagCodeReadable
-                                         */
-       segp->g = true;
-       segp->l = true;
-       segp->present = 1;
-}
-
-static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp)
-{
-       memset(segp, 0, sizeof(*segp));
-       segp->selector = KERNEL_DS;
-       segp->limit = 0xFFFFFFFFu;
-       segp->s = 0x1; /* kTypeCodeData */
-       segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
-                                         * | kFlagDataWritable
-                                         */
-       segp->g = true;
-       segp->present = true;
-}
-
-vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
-{
-       int level = PG_LEVEL_NONE;
-       uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level);
-
-       TEST_ASSERT(*pte & PTE_PRESENT_MASK,
-                   "Leaf PTE not PRESENT for gva: 0x%08lx", gva);
-
-       /*
-        * No need for a hugepage mask on the PTE, x86-64 requires the "unused"
-        * address bits to be zero.
-        */
-       return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level));
-}
-
-static void kvm_seg_set_tss_64bit(vm_vaddr_t base, struct kvm_segment *segp)
-{
-       memset(segp, 0, sizeof(*segp));
-       segp->base = base;
-       segp->limit = 0x67;
-       segp->selector = KERNEL_TSS;
-       segp->type = 0xb;
-       segp->present = 1;
-}
-
-static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
-{
-       struct kvm_sregs sregs;
-
-       TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K);
-
-       /* Set mode specific system register values. */
-       vcpu_sregs_get(vcpu, &sregs);
-
-       sregs.idt.base = vm->arch.idt;
-       sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
-       sregs.gdt.base = vm->arch.gdt;
-       sregs.gdt.limit = getpagesize() - 1;
-
-       sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
-       sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
-       if (kvm_cpu_has(X86_FEATURE_XSAVE))
-               sregs.cr4 |= X86_CR4_OSXSAVE;
-       sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
-
-       kvm_seg_set_unusable(&sregs.ldt);
-       kvm_seg_set_kernel_code_64bit(&sregs.cs);
-       kvm_seg_set_kernel_data_64bit(&sregs.ds);
-       kvm_seg_set_kernel_data_64bit(&sregs.es);
-       kvm_seg_set_kernel_data_64bit(&sregs.gs);
-       kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr);
-
-       sregs.cr3 = vm->pgd;
-       vcpu_sregs_set(vcpu, &sregs);
-}
-
-static void vcpu_init_xcrs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
-{
-       struct kvm_xcrs xcrs = {
-               .nr_xcrs = 1,
-               .xcrs[0].xcr = 0,
-               .xcrs[0].value = kvm_cpu_supported_xcr0(),
-       };
-
-       if (!kvm_cpu_has(X86_FEATURE_XSAVE))
-               return;
-
-       vcpu_xcrs_set(vcpu, &xcrs);
-}
-
-static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
-                         int dpl, unsigned short selector)
-{
-       struct idt_entry *base =
-               (struct idt_entry *)addr_gva2hva(vm, vm->arch.idt);
-       struct idt_entry *e = &base[vector];
-
-       memset(e, 0, sizeof(*e));
-       e->offset0 = addr;
-       e->selector = selector;
-       e->ist = 0;
-       e->type = 14;
-       e->dpl = dpl;
-       e->p = 1;
-       e->offset1 = addr >> 16;
-       e->offset2 = addr >> 32;
-}
-
-static bool kvm_fixup_exception(struct ex_regs *regs)
-{
-       if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10)
-               return false;
-
-       if (regs->vector == DE_VECTOR)
-               return false;
-
-       regs->rip = regs->r11;
-       regs->r9 = regs->vector;
-       regs->r10 = regs->error_code;
-       return true;
-}
-
-void route_exception(struct ex_regs *regs)
-{
-       typedef void(*handler)(struct ex_regs *);
-       handler *handlers = (handler *)exception_handlers;
-
-       if (handlers && handlers[regs->vector]) {
-               handlers[regs->vector](regs);
-               return;
-       }
-
-       if (kvm_fixup_exception(regs))
-               return;
-
-       GUEST_FAIL("Unhandled exception '0x%lx' at guest RIP '0x%lx'",
-                  regs->vector, regs->rip);
-}
-
-static void vm_init_descriptor_tables(struct kvm_vm *vm)
-{
-       extern void *idt_handlers;
-       struct kvm_segment seg;
-       int i;
-
-       vm->arch.gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
-       vm->arch.idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
-       vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
-       vm->arch.tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
-
-       /* Handlers have the same address in both address spaces. */
-       for (i = 0; i < NUM_INTERRUPTS; i++)
-               set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, KERNEL_CS);
-
-       *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
-
-       kvm_seg_set_kernel_code_64bit(&seg);
-       kvm_seg_fill_gdt_64bit(vm, &seg);
-
-       kvm_seg_set_kernel_data_64bit(&seg);
-       kvm_seg_fill_gdt_64bit(vm, &seg);
-
-       kvm_seg_set_tss_64bit(vm->arch.tss, &seg);
-       kvm_seg_fill_gdt_64bit(vm, &seg);
-}
-
-void vm_install_exception_handler(struct kvm_vm *vm, int vector,
-                              void (*handler)(struct ex_regs *))
-{
-       vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
-
-       handlers[vector] = (vm_vaddr_t)handler;
-}
-
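A hedged usage sketch for the handler plumbing above: the guest-side handler is invoked from route_exception() with the saved register state, and the host registers it per vector. The two-byte RIP adjustment assumes the faulting instruction is UD2, which is an assumption of this example, not something enforced by the library.

/* Guest: skip a UD2 instruction instead of failing the test. */
static void guest_ud_handler_example(struct ex_regs *regs)
{
        regs->rip += 2;         /* UD2 is two bytes */
}

/* Host: call after the VM (and thus the IDT/GDT) has been created. */
static void install_ud_handler_example(struct kvm_vm *vm)
{
        vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler_example);
}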
-void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       if (get_ucall(vcpu, &uc) == UCALL_ABORT)
-               REPORT_GUEST_ASSERT(uc);
-}
-
-void kvm_arch_vm_post_create(struct kvm_vm *vm)
-{
-       int r;
-
-       TEST_ASSERT(kvm_has_cap(KVM_CAP_GET_TSC_KHZ),
-                   "Require KVM_GET_TSC_KHZ to provide udelay() to guest.");
-
-       vm_create_irqchip(vm);
-       vm_init_descriptor_tables(vm);
-
-       sync_global_to_guest(vm, host_cpu_is_intel);
-       sync_global_to_guest(vm, host_cpu_is_amd);
-       sync_global_to_guest(vm, is_forced_emulation_enabled);
-
-       if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
-               struct kvm_sev_init init = { 0 };
-
-               vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
-       }
-
-       r = __vm_ioctl(vm, KVM_GET_TSC_KHZ, NULL);
-       TEST_ASSERT(r > 0, "KVM_GET_TSC_KHZ did not provide a valid TSC frequency.");
-       guest_tsc_khz = r;
-       sync_global_to_guest(vm, guest_tsc_khz);
-}
-
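To show why guest_tsc_khz is synced into the guest, here is a rough sketch of a TSC-based delay in the spirit of the selftests' udelay() helper; treat it as illustrative rather than the actual header implementation.

static inline void guest_udelay_example(uint64_t usec)
{
        uint64_t cycles = guest_tsc_khz * usec / 1000;
        uint64_t start = rdtsc();

        /* Busy-wait until the requested number of TSC cycles has elapsed. */
        while (rdtsc() - start < cycles)
                cpu_relax();
}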
-void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
-{
-       struct kvm_regs regs;
-
-       vcpu_regs_get(vcpu, &regs);
-       regs.rip = (unsigned long) guest_code;
-       vcpu_regs_set(vcpu, &regs);
-}
-
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
-{
-       struct kvm_mp_state mp_state;
-       struct kvm_regs regs;
-       vm_vaddr_t stack_vaddr;
-       struct kvm_vcpu *vcpu;
-
-       stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
-                                      DEFAULT_GUEST_STACK_VADDR_MIN,
-                                      MEM_REGION_DATA);
-
-       stack_vaddr += DEFAULT_STACK_PGS * getpagesize();
-
-       /*
-        * Align stack to match calling sequence requirements in section "The
-        * Stack Frame" of the System V ABI AMD64 Architecture Processor
-        * Supplement, which requires the value (%rsp + 8) to be a multiple of
-        * 16 when control is transferred to the function entry point.
-        *
-        * If this code is ever used to launch a vCPU with 32-bit entry point it
-        * may need to subtract 4 bytes instead of 8 bytes.
-        */
-       TEST_ASSERT(IS_ALIGNED(stack_vaddr, PAGE_SIZE),
-                   "__vm_vaddr_alloc() did not provide a page-aligned address");
-       stack_vaddr -= 8;
-
-       vcpu = __vm_vcpu_add(vm, vcpu_id);
-       vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
-       vcpu_init_sregs(vm, vcpu);
-       vcpu_init_xcrs(vm, vcpu);
-
-       /* Setup guest general purpose registers */
-       vcpu_regs_get(vcpu, &regs);
-       regs.rflags = regs.rflags | 0x2;
-       regs.rsp = stack_vaddr;
-       vcpu_regs_set(vcpu, &regs);
-
-       /* Setup the MP state */
-       mp_state.mp_state = 0;
-       vcpu_mp_state_set(vcpu, &mp_state);
-
-       /*
-        * Refresh CPUID after setting SREGS and XCR0, so that KVM's "runtime"
-        * updates to guest CPUID, e.g. for OSXSAVE and XSAVE state size, are
-        * reflected into selftests' vCPU CPUID cache, i.e. so that the cache
-        * is consistent with vCPU state.
-        */
-       vcpu_get_cpuid(vcpu);
-       return vcpu;
-}
-
-struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id)
-{
-       struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
-
-       vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
-
-       return vcpu;
-}
-
-void vcpu_arch_free(struct kvm_vcpu *vcpu)
-{
-       if (vcpu->cpuid)
-               free(vcpu->cpuid);
-}
-
-/* Do not use kvm_supported_cpuid directly except for validity checks. */
-static void *kvm_supported_cpuid;
-
-const struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
-{
-       int kvm_fd;
-
-       if (kvm_supported_cpuid)
-               return kvm_supported_cpuid;
-
-       kvm_supported_cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
-       kvm_fd = open_kvm_dev_path_or_exit();
-
-       kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID,
-                 (struct kvm_cpuid2 *)kvm_supported_cpuid);
-
-       close(kvm_fd);
-       return kvm_supported_cpuid;
-}
-
-static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid,
-                             uint32_t function, uint32_t index,
-                             uint8_t reg, uint8_t lo, uint8_t hi)
-{
-       const struct kvm_cpuid_entry2 *entry;
-       int i;
-
-       for (i = 0; i < cpuid->nent; i++) {
-               entry = &cpuid->entries[i];
-
-               /*
-                * The output registers in kvm_cpuid_entry2 are in alphabetical
-                * order, but kvm_x86_cpu_feature matches that mess, so yay
-                * pointer shenanigans!
-                */
-               if (entry->function == function && entry->index == index)
-                       return ((&entry->eax)[reg] & GENMASK(hi, lo)) >> lo;
-       }
-
-       return 0;
-}
-
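For reference, a minimal example of what ultimately lands in __kvm_cpu_has(): X86_FEATURE_XSAVE encodes CPUID.0x1:ECX[26], and the contiguous eax..edx layout of struct kvm_cpuid_entry2 is what makes the (&entry->eax)[reg] indexing work.

static void report_xsave_example(void)
{
        /* kvm_cpu_has() checks KVM's supported CPUID via kvm_cpuid_has(). */
        if (kvm_cpu_has(X86_FEATURE_XSAVE))
                pr_info("KVM supports XSAVE for guests\n");
}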
-bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
-                  struct kvm_x86_cpu_feature feature)
-{
-       return __kvm_cpu_has(cpuid, feature.function, feature.index,
-                            feature.reg, feature.bit, feature.bit);
-}
-
-uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
-                           struct kvm_x86_cpu_property property)
-{
-       return __kvm_cpu_has(cpuid, property.function, property.index,
-                            property.reg, property.lo_bit, property.hi_bit);
-}
-
-uint64_t kvm_get_feature_msr(uint64_t msr_index)
-{
-       struct {
-               struct kvm_msrs header;
-               struct kvm_msr_entry entry;
-       } buffer = {};
-       int r, kvm_fd;
-
-       buffer.header.nmsrs = 1;
-       buffer.entry.index = msr_index;
-       kvm_fd = open_kvm_dev_path_or_exit();
-
-       r = __kvm_ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
-       TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_GET_MSRS, r));
-
-       close(kvm_fd);
-       return buffer.entry.data;
-}
-
-void __vm_xsave_require_permission(uint64_t xfeature, const char *name)
-{
-       int kvm_fd;
-       u64 bitmask;
-       long rc;
-       struct kvm_device_attr attr = {
-               .group = 0,
-               .attr = KVM_X86_XCOMP_GUEST_SUPP,
-               .addr = (unsigned long) &bitmask,
-       };
-
-       TEST_ASSERT(!kvm_supported_cpuid,
-                   "kvm_get_supported_cpuid() cannot be used before ARCH_REQ_XCOMP_GUEST_PERM");
-
-       TEST_ASSERT(is_power_of_2(xfeature),
-                   "Dynamic XFeatures must be enabled one at a time");
-
-       kvm_fd = open_kvm_dev_path_or_exit();
-       rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
-       close(kvm_fd);
-
-       if (rc == -1 && (errno == ENXIO || errno == EINVAL))
-               __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported");
-
-       TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);
-
-       __TEST_REQUIRE(bitmask & xfeature,
-                      "Required XSAVE feature '%s' not supported", name);
-
-       TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, ilog2(xfeature)));
-
-       rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
-       TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
-       TEST_ASSERT(bitmask & xfeature,
-                   "'%s' (0x%lx) not permitted after prctl(ARCH_REQ_XCOMP_GUEST_PERM) permitted=0x%lx",
-                   name, xfeature, bitmask);
-}
-
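Typical usage, sketched with the caveat that the exact feature-mask name is borrowed from memory of the AMX test: dynamically-enabled XSAVE features must be requested before kvm_get_supported_cpuid() is first called, which is why the helper asserts that the CPUID cache is still empty.

/* e.g. at the top of main(), before any VM or vCPU is created. */
vm_xsave_require_permission(XFEATURE_MASK_XTILE_DATA);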
-void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid)
-{
-       TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID");
-
-       /* Allow overriding the default CPUID. */
-       if (vcpu->cpuid && vcpu->cpuid->nent < cpuid->nent) {
-               free(vcpu->cpuid);
-               vcpu->cpuid = NULL;
-       }
-
-       if (!vcpu->cpuid)
-               vcpu->cpuid = allocate_kvm_cpuid2(cpuid->nent);
-
-       memcpy(vcpu->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent));
-       vcpu_set_cpuid(vcpu);
-}
-
-void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
-                            struct kvm_x86_cpu_property property,
-                            uint32_t value)
-{
-       struct kvm_cpuid_entry2 *entry;
-
-       entry = __vcpu_get_cpuid_entry(vcpu, property.function, property.index);
-
-       (&entry->eax)[property.reg] &= ~GENMASK(property.hi_bit, property.lo_bit);
-       (&entry->eax)[property.reg] |= value << property.lo_bit;
-
-       vcpu_set_cpuid(vcpu);
-
-       /* Sanity check that @value doesn't exceed the bounds in any way. */
-       TEST_ASSERT_EQ(kvm_cpuid_property(vcpu->cpuid, property), value);
-}
-
-void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function)
-{
-       struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function);
-
-       entry->eax = 0;
-       entry->ebx = 0;
-       entry->ecx = 0;
-       entry->edx = 0;
-       vcpu_set_cpuid(vcpu);
-}
-
-void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
-                                    struct kvm_x86_cpu_feature feature,
-                                    bool set)
-{
-       struct kvm_cpuid_entry2 *entry;
-       u32 *reg;
-
-       entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
-       reg = (&entry->eax) + feature.reg;
-
-       if (set)
-               *reg |= BIT(feature.bit);
-       else
-               *reg &= ~BIT(feature.bit);
-
-       vcpu_set_cpuid(vcpu);
-}
-
-uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index)
-{
-       struct {
-               struct kvm_msrs header;
-               struct kvm_msr_entry entry;
-       } buffer = {};
-
-       buffer.header.nmsrs = 1;
-       buffer.entry.index = msr_index;
-
-       vcpu_msrs_get(vcpu, &buffer.header);
-
-       return buffer.entry.data;
-}
-
-int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value)
-{
-       struct {
-               struct kvm_msrs header;
-               struct kvm_msr_entry entry;
-       } buffer = {};
-
-       memset(&buffer, 0, sizeof(buffer));
-       buffer.header.nmsrs = 1;
-       buffer.entry.index = msr_index;
-       buffer.entry.data = msr_value;
-
-       return __vcpu_ioctl(vcpu, KVM_SET_MSRS, &buffer.header);
-}
-
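A hedged sketch of how the single-entry kvm_msrs buffers above are used in practice, via the vcpu_get_msr()/vcpu_set_msr() wrappers; MSR_IA32_TSC is just an example index.

static void bump_guest_tsc_example(struct kvm_vcpu *vcpu)
{
        uint64_t tsc = vcpu_get_msr(vcpu, MSR_IA32_TSC);

        /* vcpu_set_msr() asserts that KVM accepted the write. */
        vcpu_set_msr(vcpu, MSR_IA32_TSC, tsc + 1000000);
}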
-void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
-{
-       va_list ap;
-       struct kvm_regs regs;
-
-       TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
-                   "  num: %u",
-                   num);
-
-       va_start(ap, num);
-       vcpu_regs_get(vcpu, &regs);
-
-       if (num >= 1)
-               regs.rdi = va_arg(ap, uint64_t);
-
-       if (num >= 2)
-               regs.rsi = va_arg(ap, uint64_t);
-
-       if (num >= 3)
-               regs.rdx = va_arg(ap, uint64_t);
-
-       if (num >= 4)
-               regs.rcx = va_arg(ap, uint64_t);
-
-       if (num >= 5)
-               regs.r8 = va_arg(ap, uint64_t);
-
-       if (num >= 6)
-               regs.r9 = va_arg(ap, uint64_t);
-
-       vcpu_regs_set(vcpu, &regs);
-       va_end(ap);
-}
-
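A hedged sketch of the calling convention implied above: the first two arguments land in RDI and RSI, so a guest entry point declared with two uint64_t parameters receives exactly the values passed to vcpu_args_set(). The token value and iteration count are arbitrary.

static void guest_code_example(uint64_t token, uint64_t nr_iters)
{
        GUEST_ASSERT_EQ(token, 0x1234);

        for (uint64_t i = 0; i < nr_iters; i++)
                GUEST_SYNC(i);

        GUEST_DONE();
}

/* Host side, before the first KVM_RUN: */
static void set_guest_args_example(struct kvm_vcpu *vcpu)
{
        vcpu_args_set(vcpu, 2, 0x1234, 10);
}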
-void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
-{
-       struct kvm_regs regs;
-       struct kvm_sregs sregs;
-
-       fprintf(stream, "%*svCPU ID: %u\n", indent, "", vcpu->id);
-
-       fprintf(stream, "%*sregs:\n", indent + 2, "");
-       vcpu_regs_get(vcpu, &regs);
-       regs_dump(stream, &regs, indent + 4);
-
-       fprintf(stream, "%*ssregs:\n", indent + 2, "");
-       vcpu_sregs_get(vcpu, &sregs);
-       sregs_dump(stream, &sregs, indent + 4);
-}
-
-static struct kvm_msr_list *__kvm_get_msr_index_list(bool feature_msrs)
-{
-       struct kvm_msr_list *list;
-       struct kvm_msr_list nmsrs;
-       int kvm_fd, r;
-
-       kvm_fd = open_kvm_dev_path_or_exit();
-
-       nmsrs.nmsrs = 0;
-       if (!feature_msrs)
-               r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
-       else
-               r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, &nmsrs);
-
-       TEST_ASSERT(r == -1 && errno == E2BIG,
-                   "Expected -E2BIG, got rc: %i errno: %i (%s)",
-                   r, errno, strerror(errno));
-
-       list = malloc(sizeof(*list) + nmsrs.nmsrs * sizeof(list->indices[0]));
-       TEST_ASSERT(list, "-ENOMEM when allocating MSR index list");
-       list->nmsrs = nmsrs.nmsrs;
-
-       if (!feature_msrs)
-               kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
-       else
-               kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
-       close(kvm_fd);
-
-       TEST_ASSERT(list->nmsrs == nmsrs.nmsrs,
-                   "Number of MSRs in list changed, was %d, now %d",
-                   nmsrs.nmsrs, list->nmsrs);
-       return list;
-}
-
-const struct kvm_msr_list *kvm_get_msr_index_list(void)
-{
-       static const struct kvm_msr_list *list;
-
-       if (!list)
-               list = __kvm_get_msr_index_list(false);
-       return list;
-}
-
-const struct kvm_msr_list *kvm_get_feature_msr_index_list(void)
-{
-       static const struct kvm_msr_list *list;
-
-       if (!list)
-               list = __kvm_get_msr_index_list(true);
-       return list;
-}
-
-bool kvm_msr_is_in_save_restore_list(uint32_t msr_index)
-{
-       const struct kvm_msr_list *list = kvm_get_msr_index_list();
-       int i;
-
-       for (i = 0; i < list->nmsrs; ++i) {
-               if (list->indices[i] == msr_index)
-                       return true;
-       }
-
-       return false;
-}
-
-static void vcpu_save_xsave_state(struct kvm_vcpu *vcpu,
-                                 struct kvm_x86_state *state)
-{
-       int size = vm_check_cap(vcpu->vm, KVM_CAP_XSAVE2);
-
-       if (size) {
-               state->xsave = malloc(size);
-               vcpu_xsave2_get(vcpu, state->xsave);
-       } else {
-               state->xsave = malloc(sizeof(struct kvm_xsave));
-               vcpu_xsave_get(vcpu, state->xsave);
-       }
-}
-
-struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu)
-{
-       const struct kvm_msr_list *msr_list = kvm_get_msr_index_list();
-       struct kvm_x86_state *state;
-       int i;
-
-       static int nested_size = -1;
-
-       if (nested_size == -1) {
-               nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
-               TEST_ASSERT(nested_size <= sizeof(state->nested_),
-                           "Nested state size too big, %i > %zi",
-                           nested_size, sizeof(state->nested_));
-       }
-
-       /*
-        * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
-        * guest state is consistent only after userspace re-enters the
-        * kernel with KVM_RUN.  Complete IO prior to migrating state
-        * to a new VM.
-        */
-       vcpu_run_complete_io(vcpu);
-
-       state = malloc(sizeof(*state) + msr_list->nmsrs * sizeof(state->msrs.entries[0]));
-       TEST_ASSERT(state, "-ENOMEM when allocating kvm state");
-
-       vcpu_events_get(vcpu, &state->events);
-       vcpu_mp_state_get(vcpu, &state->mp_state);
-       vcpu_regs_get(vcpu, &state->regs);
-       vcpu_save_xsave_state(vcpu, state);
-
-       if (kvm_has_cap(KVM_CAP_XCRS))
-               vcpu_xcrs_get(vcpu, &state->xcrs);
-
-       vcpu_sregs_get(vcpu, &state->sregs);
-
-       if (nested_size) {
-               state->nested.size = sizeof(state->nested_);
-
-               vcpu_nested_state_get(vcpu, &state->nested);
-               TEST_ASSERT(state->nested.size <= nested_size,
-                           "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
-                           state->nested.size, nested_size);
-       } else {
-               state->nested.size = 0;
-       }
-
-       state->msrs.nmsrs = msr_list->nmsrs;
-       for (i = 0; i < msr_list->nmsrs; i++)
-               state->msrs.entries[i].index = msr_list->indices[i];
-       vcpu_msrs_get(vcpu, &state->msrs);
-
-       vcpu_debugregs_get(vcpu, &state->debugregs);
-
-       return state;
-}
-
-void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state)
-{
-       vcpu_sregs_set(vcpu, &state->sregs);
-       vcpu_msrs_set(vcpu, &state->msrs);
-
-       if (kvm_has_cap(KVM_CAP_XCRS))
-               vcpu_xcrs_set(vcpu, &state->xcrs);
-
-       vcpu_xsave_set(vcpu,  state->xsave);
-       vcpu_events_set(vcpu, &state->events);
-       vcpu_mp_state_set(vcpu, &state->mp_state);
-       vcpu_debugregs_set(vcpu, &state->debugregs);
-       vcpu_regs_set(vcpu, &state->regs);
-
-       if (state->nested.size)
-               vcpu_nested_state_set(vcpu, &state->nested);
-}
-
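A hedged sketch of the save/restore cycle these two functions exist for, modeled on the x86 state test: dump the vCPU, rebuild the VM around the same guest memory, and replay the state. kvm_vm_release() and vm_recreate_with_one_vcpu() are the library helpers the selftests use for this flow.

static struct kvm_vcpu *migrate_vcpu_example(struct kvm_vm *vm,
                                             struct kvm_vcpu *vcpu)
{
        struct kvm_x86_state *state = vcpu_save_state(vcpu);

        /* Drop the old vCPU/VM fds but keep guest memory, then re-add. */
        kvm_vm_release(vm);
        vcpu = vm_recreate_with_one_vcpu(vm);

        vcpu_load_state(vcpu, state);
        kvm_x86_state_cleanup(state);

        return vcpu;
}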
-void kvm_x86_state_cleanup(struct kvm_x86_state *state)
-{
-       free(state->xsave);
-       free(state);
-}
-
-void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
-{
-       if (!kvm_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) {
-               *pa_bits = kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32;
-               *va_bits = 32;
-       } else {
-               *pa_bits = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
-               *va_bits = kvm_cpu_property(X86_PROPERTY_MAX_VIRT_ADDR);
-       }
-}
-
-void kvm_init_vm_address_properties(struct kvm_vm *vm)
-{
-       if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
-               vm->arch.sev_fd = open_sev_dev_path_or_exit();
-               vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT));
-               vm->gpa_tag_mask = vm->arch.c_bit;
-       } else {
-               vm->arch.sev_fd = -1;
-       }
-}
-
-const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
-                                              uint32_t function, uint32_t index)
-{
-       int i;
-
-       for (i = 0; i < cpuid->nent; i++) {
-               if (cpuid->entries[i].function == function &&
-                   cpuid->entries[i].index == index)
-                       return &cpuid->entries[i];
-       }
-
-       TEST_FAIL("CPUID function 0x%x index 0x%x not found", function, index);
-
-       return NULL;
-}
-
-#define X86_HYPERCALL(inputs...)                                       \
-({                                                                     \
-       uint64_t r;                                                     \
-                                                                       \
-       asm volatile("test %[use_vmmcall], %[use_vmmcall]\n\t"          \
-                    "jnz 1f\n\t"                                       \
-                    "vmcall\n\t"                                       \
-                    "jmp 2f\n\t"                                       \
-                    "1: vmmcall\n\t"                                   \
-                    "2:"                                               \
-                    : "=a"(r)                                          \
-                    : [use_vmmcall] "r" (host_cpu_is_amd), inputs);    \
-                                                                       \
-       r;                                                              \
-})
-
-uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
-                      uint64_t a3)
-{
-       return X86_HYPERCALL("a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3));
-}
-
-uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
-{
-       return X86_HYPERCALL("a"(nr), "D"(a0), "S"(a1));
-}
-
-void xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
-{
-       GUEST_ASSERT(!__xen_hypercall(nr, a0, a1));
-}
-
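A hedged guest-side sketch of the KVM hypercall ABI wrapped above: the number goes in RAX and the arguments in RBX/RCX/RDX/RSI, with the result returned in RAX. KVM_HC_SEND_IPI is only an example number; whether it succeeds depends on what the test and guest CPUID enable.

static uint64_t send_ipi_example(uint64_t dest_mask_lo, uint64_t dest_mask_hi,
                                 uint64_t min_apic_id, uint64_t icr)
{
        /* Uses VMCALL on Intel and VMMCALL on AMD, selected at runtime. */
        return kvm_hypercall(KVM_HC_SEND_IPI, dest_mask_lo, dest_mask_hi,
                             min_apic_id, icr);
}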
-unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
-{
-       const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
-       unsigned long ht_gfn, max_gfn, max_pfn;
-       uint8_t maxphyaddr, guest_maxphyaddr;
-
-       /*
-        * Use "guest MAXPHYADDR" from KVM if it's available.  Guest MAXPHYADDR
-        * enumerates the max _mappable_ GPA, which can be less than the raw
-        * MAXPHYADDR, e.g. if MAXPHYADDR=52, KVM is using TDP, and the CPU
-        * doesn't support 5-level TDP.
-        */
-       guest_maxphyaddr = kvm_cpu_property(X86_PROPERTY_GUEST_MAX_PHY_ADDR);
-       guest_maxphyaddr = guest_maxphyaddr ?: vm->pa_bits;
-       TEST_ASSERT(guest_maxphyaddr <= vm->pa_bits,
-                   "Guest MAXPHYADDR should never be greater than raw MAXPHYADDR");
-
-       max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1;
-
-       /* Avoid reserved HyperTransport region on AMD processors.  */
-       if (!host_cpu_is_amd)
-               return max_gfn;
-
-       /* On parts with <40 physical address bits, the area is fully hidden */
-       if (vm->pa_bits < 40)
-               return max_gfn;
-
-       /* Before family 17h, the HyperTransport area is just below 1T.  */
-       ht_gfn = (1 << 28) - num_ht_pages;
-       if (this_cpu_family() < 0x17)
-               goto done;
-
-       /*
-        * Otherwise it's at the top of the physical address space, possibly
-        * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX.  Use
-        * the old conservative value if MAXPHYADDR is not enumerated.
-        */
-       if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR))
-               goto done;
-
-       maxphyaddr = this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
-       max_pfn = (1ULL << (maxphyaddr - vm->page_shift)) - 1;
-
-       if (this_cpu_has_p(X86_PROPERTY_PHYS_ADDR_REDUCTION))
-               max_pfn >>= this_cpu_property(X86_PROPERTY_PHYS_ADDR_REDUCTION);
-
-       ht_gfn = max_pfn - num_ht_pages;
-done:
-       return min(max_gfn, ht_gfn - 1);
-}
-
-/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */
-bool vm_is_unrestricted_guest(struct kvm_vm *vm)
-{
-       /* Ensure that a KVM vendor-specific module is loaded. */
-       if (vm == NULL)
-               close(open_kvm_dev_path_or_exit());
-
-       return get_kvm_intel_param_bool("unrestricted_guest");
-}
-
-void kvm_selftest_arch_init(void)
-{
-       host_cpu_is_intel = this_cpu_is_intel();
-       host_cpu_is_amd = this_cpu_is_amd();
-       is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
-}
-
-bool sys_clocksource_is_based_on_tsc(void)
-{
-       char *clk_name = sys_get_cur_clocksource();
-       bool ret = !strcmp(clk_name, "tsc\n") ||
-                  !strcmp(clk_name, "hyperv_clocksource_tsc_page\n");
-
-       free(clk_name);
-
-       return ret;
-}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86_64/sev.c
deleted file mode 100644 (file)
index e9535ee..0000000
+++ /dev/null
@@ -1,141 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <stdint.h>
-#include <stdbool.h>
-
-#include "sev.h"
-
-/*
- * sparsebit_next_clear() can return 0 if [x, 2**64-1] are all set, and the
- * -1 would then cause an underflow back to 2**64 - 1. This is expected and
- * correct.
- *
- * If the last range in the sparsebit is [x, y] and we try to iterate,
- * sparsebit_next_set() will return 0, and sparsebit_next_clear() will try
- * and find the first range, but that's correct because the condition
- * expression would cause us to quit the loop.
- */
-static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region)
-{
-       const struct sparsebit *protected_phy_pages = region->protected_phy_pages;
-       const vm_paddr_t gpa_base = region->region.guest_phys_addr;
-       const sparsebit_idx_t lowest_page_in_region = gpa_base >> vm->page_shift;
-       sparsebit_idx_t i, j;
-
-       if (!sparsebit_any_set(protected_phy_pages))
-               return;
-
-       sev_register_encrypted_memory(vm, region);
-
-       sparsebit_for_each_set_range(protected_phy_pages, i, j) {
-               const uint64_t size = (j - i + 1) * vm->page_size;
-               const uint64_t offset = (i - lowest_page_in_region) * vm->page_size;
-
-               sev_launch_update_data(vm, gpa_base + offset, size);
-       }
-}
-
-void sev_vm_init(struct kvm_vm *vm)
-{
-       if (vm->type == KVM_X86_DEFAULT_VM) {
-               assert(vm->arch.sev_fd == -1);
-               vm->arch.sev_fd = open_sev_dev_path_or_exit();
-               vm_sev_ioctl(vm, KVM_SEV_INIT, NULL);
-       } else {
-               struct kvm_sev_init init = { 0 };
-               assert(vm->type == KVM_X86_SEV_VM);
-               vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
-       }
-}
-
-void sev_es_vm_init(struct kvm_vm *vm)
-{
-       if (vm->type == KVM_X86_DEFAULT_VM) {
-               assert(vm->arch.sev_fd == -1);
-               vm->arch.sev_fd = open_sev_dev_path_or_exit();
-               vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL);
-       } else {
-               struct kvm_sev_init init = { 0 };
-               assert(vm->type == KVM_X86_SEV_ES_VM);
-               vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
-       }
-}
-
-void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
-{
-       struct kvm_sev_launch_start launch_start = {
-               .policy = policy,
-       };
-       struct userspace_mem_region *region;
-       struct kvm_sev_guest_status status;
-       int ctr;
-
-       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_START, &launch_start);
-       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
-
-       TEST_ASSERT_EQ(status.policy, policy);
-       TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE);
-
-       hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
-               encrypt_region(vm, region);
-
-       if (policy & SEV_POLICY_ES)
-               vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
-
-       vm->arch.is_pt_protected = true;
-}
-
-void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement)
-{
-       struct kvm_sev_launch_measure launch_measure;
-       struct kvm_sev_guest_status guest_status;
-
-       launch_measure.len = 256;
-       launch_measure.uaddr = (__u64)measurement;
-       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_MEASURE, &launch_measure);
-
-       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &guest_status);
-       TEST_ASSERT_EQ(guest_status.state, SEV_GUEST_STATE_LAUNCH_SECRET);
-}
-
-void sev_vm_launch_finish(struct kvm_vm *vm)
-{
-       struct kvm_sev_guest_status status;
-
-       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
-       TEST_ASSERT(status.state == SEV_GUEST_STATE_LAUNCH_UPDATE ||
-                   status.state == SEV_GUEST_STATE_LAUNCH_SECRET,
-                   "Unexpected guest state: %d", status.state);
-
-       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_FINISH, NULL);
-
-       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
-       TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING);
-}
-
-struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
-                                          struct kvm_vcpu **cpu)
-{
-       struct vm_shape shape = {
-               .mode = VM_MODE_DEFAULT,
-               .type = type,
-       };
-       struct kvm_vm *vm;
-       struct kvm_vcpu *cpus[1];
-
-       vm = __vm_create_with_vcpus(shape, 1, 0, guest_code, cpus);
-       *cpu = cpus[0];
-
-       return vm;
-}
-
-void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement)
-{
-       sev_vm_launch(vm, policy);
-
-       if (!measurement)
-               measurement = alloca(256);
-
-       sev_vm_launch_measure(vm, measurement);
-
-       sev_vm_launch_finish(vm);
-}
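Pulling the helpers in this file together, a hedged sketch of a minimal SEV test: create the VM with an SEV type, launch it (which encrypts and measures guest memory), and run the vCPU. The policy value is illustrative.

static void sev_smoke_example(void *guest_code)
{
        struct kvm_vcpu *vcpu;
        struct kvm_vm *vm;

        vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_VM, guest_code, &vcpu);

        /* Encrypts all protected regions, measures, and finalizes launch. */
        vm_sev_launch(vm, SEV_POLICY_NO_DBG, NULL);

        vcpu_run(vcpu);
        kvm_vm_free(vm);
}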
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
deleted file mode 100644 (file)
index 5495a92..0000000
+++ /dev/null
@@ -1,164 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * tools/testing/selftests/kvm/lib/x86_64/svm.c
- * Helpers used for nested SVM testing
- * Largely inspired from KVM unit test svm.c
- *
- * Copyright (C) 2020, Red Hat, Inc.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-
-#define SEV_DEV_PATH "/dev/sev"
-
-struct gpr64_regs guest_regs;
-u64 rflags;
-
-/* Allocate memory regions for nested SVM tests.
- *
- * Input Args:
- *   vm - The VM to allocate guest-virtual addresses in.
- *
- * Output Args:
- *   p_svm_gva - The guest virtual address for the struct svm_test_data.
- *
- * Return:
- *   Pointer to structure with the addresses of the SVM areas.
- */
-struct svm_test_data *
-vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva)
-{
-       vm_vaddr_t svm_gva = vm_vaddr_alloc_page(vm);
-       struct svm_test_data *svm = addr_gva2hva(vm, svm_gva);
-
-       svm->vmcb = (void *)vm_vaddr_alloc_page(vm);
-       svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb);
-       svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb);
-
-       svm->save_area = (void *)vm_vaddr_alloc_page(vm);
-       svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area);
-       svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area);
-
-       svm->msr = (void *)vm_vaddr_alloc_page(vm);
-       svm->msr_hva = addr_gva2hva(vm, (uintptr_t)svm->msr);
-       svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr);
-       memset(svm->msr_hva, 0, getpagesize());
-
-       *p_svm_gva = svm_gva;
-       return svm;
-}
-
-static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
-                        u64 base, u32 limit, u32 attr)
-{
-       seg->selector = selector;
-       seg->attrib = attr;
-       seg->limit = limit;
-       seg->base = base;
-}
-
-void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp)
-{
-       struct vmcb *vmcb = svm->vmcb;
-       uint64_t vmcb_gpa = svm->vmcb_gpa;
-       struct vmcb_save_area *save = &vmcb->save;
-       struct vmcb_control_area *ctrl = &vmcb->control;
-       u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
-             | SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK;
-       u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
-               | SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK;
-       uint64_t efer;
-
-       efer = rdmsr(MSR_EFER);
-       wrmsr(MSR_EFER, efer | EFER_SVME);
-       wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa);
-
-       memset(vmcb, 0, sizeof(*vmcb));
-       asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory");
-       vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr);
-       vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr);
-       vmcb_set_seg(&save->ss, get_ss(), 0, -1U, data_seg_attr);
-       vmcb_set_seg(&save->ds, get_ds(), 0, -1U, data_seg_attr);
-       vmcb_set_seg(&save->gdtr, 0, get_gdt().address, get_gdt().size, 0);
-       vmcb_set_seg(&save->idtr, 0, get_idt().address, get_idt().size, 0);
-
-       ctrl->asid = 1;
-       save->cpl = 0;
-       save->efer = rdmsr(MSR_EFER);
-       asm volatile ("mov %%cr4, %0" : "=r"(save->cr4) : : "memory");
-       asm volatile ("mov %%cr3, %0" : "=r"(save->cr3) : : "memory");
-       asm volatile ("mov %%cr0, %0" : "=r"(save->cr0) : : "memory");
-       asm volatile ("mov %%dr7, %0" : "=r"(save->dr7) : : "memory");
-       asm volatile ("mov %%dr6, %0" : "=r"(save->dr6) : : "memory");
-       asm volatile ("mov %%cr2, %0" : "=r"(save->cr2) : : "memory");
-       save->g_pat = rdmsr(MSR_IA32_CR_PAT);
-       save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
-       ctrl->intercept = (1ULL << INTERCEPT_VMRUN) |
-                               (1ULL << INTERCEPT_VMMCALL);
-       ctrl->msrpm_base_pa = svm->msr_gpa;
-
-       vmcb->save.rip = (u64)guest_rip;
-       vmcb->save.rsp = (u64)guest_rsp;
-       guest_regs.rdi = (u64)svm;
-}
-
-/*
- * save/restore 64-bit general registers except rax, rip, rsp
- * which are directly handed through the VMCB guest processor state
- */
-#define SAVE_GPR_C                             \
-       "xchg %%rbx, guest_regs+0x20\n\t"       \
-       "xchg %%rcx, guest_regs+0x10\n\t"       \
-       "xchg %%rdx, guest_regs+0x18\n\t"       \
-       "xchg %%rbp, guest_regs+0x30\n\t"       \
-       "xchg %%rsi, guest_regs+0x38\n\t"       \
-       "xchg %%rdi, guest_regs+0x40\n\t"       \
-       "xchg %%r8,  guest_regs+0x48\n\t"       \
-       "xchg %%r9,  guest_regs+0x50\n\t"       \
-       "xchg %%r10, guest_regs+0x58\n\t"       \
-       "xchg %%r11, guest_regs+0x60\n\t"       \
-       "xchg %%r12, guest_regs+0x68\n\t"       \
-       "xchg %%r13, guest_regs+0x70\n\t"       \
-       "xchg %%r14, guest_regs+0x78\n\t"       \
-       "xchg %%r15, guest_regs+0x80\n\t"
-
-#define LOAD_GPR_C      SAVE_GPR_C
-
-/*
- * Selftests do not use interrupts, so clgi/sti/cli/stgi are dropped for
- * now.  The registers involved in LOAD/SAVE_GPR_C are ultimately left
- * unmodified, so they do not need to be in the clobber list.
- */
-void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa)
-{
-       asm volatile (
-               "vmload %[vmcb_gpa]\n\t"
-               "mov rflags, %%r15\n\t" // rflags
-               "mov %%r15, 0x170(%[vmcb])\n\t"
-               "mov guest_regs, %%r15\n\t"     // rax
-               "mov %%r15, 0x1f8(%[vmcb])\n\t"
-               LOAD_GPR_C
-               "vmrun %[vmcb_gpa]\n\t"
-               SAVE_GPR_C
-               "mov 0x170(%[vmcb]), %%r15\n\t" // rflags
-               "mov %%r15, rflags\n\t"
-               "mov 0x1f8(%[vmcb]), %%r15\n\t" // rax
-               "mov %%r15, guest_regs\n\t"
-               "vmsave %[vmcb_gpa]\n\t"
-               : : [vmcb] "r" (vmcb), [vmcb_gpa] "a" (vmcb_gpa)
-               : "r15", "memory");
-}
-
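A hedged sketch of the guest (L1) side that generic_svm_setup() and run_guest() are written for, in the style of the existing nested SVM tests: L2 does a VMMCALL, and L1 checks that the configured intercept fired.

#define L2_GUEST_STACK_SIZE 64

static void l2_guest_code_example(void)
{
        vmmcall();      /* exits to L1 with SVM_EXIT_VMMCALL */
}

static void l1_guest_code_example(struct svm_test_data *svm)
{
        static unsigned long l2_stack[L2_GUEST_STACK_SIZE];

        generic_svm_setup(svm, l2_guest_code_example,
                          &l2_stack[L2_GUEST_STACK_SIZE]);
        run_guest(svm->vmcb, svm->vmcb_gpa);

        GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
        GUEST_DONE();
}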
-/*
- * Open SEV_DEV_PATH if available, otherwise exit the entire program.
- *
- * Return:
- *   The opened file descriptor of /dev/sev.
- */
-int open_sev_dev_path_or_exit(void)
-{
-       return open_path_or_exit(SEV_DEV_PATH, 0);
-}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
deleted file mode 100644 (file)
index 1265cec..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ucall support. A ucall is a "hypercall to userspace".
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-#include "kvm_util.h"
-
-#define UCALL_PIO_PORT ((uint16_t)0x1000)
-
-void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
-       /*
-        * FIXME: Revert this hack (the entire commit that added it) once nVMX
-        * preserves L2 GPRs across a nested VM-Exit.  If a ucall from L2, e.g.
-        * to do a GUEST_SYNC(), lands the vCPU in L1, any and all GPRs can be
-        * clobbered by L1.  Save and restore non-volatile GPRs (clobbering RBP
-        * in particular is problematic) along with RDX and RDI (which are
-        * inputs), and clobber volatile GPRs. *sigh*
-        */
-#define HORRIFIC_L2_UCALL_CLOBBER_HACK \
-       "rcx", "rsi", "r8", "r9", "r10", "r11"
-
-       asm volatile("push %%rbp\n\t"
-                    "push %%r15\n\t"
-                    "push %%r14\n\t"
-                    "push %%r13\n\t"
-                    "push %%r12\n\t"
-                    "push %%rbx\n\t"
-                    "push %%rdx\n\t"
-                    "push %%rdi\n\t"
-                    "in %[port], %%al\n\t"
-                    "pop %%rdi\n\t"
-                    "pop %%rdx\n\t"
-                    "pop %%rbx\n\t"
-                    "pop %%r12\n\t"
-                    "pop %%r13\n\t"
-                    "pop %%r14\n\t"
-                    "pop %%r15\n\t"
-                    "pop %%rbp\n\t"
-               : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory",
-                    HORRIFIC_L2_UCALL_CLOBBER_HACK);
-}
-
-void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-
-       if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) {
-               struct kvm_regs regs;
-
-               vcpu_regs_get(vcpu, &regs);
-               return (void *)regs.rdi;
-       }
-       return NULL;
-}
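For completeness, a hedged sketch of the host-side loop that consumes these PIO exits: get_ucall() ends up in ucall_arch_get_ucall() above to recover the ucall structure from the guest's RDI.

static void run_guest_example(struct kvm_vcpu *vcpu)
{
        struct ucall uc;

        for (;;) {
                vcpu_run(vcpu);

                switch (get_ucall(vcpu, &uc)) {
                case UCALL_SYNC:
                        continue;       /* guest checkpoint, keep running */
                case UCALL_ABORT:
                        REPORT_GUEST_ASSERT(uc);
                        /* NOT REACHED */
                case UCALL_DONE:
                        return;
                default:
                        TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
                }
        }
}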
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
deleted file mode 100644 (file)
index d7ac122..0000000
+++ /dev/null
@@ -1,554 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * tools/testing/selftests/kvm/lib/x86_64/vmx.c
- *
- * Copyright (C) 2018, Google LLC.
- */
-
-#include <asm/msr-index.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#define PAGE_SHIFT_4K  12
-
-#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000
-
-bool enable_evmcs;
-
-struct hv_enlightened_vmcs *current_evmcs;
-struct hv_vp_assist_page *current_vp_assist;
-
-struct eptPageTableEntry {
-       uint64_t readable:1;
-       uint64_t writable:1;
-       uint64_t executable:1;
-       uint64_t memory_type:3;
-       uint64_t ignore_pat:1;
-       uint64_t page_size:1;
-       uint64_t accessed:1;
-       uint64_t dirty:1;
-       uint64_t ignored_11_10:2;
-       uint64_t address:40;
-       uint64_t ignored_62_52:11;
-       uint64_t suppress_ve:1;
-};
-
-struct eptPageTablePointer {
-       uint64_t memory_type:3;
-       uint64_t page_walk_length:3;
-       uint64_t ad_enabled:1;
-       uint64_t reserved_11_07:5;
-       uint64_t address:40;
-       uint64_t reserved_63_52:12;
-};
-int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
-{
-       uint16_t evmcs_ver;
-
-       vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
-                       (unsigned long)&evmcs_ver);
-
-       /* KVM should return supported EVMCS version range */
-       TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) &&
-                   (evmcs_ver & 0xff) > 0,
-                   "Incorrect EVMCS version range: %x:%x",
-                   evmcs_ver & 0xff, evmcs_ver >> 8);
-
-       return evmcs_ver;
-}
-
-/* Allocate memory regions for nested VMX tests.
- *
- * Input Args:
- *   vm - The VM to allocate guest-virtual addresses in.
- *
- * Output Args:
- *   p_vmx_gva - The guest virtual address for the struct vmx_pages.
- *
- * Return:
- *   Pointer to structure with the addresses of the VMX areas.
- */
-struct vmx_pages *
-vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
-{
-       vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm);
-       struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);
-
-       /* Setup of a region of guest memory for the vmxon region. */
-       vmx->vmxon = (void *)vm_vaddr_alloc_page(vm);
-       vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
-       vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);
-
-       /* Setup of a region of guest memory for a vmcs. */
-       vmx->vmcs = (void *)vm_vaddr_alloc_page(vm);
-       vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
-       vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);
-
-       /* Setup of a region of guest memory for the MSR bitmap. */
-       vmx->msr = (void *)vm_vaddr_alloc_page(vm);
-       vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
-       vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
-       memset(vmx->msr_hva, 0, getpagesize());
-
-       /* Setup of a region of guest memory for the shadow VMCS. */
-       vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm);
-       vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
-       vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);
-
-       /* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
-       vmx->vmread = (void *)vm_vaddr_alloc_page(vm);
-       vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
-       vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
-       memset(vmx->vmread_hva, 0, getpagesize());
-
-       vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm);
-       vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
-       vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
-       memset(vmx->vmwrite_hva, 0, getpagesize());
-
-       *p_vmx_gva = vmx_gva;
-       return vmx;
-}
-
-bool prepare_for_vmx_operation(struct vmx_pages *vmx)
-{
-       uint64_t feature_control;
-       uint64_t required;
-       unsigned long cr0;
-       unsigned long cr4;
-
-       /*
-        * Ensure bits in CR0 and CR4 are valid in VMX operation:
-        * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx.
-        * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx.
-        */
-       __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory");
-       cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1);
-       cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0);
-       __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory");
-
-       __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory");
-       cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1);
-       cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0);
-       /* Enable VMX operation */
-       cr4 |= X86_CR4_VMXE;
-       __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory");
-
-       /*
-        * Configure IA32_FEATURE_CONTROL MSR to allow VMXON:
-        *  Bit 0: Lock bit. If clear, VMXON causes a #GP.
-        *  Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON
-        *    outside of SMX causes a #GP.
-        */
-       required = FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
-       required |= FEAT_CTL_LOCKED;
-       feature_control = rdmsr(MSR_IA32_FEAT_CTL);
-       if ((feature_control & required) != required)
-               wrmsr(MSR_IA32_FEAT_CTL, feature_control | required);
-
-       /* Enter VMX root operation. */
-       *(uint32_t *)(vmx->vmxon) = vmcs_revision();
-       if (vmxon(vmx->vmxon_gpa))
-               return false;
-
-       return true;
-}
-
-bool load_vmcs(struct vmx_pages *vmx)
-{
-       /* Load a VMCS. */
-       *(uint32_t *)(vmx->vmcs) = vmcs_revision();
-       if (vmclear(vmx->vmcs_gpa))
-               return false;
-
-       if (vmptrld(vmx->vmcs_gpa))
-               return false;
-
-       /* Setup shadow VMCS, do not load it yet. */
-       *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
-       if (vmclear(vmx->shadow_vmcs_gpa))
-               return false;
-
-       return true;
-}
-
-static bool ept_vpid_cap_supported(uint64_t mask)
-{
-       return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask;
-}
-
-bool ept_1g_pages_supported(void)
-{
-       return ept_vpid_cap_supported(VMX_EPT_VPID_CAP_1G_PAGES);
-}
-
-/*
- * Initialize the control fields to the most basic settings possible.
- */
-static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
-{
-       uint32_t sec_exec_ctl = 0;
-
-       vmwrite(VIRTUAL_PROCESSOR_ID, 0);
-       vmwrite(POSTED_INTR_NV, 0);
-
-       vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS));
-
-       if (vmx->eptp_gpa) {
-               uint64_t ept_paddr;
-               struct eptPageTablePointer eptp = {
-                       .memory_type = X86_MEMTYPE_WB,
-                       .page_walk_length = 3, /* + 1 */
-                       .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
-                       .address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
-               };
-
-               memcpy(&ept_paddr, &eptp, sizeof(ept_paddr));
-               vmwrite(EPT_POINTER, ept_paddr);
-               sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT;
-       }
-
-       if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, sec_exec_ctl))
-               vmwrite(CPU_BASED_VM_EXEC_CONTROL,
-                       rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
-       else {
-               vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS));
-               GUEST_ASSERT(!sec_exec_ctl);
-       }
-
-       vmwrite(EXCEPTION_BITMAP, 0);
-       vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
-       vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */
-       vmwrite(CR3_TARGET_COUNT, 0);
-       vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) |
-               VM_EXIT_HOST_ADDR_SPACE_SIZE);    /* 64-bit host */
-       vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
-       vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
-       vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) |
-               VM_ENTRY_IA32E_MODE);             /* 64-bit guest */
-       vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
-       vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
-       vmwrite(TPR_THRESHOLD, 0);
-
-       vmwrite(CR0_GUEST_HOST_MASK, 0);
-       vmwrite(CR4_GUEST_HOST_MASK, 0);
-       vmwrite(CR0_READ_SHADOW, get_cr0());
-       vmwrite(CR4_READ_SHADOW, get_cr4());
-
-       vmwrite(MSR_BITMAP, vmx->msr_gpa);
-       vmwrite(VMREAD_BITMAP, vmx->vmread_gpa);
-       vmwrite(VMWRITE_BITMAP, vmx->vmwrite_gpa);
-}
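
For reference, the EPTP value that init_vmcs_control_fields() builds above via the eptPageTablePointer bitfield is equivalent to composing the raw bits by hand. A sketch, assuming eptp_gpa fits within the 40-bit address field:

uint64_t eptp_raw = (vmx->eptp_gpa & ~0xfffull) |       /* bits 51:12: PML4 address */
                    (ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS) ? (1ull << 6) : 0) |
                    (3ull << 3) |                        /* bits 5:3: page-walk length - 1 */
                    X86_MEMTYPE_WB;                      /* bits 2:0: memory type */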
-
-/*
- * Initialize the host state fields based on the current host state, with
- * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch
- * or vmresume.
- */
-static inline void init_vmcs_host_state(void)
-{
-       uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS);
-
-       vmwrite(HOST_ES_SELECTOR, get_es());
-       vmwrite(HOST_CS_SELECTOR, get_cs());
-       vmwrite(HOST_SS_SELECTOR, get_ss());
-       vmwrite(HOST_DS_SELECTOR, get_ds());
-       vmwrite(HOST_FS_SELECTOR, get_fs());
-       vmwrite(HOST_GS_SELECTOR, get_gs());
-       vmwrite(HOST_TR_SELECTOR, get_tr());
-
-       if (exit_controls & VM_EXIT_LOAD_IA32_PAT)
-               vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT));
-       if (exit_controls & VM_EXIT_LOAD_IA32_EFER)
-               vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER));
-       if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
-               vmwrite(HOST_IA32_PERF_GLOBAL_CTRL,
-                       rdmsr(MSR_CORE_PERF_GLOBAL_CTRL));
-
-       vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS));
-
-       vmwrite(HOST_CR0, get_cr0());
-       vmwrite(HOST_CR3, get_cr3());
-       vmwrite(HOST_CR4, get_cr4());
-       vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE));
-       vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE));
-       vmwrite(HOST_TR_BASE,
-               get_desc64_base((struct desc64 *)(get_gdt().address + get_tr())));
-       vmwrite(HOST_GDTR_BASE, get_gdt().address);
-       vmwrite(HOST_IDTR_BASE, get_idt().address);
-       vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP));
-       vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP));
-}
-
-/*
- * Initialize the guest state fields essentially as a clone of
- * the host state fields. Some host state fields have fixed
- * values, and we set the corresponding guest state fields accordingly.
- */
-static inline void init_vmcs_guest_state(void *rip, void *rsp)
-{
-       vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR));
-       vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR));
-       vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR));
-       vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR));
-       vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR));
-       vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR));
-       vmwrite(GUEST_LDTR_SELECTOR, 0);
-       vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR));
-       vmwrite(GUEST_INTR_STATUS, 0);
-       vmwrite(GUEST_PML_INDEX, 0);
-
-       vmwrite(VMCS_LINK_POINTER, -1ll);
-       vmwrite(GUEST_IA32_DEBUGCTL, 0);
-       vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT));
-       vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER));
-       vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL,
-               vmreadz(HOST_IA32_PERF_GLOBAL_CTRL));
-
-       vmwrite(GUEST_ES_LIMIT, -1);
-       vmwrite(GUEST_CS_LIMIT, -1);
-       vmwrite(GUEST_SS_LIMIT, -1);
-       vmwrite(GUEST_DS_LIMIT, -1);
-       vmwrite(GUEST_FS_LIMIT, -1);
-       vmwrite(GUEST_GS_LIMIT, -1);
-       vmwrite(GUEST_LDTR_LIMIT, -1);
-       vmwrite(GUEST_TR_LIMIT, 0x67);
-       vmwrite(GUEST_GDTR_LIMIT, 0xffff);
-       vmwrite(GUEST_IDTR_LIMIT, 0xffff);
-       vmwrite(GUEST_ES_AR_BYTES,
-               vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093);
-       vmwrite(GUEST_CS_AR_BYTES, 0xa09b);
-       vmwrite(GUEST_SS_AR_BYTES, 0xc093);
-       vmwrite(GUEST_DS_AR_BYTES,
-               vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093);
-       vmwrite(GUEST_FS_AR_BYTES,
-               vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093);
-       vmwrite(GUEST_GS_AR_BYTES,
-               vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093);
-       vmwrite(GUEST_LDTR_AR_BYTES, 0x10000);
-       vmwrite(GUEST_TR_AR_BYTES, 0x8b);
-       vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
-       vmwrite(GUEST_ACTIVITY_STATE, 0);
-       vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS));
-       vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0);
-
-       vmwrite(GUEST_CR0, vmreadz(HOST_CR0));
-       vmwrite(GUEST_CR3, vmreadz(HOST_CR3));
-       vmwrite(GUEST_CR4, vmreadz(HOST_CR4));
-       vmwrite(GUEST_ES_BASE, 0);
-       vmwrite(GUEST_CS_BASE, 0);
-       vmwrite(GUEST_SS_BASE, 0);
-       vmwrite(GUEST_DS_BASE, 0);
-       vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE));
-       vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE));
-       vmwrite(GUEST_LDTR_BASE, 0);
-       vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE));
-       vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE));
-       vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE));
-       vmwrite(GUEST_DR7, 0x400);
-       vmwrite(GUEST_RSP, (uint64_t)rsp);
-       vmwrite(GUEST_RIP, (uint64_t)rip);
-       vmwrite(GUEST_RFLAGS, 2);
-       vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0);
-       vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP));
-       vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP));
-}
-
-void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
-{
-       init_vmcs_control_fields(vmx);
-       init_vmcs_host_state();
-       init_vmcs_guest_state(guest_rip, guest_rsp);
-}
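
A minimal sketch of how a guest-side test typically strings these helpers together; l2_guest_code and the stack size are illustrative and not part of this file:

static void l1_guest_code(struct vmx_pages *vmx)
{
        unsigned long l2_stack[128];

        GUEST_ASSERT(prepare_for_vmx_operation(vmx));
        GUEST_ASSERT(load_vmcs(vmx));
        prepare_vmcs(vmx, l2_guest_code, &l2_stack[128]);

        /* Launch L2, expect it to exit back to L1 with a VMCALL. */
        GUEST_ASSERT(!vmlaunch());
        GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
        GUEST_DONE();
}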
-
-static void nested_create_pte(struct kvm_vm *vm,
-                             struct eptPageTableEntry *pte,
-                             uint64_t nested_paddr,
-                             uint64_t paddr,
-                             int current_level,
-                             int target_level)
-{
-       if (!pte->readable) {
-               pte->writable = true;
-               pte->readable = true;
-               pte->executable = true;
-               pte->page_size = (current_level == target_level);
-               if (pte->page_size)
-                       pte->address = paddr >> vm->page_shift;
-               else
-                       pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
-       } else {
-               /*
-                * Entry already present.  Assert that the caller doesn't want
-                * a hugepage at this level, and that there isn't a hugepage at
-                * this level.
-                */
-               TEST_ASSERT(current_level != target_level,
-                           "Cannot create hugepage at level: %u, nested_paddr: 0x%lx",
-                           current_level, nested_paddr);
-               TEST_ASSERT(!pte->page_size,
-                           "Cannot create page table at level: %u, nested_paddr: 0x%lx",
-                           current_level, nested_paddr);
-       }
-}
-
-
-void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-                    uint64_t nested_paddr, uint64_t paddr, int target_level)
-{
-       const uint64_t page_size = PG_LEVEL_SIZE(target_level);
-       struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
-       uint16_t index;
-
-       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
-                   "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
-       TEST_ASSERT((nested_paddr >> 48) == 0,
-                   "Nested physical address 0x%lx requires 5-level paging",
-                   nested_paddr);
-       TEST_ASSERT((nested_paddr % page_size) == 0,
-                   "Nested physical address not on page boundary,\n"
-                   "  nested_paddr: 0x%lx page_size: 0x%lx",
-                   nested_paddr, page_size);
-       TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
-                   "Nested physical address beyond maximum supported,\n"
-                   "  nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
-                   nested_paddr, vm->max_gfn, vm->page_size);
-       TEST_ASSERT((paddr % page_size) == 0,
-                   "Physical address not on page boundary,\n"
-                   "  paddr: 0x%lx page_size: 0x%lx",
-                   paddr, page_size);
-       TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
-                   "Physical address beyond maximum supported,\n"
-                   "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
-                   paddr, vm->max_gfn, vm->page_size);
-
-       for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
-               index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
-               pte = &pt[index];
-
-               nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
-
-               if (pte->page_size)
-                       break;
-
-               pt = addr_gpa2hva(vm, pte->address * vm->page_size);
-       }
-
-       /*
-        * For now mark these as accessed and dirty because the only
-        * testcase we have needs that.  Can be reconsidered later.
-        */
-       pte->accessed = true;
-       pte->dirty = true;
-
-}
-
-void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-                  uint64_t nested_paddr, uint64_t paddr)
-{
-       __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K);
-}
-
-/*
- * Map a range of EPT guest physical addresses to the VM's physical address
- *
- * Input Args:
- *   vm - Virtual Machine
- *   nested_paddr - Nested guest physical address to map
- *   paddr - VM Physical Address
- *   size - The size of the range to map
- *   level - The level at which to map the range
- *
- * Output Args: None
- *
- * Return: None
- *
- * Within the VM given by vm, creates a nested guest translation for the
- * page range starting at nested_paddr to the page range starting at paddr.
- */
-void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-                 uint64_t nested_paddr, uint64_t paddr, uint64_t size,
-                 int level)
-{
-       size_t page_size = PG_LEVEL_SIZE(level);
-       size_t npages = size / page_size;
-
-       TEST_ASSERT(nested_paddr + size > nested_paddr, "Nested paddr overflow");
-       TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
-
-       while (npages--) {
-               __nested_pg_map(vmx, vm, nested_paddr, paddr, level);
-               nested_paddr += page_size;
-               paddr += page_size;
-       }
-}
-
-void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-               uint64_t nested_paddr, uint64_t paddr, uint64_t size)
-{
-       __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K);
-}
-
-/* Prepare an identity extended page table that maps all the
- * physical pages in VM.
- */
-void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
-                       uint32_t memslot)
-{
-       sparsebit_idx_t i, last;
-       struct userspace_mem_region *region =
-               memslot2region(vm, memslot);
-
-       i = (region->region.guest_phys_addr >> vm->page_shift) - 1;
-       last = i + (region->region.memory_size >> vm->page_shift);
-       for (;;) {
-               i = sparsebit_next_clear(region->unused_phy_pages, i);
-               if (i > last)
-                       break;
-
-               nested_map(vmx, vm,
-                          (uint64_t)i << vm->page_shift,
-                          (uint64_t)i << vm->page_shift,
-                          1 << vm->page_shift);
-       }
-}
-
-/* Identity map a region with 1GiB Pages. */
-void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
-                           uint64_t addr, uint64_t size)
-{
-       __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
-}
-
-bool kvm_cpu_has_ept(void)
-{
-       uint64_t ctrl;
-
-       ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32;
-       if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
-               return false;
-
-       ctrl = kvm_get_feature_msr(MSR_IA32_VMX_PROCBASED_CTLS2) >> 32;
-       return ctrl & SECONDARY_EXEC_ENABLE_EPT;
-}
-
-void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
-                 uint32_t eptp_memslot)
-{
-       TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
-
-       vmx->eptp = (void *)vm_vaddr_alloc_page(vm);
-       vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
-       vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
-}
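
A rough host-side sketch of wiring these helpers into a nested test, assuming vm and vcpu have already been created; the memslot argument is illustrative:

vm_vaddr_t vmx_gva;
struct vmx_pages *vmx = vcpu_alloc_vmx(vm, &vmx_gva);

if (kvm_cpu_has_ept()) {
        prepare_eptp(vmx, vm, 0);
        /* Identity map all of memslot 0 into the EPT tables. */
        nested_map_memslot(vmx, vm, 0);
}
vcpu_args_set(vcpu, 1, vmx_gva);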
-
-void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm)
-{
-       vmx->apic_access = (void *)vm_vaddr_alloc_page(vm);
-       vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access);
-       vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access);
-}
diff --git a/tools/testing/selftests/kvm/s390/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c
new file mode 100644 (file)
index 0000000..e32dd59
--- /dev/null
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for s390x CMMA migration
+ *
+ * Copyright IBM Corp. 2023
+ *
+ * Authors:
+ *  Nico Boehr <nrb@linux.ibm.com>
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+#include "processor.h"
+
+#define MAIN_PAGE_COUNT 512
+
+#define TEST_DATA_PAGE_COUNT 512
+#define TEST_DATA_MEMSLOT 1
+#define TEST_DATA_START_GFN PAGE_SIZE
+
+#define TEST_DATA_TWO_PAGE_COUNT 256
+#define TEST_DATA_TWO_MEMSLOT 2
+#define TEST_DATA_TWO_START_GFN (2 * PAGE_SIZE)
+
+static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT];
+
+/**
+ * Dirty CMMA attributes of exactly one page in the TEST_DATA memslot,
+ * so that use_cmma is enabled and the CMMA-related ioctls do something.
+ */
+static void guest_do_one_essa(void)
+{
+       asm volatile(
+               /* load TEST_DATA_START_GFN into r1 */
+               "       llilf 1,%[start_gfn]\n"
+               /* calculate the address from the gfn */
+               "       sllg 1,1,12(0)\n"
+               /* set the first page in TEST_DATA memslot to STABLE */
+               "       .insn rrf,0xb9ab0000,2,1,1,0\n"
+               /* hypercall */
+               "       diag 0,0,0x501\n"
+               "0:     j 0b"
+               :
+               : [start_gfn] "L"(TEST_DATA_START_GFN)
+               : "r1", "r2", "memory", "cc"
+       );
+}
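
Rendered as pseudo-C, the asm block above does roughly the following; essa_set_stable() and diag_501() are hypothetical intrinsics used only for illustration:

unsigned long addr = (unsigned long)TEST_DATA_START_GFN << 12;

essa_set_stable(addr);  /* ESSA with the SET_STABLE operation on one page */
diag_501();             /* DIAG 0x501: hypercall back to the host */
for (;;)
        ;               /* never returns */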
+
+/**
+ * Touch CMMA attributes of all pages in TEST_DATA memslot. Set them to stable
+ * state.
+ */
+static void guest_dirty_test_data(void)
+{
+       asm volatile(
+               /* r1 = TEST_DATA_START_GFN */
+               "       xgr 1,1\n"
+               "       llilf 1,%[start_gfn]\n"
+               /* r5 = TEST_DATA_PAGE_COUNT */
+               "       lghi 5,%[page_count]\n"
+               /* r5 += r1 */
+               "2:     agfr 5,1\n"
+               /* r2 = r1 << PAGE_SHIFT */
+               "1:     sllg 2,1,12(0)\n"
+               /* essa(r4, r2, SET_STABLE) */
+               "       .insn rrf,0xb9ab0000,4,2,1,0\n"
+               /* i++ */
+               "       agfi 1,1\n"
+               /* if r1 < r5 goto 1 */
+               "       cgrjl 1,5,1b\n"
+               /* hypercall */
+               "       diag 0,0,0x501\n"
+               "0:     j 0b"
+               :
+               : [start_gfn] "L"(TEST_DATA_START_GFN),
+                 [page_count] "L"(TEST_DATA_PAGE_COUNT)
+               :
+                       /* the counter in our loop over the pages */
+                       "r1",
+                       /* the calculated page physical address */
+                       "r2",
+                       /* ESSA output register */
+                       "r4",
+                       /* last page */
+                       "r5",
+                       "cc", "memory"
+       );
+}
+
+static void create_main_memslot(struct kvm_vm *vm)
+{
+       int i;
+
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, MAIN_PAGE_COUNT, 0);
+       /* set the array of memslots to zero like __vm_create does */
+       for (i = 0; i < NR_MEM_REGIONS; i++)
+               vm->memslots[i] = 0;
+}
+
+static void create_test_memslot(struct kvm_vm *vm)
+{
+       vm_userspace_mem_region_add(vm,
+                                   VM_MEM_SRC_ANONYMOUS,
+                                   TEST_DATA_START_GFN << vm->page_shift,
+                                   TEST_DATA_MEMSLOT,
+                                   TEST_DATA_PAGE_COUNT,
+                                   0
+                                  );
+       vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
+}
+
+static void create_memslots(struct kvm_vm *vm)
+{
+       /*
+        * Our VM has the following memory layout:
+        * +------+---------------------------+
+        * | GFN  | Memslot                   |
+        * +------+---------------------------+
+        * | 0    |                           |
+        * | ...  | MAIN (Code, Stack, ...)   |
+        * | 511  |                           |
+        * +------+---------------------------+
+        * | 4096 |                           |
+        * | ...  | TEST_DATA                 |
+        * | 4607 |                           |
+        * +------+---------------------------+
+        */
+       create_main_memslot(vm);
+       create_test_memslot(vm);
+}
+
+static void finish_vm_setup(struct kvm_vm *vm)
+{
+       struct userspace_mem_region *slot0;
+
+       kvm_vm_elf_load(vm, program_invocation_name);
+
+       slot0 = memslot2region(vm, 0);
+       ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
+
+       kvm_arch_vm_post_create(vm);
+}
+
+static struct kvm_vm *create_vm_two_memslots(void)
+{
+       struct kvm_vm *vm;
+
+       vm = vm_create_barebones();
+
+       create_memslots(vm);
+
+       finish_vm_setup(vm);
+
+       return vm;
+}
+
+static void enable_cmma(struct kvm_vm *vm)
+{
+       int r;
+
+       r = __kvm_device_attr_set(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA, NULL);
+       TEST_ASSERT(!r, "enabling cmma failed r=%d errno=%d", r, errno);
+}
+
+static void enable_dirty_tracking(struct kvm_vm *vm)
+{
+       vm_mem_region_set_flags(vm, 0, KVM_MEM_LOG_DIRTY_PAGES);
+       vm_mem_region_set_flags(vm, TEST_DATA_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
+}
+
+static int __enable_migration_mode(struct kvm_vm *vm)
+{
+       return __kvm_device_attr_set(vm->fd,
+                                    KVM_S390_VM_MIGRATION,
+                                    KVM_S390_VM_MIGRATION_START,
+                                    NULL
+                                   );
+}
+
+static void enable_migration_mode(struct kvm_vm *vm)
+{
+       int r = __enable_migration_mode(vm);
+
+       TEST_ASSERT(!r, "enabling migration mode failed r=%d errno=%d", r, errno);
+}
+
+static bool is_migration_mode_on(struct kvm_vm *vm)
+{
+       u64 out;
+       int r;
+
+       r = __kvm_device_attr_get(vm->fd,
+                                 KVM_S390_VM_MIGRATION,
+                                 KVM_S390_VM_MIGRATION_STATUS,
+                                 &out
+                                );
+       TEST_ASSERT(!r, "getting migration mode status failed r=%d errno=%d", r, errno);
+       return out;
+}
+
+static int vm_get_cmma_bits(struct kvm_vm *vm, u64 flags, int *errno_out)
+{
+       struct kvm_s390_cmma_log args;
+       int rc;
+
+       errno = 0;
+
+       args = (struct kvm_s390_cmma_log){
+               .start_gfn = 0,
+               .count = sizeof(cmma_value_buf),
+               .flags = flags,
+               .values = (__u64)&cmma_value_buf[0]
+       };
+       rc = __vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+
+       *errno_out = errno;
+       return rc;
+}
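
For illustration only, a rough sketch of how a consumer (e.g. a migration loop) might drain CMMA values with the raw ioctl, reusing the buffer and struct names from this file; the loop structure is an assumption, not part of the test:

struct kvm_s390_cmma_log log = {
        .start_gfn = 0,
        .flags = 0,
        .values = (__u64)&cmma_value_buf[0],
};

do {
        log.count = sizeof(cmma_value_buf);
        vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &log);
        /* On return, .start_gfn and .count describe the batch just read. */
        log.start_gfn += log.count;
} while (log.remaining);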
+
+static void test_get_cmma_basic(void)
+{
+       struct kvm_vm *vm = create_vm_two_memslots();
+       struct kvm_vcpu *vcpu;
+       int rc, errno_out;
+
+       /* GET_CMMA_BITS without CMMA enabled should fail */
+       rc = vm_get_cmma_bits(vm, 0, &errno_out);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno_out, ENXIO);
+
+       enable_cmma(vm);
+       vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+
+       vcpu_run(vcpu);
+
+       /* GET_CMMA_BITS without migration mode and without peeking should fail */
+       rc = vm_get_cmma_bits(vm, 0, &errno_out);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno_out, EINVAL);
+
+       /* GET_CMMA_BITS without migration mode and with peeking should work */
+       rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out);
+       TEST_ASSERT_EQ(rc, 0);
+       TEST_ASSERT_EQ(errno_out, 0);
+
+       enable_dirty_tracking(vm);
+       enable_migration_mode(vm);
+
+       /* GET_CMMA_BITS with invalid flags */
+       rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno_out, EINVAL);
+
+       kvm_vm_free(vm);
+}
+
+static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu)
+{
+       TEST_ASSERT_EQ(vcpu->run->exit_reason, 13);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000);
+}
+
+static void test_migration_mode(void)
+{
+       struct kvm_vm *vm = vm_create_barebones();
+       struct kvm_vcpu *vcpu;
+       u64 orig_psw;
+       int rc;
+
+       /* enabling migration mode on a VM without memory should fail */
+       rc = __enable_migration_mode(vm);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno, EINVAL);
+       TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
+       errno = 0;
+
+       create_memslots(vm);
+       finish_vm_setup(vm);
+
+       enable_cmma(vm);
+       vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+       orig_psw = vcpu->run->psw_addr;
+
+       /*
+        * Execute one essa instruction in the guest. Otherwise the guest will
+        * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+        */
+       vcpu_run(vcpu);
+       assert_exit_was_hypercall(vcpu);
+
+       /* migration mode when memslots have dirty tracking off should fail */
+       rc = __enable_migration_mode(vm);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno, EINVAL);
+       TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
+       errno = 0;
+
+       /* enable dirty tracking */
+       enable_dirty_tracking(vm);
+
+       /* enabling migration mode should work now */
+       rc = __enable_migration_mode(vm);
+       TEST_ASSERT_EQ(rc, 0);
+       TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+       errno = 0;
+
+       /* execute another ESSA instruction to see this goes fine */
+       vcpu->run->psw_addr = orig_psw;
+       vcpu_run(vcpu);
+       assert_exit_was_hypercall(vcpu);
+
+       /*
+        * With migration mode on, create a new memslot with dirty tracking off.
+        * This should turn off migration mode.
+        */
+       TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+       vm_userspace_mem_region_add(vm,
+                                   VM_MEM_SRC_ANONYMOUS,
+                                   TEST_DATA_TWO_START_GFN << vm->page_shift,
+                                   TEST_DATA_TWO_MEMSLOT,
+                                   TEST_DATA_TWO_PAGE_COUNT,
+                                   0
+                                  );
+       TEST_ASSERT(!is_migration_mode_on(vm),
+                   "creating memslot without dirty tracking turns off migration mode"
+                  );
+
+       /* ESSA instructions should still execute fine */
+       vcpu->run->psw_addr = orig_psw;
+       vcpu_run(vcpu);
+       assert_exit_was_hypercall(vcpu);
+
+       /*
+        * Turn on dirty tracking on the new memslot.
+        * It should be possible to turn migration mode back on again.
+        */
+       vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
+       rc = __enable_migration_mode(vm);
+       TEST_ASSERT_EQ(rc, 0);
+       TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+       errno = 0;
+
+       /*
+        * Turn off dirty tracking again, this time with just a flag change.
+        * Again, migration mode should turn off.
+        */
+       TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+       vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, 0);
+       TEST_ASSERT(!is_migration_mode_on(vm),
+                   "disabling dirty tracking should turn off migration mode"
+                  );
+
+       /* ESSA instructions should still execute fine */
+       vcpu->run->psw_addr = orig_psw;
+       vcpu_run(vcpu);
+       assert_exit_was_hypercall(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+/**
+ * Given a VM with the MAIN and TEST_DATA memslots, assert that all pages in
+ * both memslots have dirty CMMA attributes and that nothing else is dirty.
+ * This has the useful side effect of ensuring nothing is CMMA dirty after this
+ * function.
+ */
+static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
+{
+       struct kvm_s390_cmma_log args;
+
+       /*
+        * First iteration - everything should be dirty.
+        * Start at the main memslot...
+        */
+       args = (struct kvm_s390_cmma_log){
+               .start_gfn = 0,
+               .count = sizeof(cmma_value_buf),
+               .flags = 0,
+               .values = (__u64)&cmma_value_buf[0]
+       };
+       memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+       vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+       TEST_ASSERT_EQ(args.count, MAIN_PAGE_COUNT);
+       TEST_ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT);
+       TEST_ASSERT_EQ(args.start_gfn, 0);
+
+       /* ...and then - after a hole - the TEST_DATA memslot should follow */
+       args = (struct kvm_s390_cmma_log){
+               .start_gfn = MAIN_PAGE_COUNT,
+               .count = sizeof(cmma_value_buf),
+               .flags = 0,
+               .values = (__u64)&cmma_value_buf[0]
+       };
+       memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+       vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+       TEST_ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT);
+       TEST_ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN);
+       TEST_ASSERT_EQ(args.remaining, 0);
+
+       /* ...and nothing else should be there */
+       args = (struct kvm_s390_cmma_log){
+               .start_gfn = TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT,
+               .count = sizeof(cmma_value_buf),
+               .flags = 0,
+               .values = (__u64)&cmma_value_buf[0]
+       };
+       memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+       vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+       TEST_ASSERT_EQ(args.count, 0);
+       TEST_ASSERT_EQ(args.start_gfn, 0);
+       TEST_ASSERT_EQ(args.remaining, 0);
+}
+
+/**
+ * Given a VM, assert no pages are CMMA dirty.
+ */
+static void assert_no_pages_cmma_dirty(struct kvm_vm *vm)
+{
+       struct kvm_s390_cmma_log args;
+
+       /* If we start from GFN 0 again, nothing should be dirty. */
+       args = (struct kvm_s390_cmma_log){
+               .start_gfn = 0,
+               .count = sizeof(cmma_value_buf),
+               .flags = 0,
+               .values = (__u64)&cmma_value_buf[0]
+       };
+       memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+       vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+       if (args.count || args.remaining || args.start_gfn)
+               TEST_FAIL("pages are still dirty start_gfn=0x%llx count=%u remaining=%llu",
+                         args.start_gfn,
+                         args.count,
+                         args.remaining
+                        );
+}
+
+static void test_get_initial_dirty(void)
+{
+       struct kvm_vm *vm = create_vm_two_memslots();
+       struct kvm_vcpu *vcpu;
+
+       enable_cmma(vm);
+       vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+
+       /*
+        * Execute one essa instruction in the guest. Otherwise the guest will
+        * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+        */
+       vcpu_run(vcpu);
+       assert_exit_was_hypercall(vcpu);
+
+       enable_dirty_tracking(vm);
+       enable_migration_mode(vm);
+
+       assert_all_slots_cmma_dirty(vm);
+
+       /* Start from the beginning again and make sure nothing else is dirty */
+       assert_no_pages_cmma_dirty(vm);
+
+       kvm_vm_free(vm);
+}
+
+static void query_cmma_range(struct kvm_vm *vm,
+                            u64 start_gfn, u64 gfn_count,
+                            struct kvm_s390_cmma_log *res_out)
+{
+       *res_out = (struct kvm_s390_cmma_log){
+               .start_gfn = start_gfn,
+               .count = gfn_count,
+               .flags = 0,
+               .values = (__u64)&cmma_value_buf[0]
+       };
+       memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+       vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, res_out);
+}
+
+/**
+ * Assert that the given cmma_log struct, as filled in by query_cmma_range(),
+ * indicates that the first dirty gfn is first_dirty_gfn and that exactly
+ * dirty_gfn_count CMMA values were returned.
+ */
+static void assert_cmma_dirty(u64 first_dirty_gfn,
+                             u64 dirty_gfn_count,
+                             const struct kvm_s390_cmma_log *res)
+{
+       TEST_ASSERT_EQ(res->start_gfn, first_dirty_gfn);
+       TEST_ASSERT_EQ(res->count, dirty_gfn_count);
+       for (size_t i = 0; i < dirty_gfn_count; i++)
+               TEST_ASSERT_EQ(cmma_value_buf[i], 0x0); /* stable state */
+       TEST_ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */
+}
+
+static void test_get_skip_holes(void)
+{
+       size_t gfn_offset;
+       struct kvm_vm *vm = create_vm_two_memslots();
+       struct kvm_s390_cmma_log log;
+       struct kvm_vcpu *vcpu;
+       u64 orig_psw;
+
+       enable_cmma(vm);
+       vcpu = vm_vcpu_add(vm, 1, guest_dirty_test_data);
+
+       orig_psw = vcpu->run->psw_addr;
+
+       /*
+        * Execute some essa instructions in the guest. Otherwise the guest will
+        * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+        */
+       vcpu_run(vcpu);
+       assert_exit_was_hypercall(vcpu);
+
+       enable_dirty_tracking(vm);
+       enable_migration_mode(vm);
+
+       /* un-dirty all pages */
+       assert_all_slots_cmma_dirty(vm);
+
+       /* Then, dirty just the TEST_DATA memslot */
+       vcpu->run->psw_addr = orig_psw;
+       vcpu_run(vcpu);
+
+       gfn_offset = TEST_DATA_START_GFN;
+       /**
+        * Query CMMA attributes of one page, starting at page 0. Since the
+        * main memslot was not touched by the VM, this should yield the first
+        * page of the TEST_DATA memslot.
+        * The dirty bitmap should now look like this:
+        * 0: not dirty
+        * [0x1, 0x200): dirty
+        */
+       query_cmma_range(vm, 0, 1, &log);
+       assert_cmma_dirty(gfn_offset, 1, &log);
+       gfn_offset++;
+
+       /**
+        * Query CMMA attributes of 32 (0x20) pages past the end of the TEST_DATA
+        * memslot. This should wrap back to the beginning of the TEST_DATA
+        * memslot, page 1.
+        * The dirty bitmap should now look like this:
+        * [0, 0x21): not dirty
+        * [0x21, 0x200): dirty
+        */
+       query_cmma_range(vm, TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, 0x20, &log);
+       assert_cmma_dirty(gfn_offset, 0x20, &log);
+       gfn_offset += 0x20;
+
+       /* Skip 32 pages */
+       gfn_offset += 0x20;
+
+       /**
+        * After skipping 32 pages, query the next 32 (0x20) pages.
+        * The dirty bitmap should now look like this:
+        * [0, 0x21): not dirty
+        * [0x21, 0x41): dirty
+        * [0x41, 0x61): not dirty
+        * [0x61, 0x200): dirty
+        */
+       query_cmma_range(vm, gfn_offset, 0x20, &log);
+       assert_cmma_dirty(gfn_offset, 0x20, &log);
+       gfn_offset += 0x20;
+
+       /**
+        * Query 1 page from the beginning of the TEST_DATA memslot. This should
+        * yield page 0x21.
+        * The dirty bitmap should now look like this:
+        * [0, 0x22): not dirty
+        * [0x22, 0x41): dirty
+        * [0x41, 0x61): not dirty
+        * [0x61, 0x200): dirty
+        */
+       query_cmma_range(vm, TEST_DATA_START_GFN, 1, &log);
+       assert_cmma_dirty(TEST_DATA_START_GFN + 0x21, 1, &log);
+       gfn_offset++;
+
+       /**
+        * Query 15 (0xF) pages from page 0x23 in TEST_DATA memslot.
+        * This should yield pages [0x23, 0x33).
+        * The dirty bitmap should now look like this:
+        * [0, 0x22): not dirty
+        * 0x22: dirty
+        * [0x23, 0x33): not dirty
+        * [0x33, 0x41): dirty
+        * [0x41, 0x61): not dirty
+        * [0x61, 0x200): dirty
+        */
+       gfn_offset = TEST_DATA_START_GFN + 0x23;
+       query_cmma_range(vm, gfn_offset, 15, &log);
+       assert_cmma_dirty(gfn_offset, 15, &log);
+
+       /**
+        * Query 17 (0x11) pages from page 0x22 in TEST_DATA memslot.
+        * This should yield page [0x22, 0x33)
+        * The dirty bitmap should now look like this:
+        * [0, 0x33): not dirty
+        * [0x33, 0x41): dirty
+        * [0x41, 0x61): not dirty
+        * [0x61, 0x200): dirty
+        */
+       gfn_offset = TEST_DATA_START_GFN + 0x22;
+       query_cmma_range(vm, gfn_offset, 17, &log);
+       assert_cmma_dirty(gfn_offset, 17, &log);
+
+       /**
+        * Query 25 (0x19) pages from page 0x40 in TEST_DATA memslot.
+        * This should yield page 0x40 and nothing more, since there are more
+        * than 16 non-dirty pages after page 0x40.
+        * The dirty bitmap should now look like this:
+        * [0, 0x33): not dirty
+        * [0x33, 0x40): dirty
+        * [0x40, 0x61): not dirty
+        * [0x61, 0x200): dirty
+        */
+       gfn_offset = TEST_DATA_START_GFN + 0x40;
+       query_cmma_range(vm, gfn_offset, 25, &log);
+       assert_cmma_dirty(gfn_offset, 1, &log);
+
+       /**
+        * Query pages [0x33, 0x40).
+        * The dirty bitmap should now look like this:
+        * [0, 0x61): not dirty
+        * [0x61, 0x200): dirty
+        */
+       gfn_offset = TEST_DATA_START_GFN + 0x33;
+       query_cmma_range(vm, gfn_offset, 0x40 - 0x33, &log);
+       assert_cmma_dirty(gfn_offset, 0x40 - 0x33, &log);
+
+       /**
+        * Query the remaining pages [0x61, 0x200).
+        */
+       gfn_offset = TEST_DATA_START_GFN;
+       query_cmma_range(vm, gfn_offset, TEST_DATA_PAGE_COUNT - 0x61, &log);
+       assert_cmma_dirty(TEST_DATA_START_GFN + 0x61, TEST_DATA_PAGE_COUNT - 0x61, &log);
+
+       assert_no_pages_cmma_dirty(vm);
+}
+
+struct testdef {
+       const char *name;
+       void (*test)(void);
+} testlist[] = {
+       { "migration mode and dirty tracking", test_migration_mode },
+       { "GET_CMMA_BITS: basic calls", test_get_cmma_basic },
+       { "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty },
+       { "GET_CMMA_BITS: holes are skipped", test_get_skip_holes },
+};
+
+/**
+ * The kernel may support CMMA, but the machine may not (i.e. if running as
+ * guest-3).
+ *
+ * In this case, the CMMA capabilities are all there, but the CMMA-related
+ * ioctls fail. To find out whether the machine supports CMMA, create a
+ * temporary VM and then query the CMMA feature of the VM.
+ */
+static int machine_has_cmma(void)
+{
+       struct kvm_vm *vm = vm_create_barebones();
+       int r;
+
+       r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA);
+       kvm_vm_free(vm);
+
+       return r;
+}
+
+int main(int argc, char *argv[])
+{
+       int idx;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_CMMA_MIGRATION));
+       TEST_REQUIRE(machine_has_cmma());
+
+       ksft_print_header();
+
+       ksft_set_plan(ARRAY_SIZE(testlist));
+
+       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+               testlist[idx].test();
+               ksft_test_result_pass("%s\n", testlist[idx].name);
+       }
+
+       ksft_finished();        /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/s390/config b/tools/testing/selftests/kvm/s390/config
new file mode 100644 (file)
index 0000000..23270f2
--- /dev/null
@@ -0,0 +1,2 @@
+CONFIG_KVM=y
+CONFIG_KVM_S390_UCONTROL=y
diff --git a/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c
new file mode 100644 (file)
index 0000000..2725588
--- /dev/null
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ *  Hariharan Mari <hari55@linux.ibm.com>
+ *
+ * The tests compare the CPU subfunction data obtained via the KVM ioctl with the data
+ * returned by an ASM block executing the same CPU subfunction query. Currently KVM doesn't
+ * mask instruction query data reported via the CPU Model, allowing us to directly compare
+ * it with the data acquired by executing the queries in the test.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include "facility.h"
+
+#include "kvm_util.h"
+
+#define PLO_FUNCTION_MAX 256
+
+/* Query available CPU subfunctions */
+struct kvm_s390_vm_cpu_subfunc cpu_subfunc;
+
+static void get_cpu_machine_subfunctions(struct kvm_vm *vm,
+                                       struct kvm_s390_vm_cpu_subfunc *cpu_subfunc)
+{
+       int r;
+
+       r = __kvm_device_attr_get(vm->fd, KVM_S390_VM_CPU_MODEL,
+                                 KVM_S390_VM_CPU_MACHINE_SUBFUNC, cpu_subfunc);
+
+       TEST_ASSERT(!r, "Get cpu subfunctions failed r=%d errno=%d", r, errno);
+}
+
+static inline int plo_test_bit(unsigned char nr)
+{
+       unsigned long function = nr | 0x100;
+       int cc;
+
+       asm volatile("  lgr     0,%[function]\n"
+                       /* Parameter registers are ignored for "test bit" */
+                       "       plo     0,0,0,0(0)\n"
+                       "       ipm     %0\n"
+                       "       srl     %0,28\n"
+                       : "=d" (cc)
+                       : [function] "d" (function)
+                       : "cc", "0");
+       return cc == 0;
+}
+
+/* Testing Perform Locked Operation (PLO) CPU subfunction's ASM block */
+static void test_plo_asm_block(u8 (*query)[32])
+{
+       for (int i = 0; i < PLO_FUNCTION_MAX; ++i) {
+               if (plo_test_bit(i))
+                       (*query)[i >> 3] |= 0x80 >> (i & 7);
+       }
+}
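
The packing above follows the MSB-first bit numbering used by s390 facility and query bit strings: bit 0 is the most significant bit of byte 0. A hypothetical helper to read such a bit string back (not used by the test) would be:

static inline bool msb0_test_bit(const u8 *bits, unsigned int nr)
{
        return bits[nr >> 3] & (0x80 >> (nr & 7));
}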
+
+/* Testing Crypto Compute Message Authentication Code (KMAC) CPU subfunction's ASM block */
+static void test_kmac_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rre,0xb91e0000,0,2\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message with Chaining (KMC) CPU subfunction's ASM block */
+static void test_kmc_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rre,0xb92f0000,2,4\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message (KM) CPU subfunction's ASM block */
+static void test_km_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rre,0xb92e0000,2,4\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Compute Intermediate Message Digest (KIMD) CPU subfunction's ASM block */
+static void test_kimd_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rre,0xb93e0000,0,2\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Compute Last Message Digest (KLMD) CPU subfunction's ASM block */
+static void test_klmd_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rre,0xb93f0000,0,2\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message with Counter (KMCTR) CPU subfunction's ASM block */
+static void test_kmctr_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rrf,0xb92d0000,2,4,6,0\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message with Cipher Feedback (KMF) CPU subfunction's ASM block */
+static void test_kmf_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rre,0xb92a0000,2,4\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message with Output Feedback (KMO) CPU subfunction's ASM block */
+static void test_kmo_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rre,0xb92b0000,2,4\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Perform Cryptographic Computation (PCC) CPU subfunction's ASM block */
+static void test_pcc_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rre,0xb92c0000,0,0\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Perform Random Number Operation (PRNO) CPU subfunction's ASM block */
+static void test_prno_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rre,0xb93c0000,2,4\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message with Authentication (KMA) CPU subfunction's ASM block */
+static void test_kma_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rrf,0xb9290000,2,4,6,0\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Compute Digital Signature Authentication (KDSA) CPU subfunction's ASM block */
+static void test_kdsa_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rre,0xb93a0000,0,2\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Sort Lists (SORTL) CPU subfunction's ASM block */
+static void test_sortl_asm_block(u8 (*query)[32])
+{
+       asm volatile("  lghi    0,0\n"
+                       "       la      1,%[query]\n"
+                       "       .insn   rre,0xb9380000,2,4\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "0", "1");
+}
+
+/* Testing Deflate Conversion Call (DFLTCC) CPU subfunction's ASM block */
+static void test_dfltcc_asm_block(u8 (*query)[32])
+{
+       asm volatile("  lghi    0,0\n"
+                       "       la      1,%[query]\n"
+                       "       .insn   rrf,0xb9390000,2,4,6,0\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "0", "1");
+}
+
+/*
+ * Testing Perform Function with Concurrent Results (PFCR)
+ * CPU subfunctions's ASM block
+ */
+static void test_pfcr_asm_block(u8 (*query)[16])
+{
+       asm volatile("  lghi    0,0\n"
+                       "       .insn   rsy,0xeb0000000016,0,0,%[query]\n"
+                       : [query] "=QS" (*query)
+                       :
+                       : "cc", "0");
+}
+
+typedef void (*testfunc_t)(u8 (*array)[]);
+
+struct testdef {
+       const char *subfunc_name;
+       u8 *subfunc_array;
+       size_t array_size;
+       testfunc_t test;
+       int facility_bit;
+} testlist[] = {
+       /*
+        * PLO was introduced in the very first 64-bit machine generation.
+        * Hence it is assumed PLO is always installed in Z Arch.
+        */
+       { "PLO", cpu_subfunc.plo, sizeof(cpu_subfunc.plo), test_plo_asm_block, 1 },
+       /* MSA - Facility bit 17 */
+       { "KMAC", cpu_subfunc.kmac, sizeof(cpu_subfunc.kmac), test_kmac_asm_block, 17 },
+       { "KMC", cpu_subfunc.kmc, sizeof(cpu_subfunc.kmc), test_kmc_asm_block, 17 },
+       { "KM", cpu_subfunc.km, sizeof(cpu_subfunc.km), test_km_asm_block, 17 },
+       { "KIMD", cpu_subfunc.kimd, sizeof(cpu_subfunc.kimd), test_kimd_asm_block, 17 },
+       { "KLMD", cpu_subfunc.klmd, sizeof(cpu_subfunc.klmd), test_klmd_asm_block, 17 },
+       /* MSA - Facility bit 77 */
+       { "KMCTR", cpu_subfunc.kmctr, sizeof(cpu_subfunc.kmctr), test_kmctr_asm_block, 77 },
+       { "KMF", cpu_subfunc.kmf, sizeof(cpu_subfunc.kmf), test_kmf_asm_block, 77 },
+       { "KMO", cpu_subfunc.kmo, sizeof(cpu_subfunc.kmo), test_kmo_asm_block, 77 },
+       { "PCC", cpu_subfunc.pcc, sizeof(cpu_subfunc.pcc), test_pcc_asm_block, 77 },
+       /* MSA5 - Facility bit 57 */
+       { "PPNO", cpu_subfunc.ppno, sizeof(cpu_subfunc.ppno), test_prno_asm_block, 57 },
+       /* MSA8 - Facility bit 146 */
+       { "KMA", cpu_subfunc.kma, sizeof(cpu_subfunc.kma), test_kma_asm_block, 146 },
+       /* MSA9 - Facility bit 155 */
+       { "KDSA", cpu_subfunc.kdsa, sizeof(cpu_subfunc.kdsa), test_kdsa_asm_block, 155 },
+       /* SORTL - Facility bit 150 */
+       { "SORTL", cpu_subfunc.sortl, sizeof(cpu_subfunc.sortl), test_sortl_asm_block, 150 },
+       /* DFLTCC - Facility bit 151 */
+       { "DFLTCC", cpu_subfunc.dfltcc, sizeof(cpu_subfunc.dfltcc), test_dfltcc_asm_block, 151 },
+       /* Concurrent-function facility - Facility bit 201 */
+       { "PFCR", cpu_subfunc.pfcr, sizeof(cpu_subfunc.pfcr), test_pfcr_asm_block, 201 },
+};
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vm *vm;
+       int idx;
+
+       ksft_print_header();
+
+       vm = vm_create(1);
+
+       memset(&cpu_subfunc, 0, sizeof(cpu_subfunc));
+       get_cpu_machine_subfunctions(vm, &cpu_subfunc);
+
+       ksft_set_plan(ARRAY_SIZE(testlist));
+       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+               if (test_facility(testlist[idx].facility_bit)) {
+                       u8 *array = malloc(testlist[idx].array_size);
+
+                       testlist[idx].test((u8 (*)[testlist[idx].array_size])array);
+
+                       TEST_ASSERT_EQ(memcmp(testlist[idx].subfunc_array,
+                                             array, testlist[idx].array_size), 0);
+
+                       ksft_test_result_pass("%s\n", testlist[idx].subfunc_name);
+                       free(array);
+               } else {
+                       ksft_test_result_skip("%s feature is not available\n",
+                                             testlist[idx].subfunc_name);
+               }
+       }
+
+       kvm_vm_free(vm);
+       ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/s390/debug_test.c b/tools/testing/selftests/kvm/s390/debug_test.c
new file mode 100644 (file)
index 0000000..ad80959
--- /dev/null
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test KVM debugging features. */
+#include "kvm_util.h"
+#include "test_util.h"
+#include "sie.h"
+
+#include <linux/kvm.h>
+
+#define __LC_SVC_NEW_PSW 0x1c0
+#define __LC_PGM_NEW_PSW 0x1d0
+#define IPA0_DIAG 0x8300
+#define PGM_SPECIFICATION 0x06
+
+/* Common code for testing single-stepping interruptions. */
+extern char int_handler[];
+asm("int_handler:\n"
+    "j .\n");
+
+static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code,
+                                     size_t new_psw_off, uint64_t *new_psw)
+{
+       struct kvm_guest_debug debug = {};
+       struct kvm_regs regs;
+       struct kvm_vm *vm;
+       char *lowcore;
+
+       vm = vm_create_with_one_vcpu(vcpu, guest_code);
+       lowcore = addr_gpa2hva(vm, 0);
+       new_psw[0] = (*vcpu)->run->psw_mask;
+       new_psw[1] = (uint64_t)int_handler;
+       memcpy(lowcore + new_psw_off, new_psw, 16);
+       vcpu_regs_get(*vcpu, &regs);
+       regs.gprs[2] = -1;
+       vcpu_regs_set(*vcpu, &regs);
+       debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+       vcpu_guest_debug_set(*vcpu, &debug);
+       vcpu_run(*vcpu);
+
+       return vm;
+}
+
+static void test_step_int(void *guest_code, size_t new_psw_off)
+{
+       struct kvm_vcpu *vcpu;
+       uint64_t new_psw[2];
+       struct kvm_vm *vm;
+
+       vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+       TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
+       TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
+       kvm_vm_free(vm);
+}
+
+/* Test single-stepping "boring" program interruptions. */
+extern char test_step_pgm_guest_code[];
+asm("test_step_pgm_guest_code:\n"
+    ".insn rr,0x1d00,%r1,%r0 /* dr %r1,%r0 */\n"
+    "j .\n");
+
+static void test_step_pgm(void)
+{
+       test_step_int(test_step_pgm_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/*
+ * Test single-stepping program interruptions caused by DIAG.
+ * Userspace emulation must not interfere with single-stepping.
+ */
+extern char test_step_pgm_diag_guest_code[];
+asm("test_step_pgm_diag_guest_code:\n"
+    "diag %r0,%r0,0\n"
+    "j .\n");
+
+static void test_step_pgm_diag(void)
+{
+       struct kvm_s390_irq irq = {
+               .type = KVM_S390_PROGRAM_INT,
+               .u.pgm.code = PGM_SPECIFICATION,
+       };
+       struct kvm_vcpu *vcpu;
+       uint64_t new_psw[2];
+       struct kvm_vm *vm;
+
+       vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code,
+                            __LC_PGM_NEW_PSW, new_psw);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INST);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG);
+       vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq);
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+       TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
+       TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
+       kvm_vm_free(vm);
+}
+
+/*
+ * Test single-stepping program interruptions caused by ISKE.
+ * CPUSTAT_KSS handling must not interfere with single-stepping.
+ */
+extern char test_step_pgm_iske_guest_code[];
+asm("test_step_pgm_iske_guest_code:\n"
+    "iske %r2,%r2\n"
+    "j .\n");
+
+static void test_step_pgm_iske(void)
+{
+       test_step_int(test_step_pgm_iske_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/*
+ * Test single-stepping program interruptions caused by LCTL.
+ * KVM emulation must not interfere with single-stepping.
+ */
+extern char test_step_pgm_lctl_guest_code[];
+asm("test_step_pgm_lctl_guest_code:\n"
+    "lctl %c0,%c0,1\n"
+    "j .\n");
+
+static void test_step_pgm_lctl(void)
+{
+       test_step_int(test_step_pgm_lctl_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/* Test single-stepping supervisor-call interruptions. */
+extern char test_step_svc_guest_code[];
+asm("test_step_svc_guest_code:\n"
+    "svc 0\n"
+    "j .\n");
+
+static void test_step_svc(void)
+{
+       test_step_int(test_step_svc_guest_code, __LC_SVC_NEW_PSW);
+}
+
+/* Run all tests above. */
+static struct testdef {
+       const char *name;
+       void (*test)(void);
+} testlist[] = {
+       { "single-step pgm", test_step_pgm },
+       { "single-step pgm caused by diag", test_step_pgm_diag },
+       { "single-step pgm caused by iske", test_step_pgm_iske },
+       { "single-step pgm caused by lctl", test_step_pgm_lctl },
+       { "single-step svc", test_step_svc },
+};
+
+int main(int argc, char *argv[])
+{
+       int idx;
+
+       ksft_print_header();
+       ksft_set_plan(ARRAY_SIZE(testlist));
+       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+               testlist[idx].test();
+               ksft_test_result_pass("%s\n", testlist[idx].name);
+       }
+       ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/s390/memop.c b/tools/testing/selftests/kvm/s390/memop.c
new file mode 100644 (file)
index 0000000..4374b4c
--- /dev/null
@@ -0,0 +1,1187 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test for s390x KVM_S390_MEM_OP
+ *
+ * Copyright (C) 2019, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <pthread.h>
+
+#include <linux/bits.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+#include "processor.h"
+
+enum mop_target {
+       LOGICAL,
+       SIDA,
+       ABSOLUTE,
+       INVALID,
+};
+
+enum mop_access_mode {
+       READ,
+       WRITE,
+       CMPXCHG,
+};
+
+struct mop_desc {
+       uintptr_t gaddr;
+       uintptr_t gaddr_v;
+       uint64_t set_flags;
+       unsigned int f_check : 1;
+       unsigned int f_inject : 1;
+       unsigned int f_key : 1;
+       unsigned int _gaddr_v : 1;
+       unsigned int _set_flags : 1;
+       unsigned int _sida_offset : 1;
+       unsigned int _ar : 1;
+       uint32_t size;
+       enum mop_target target;
+       enum mop_access_mode mode;
+       void *buf;
+       uint32_t sida_offset;
+       void *old;
+       uint8_t old_value[16];
+       bool *cmpxchg_success;
+       uint8_t ar;
+       uint8_t key;
+};
+
+const uint8_t NO_KEY = 0xff;
+
+static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc *desc)
+{
+       struct kvm_s390_mem_op ksmo = {
+               .gaddr = (uintptr_t)desc->gaddr,
+               .size = desc->size,
+               .buf = ((uintptr_t)desc->buf),
+               .reserved = "ignored_ignored_ignored_ignored"
+       };
+
+       switch (desc->target) {
+       case LOGICAL:
+               if (desc->mode == READ)
+                       ksmo.op = KVM_S390_MEMOP_LOGICAL_READ;
+               if (desc->mode == WRITE)
+                       ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
+               break;
+       case SIDA:
+               if (desc->mode == READ)
+                       ksmo.op = KVM_S390_MEMOP_SIDA_READ;
+               if (desc->mode == WRITE)
+                       ksmo.op = KVM_S390_MEMOP_SIDA_WRITE;
+               break;
+       case ABSOLUTE:
+               if (desc->mode == READ)
+                       ksmo.op = KVM_S390_MEMOP_ABSOLUTE_READ;
+               if (desc->mode == WRITE)
+                       ksmo.op = KVM_S390_MEMOP_ABSOLUTE_WRITE;
+               if (desc->mode == CMPXCHG) {
+                       ksmo.op = KVM_S390_MEMOP_ABSOLUTE_CMPXCHG;
+                       ksmo.old_addr = (uint64_t)desc->old;
+                       memcpy(desc->old_value, desc->old, desc->size);
+               }
+               break;
+       case INVALID:
+               ksmo.op = -1;
+       }
+       if (desc->f_check)
+               ksmo.flags |= KVM_S390_MEMOP_F_CHECK_ONLY;
+       if (desc->f_inject)
+               ksmo.flags |= KVM_S390_MEMOP_F_INJECT_EXCEPTION;
+       if (desc->_set_flags)
+               ksmo.flags = desc->set_flags;
+       if (desc->f_key && desc->key != NO_KEY) {
+               ksmo.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION;
+               ksmo.key = desc->key;
+       }
+       if (desc->_ar)
+               ksmo.ar = desc->ar;
+       else
+               ksmo.ar = 0;
+       if (desc->_sida_offset)
+               ksmo.sida_offset = desc->sida_offset;
+
+       return ksmo;
+}
+
+struct test_info {
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+};
+
+#define PRINT_MEMOP false
+static void print_memop(struct kvm_vcpu *vcpu, const struct kvm_s390_mem_op *ksmo)
+{
+       if (!PRINT_MEMOP)
+               return;
+
+       if (!vcpu)
+               printf("vm memop(");
+       else
+               printf("vcpu memop(");
+       switch (ksmo->op) {
+       case KVM_S390_MEMOP_LOGICAL_READ:
+               printf("LOGICAL, READ, ");
+               break;
+       case KVM_S390_MEMOP_LOGICAL_WRITE:
+               printf("LOGICAL, WRITE, ");
+               break;
+       case KVM_S390_MEMOP_SIDA_READ:
+               printf("SIDA, READ, ");
+               break;
+       case KVM_S390_MEMOP_SIDA_WRITE:
+               printf("SIDA, WRITE, ");
+               break;
+       case KVM_S390_MEMOP_ABSOLUTE_READ:
+               printf("ABSOLUTE, READ, ");
+               break;
+       case KVM_S390_MEMOP_ABSOLUTE_WRITE:
+               printf("ABSOLUTE, WRITE, ");
+               break;
+       case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
+               printf("ABSOLUTE, CMPXCHG, ");
+               break;
+       }
+       printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u, old_addr=%llx",
+              ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key,
+              ksmo->old_addr);
+       if (ksmo->flags & KVM_S390_MEMOP_F_CHECK_ONLY)
+               printf(", CHECK_ONLY");
+       if (ksmo->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION)
+               printf(", INJECT_EXCEPTION");
+       if (ksmo->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION)
+               printf(", SKEY_PROTECTION");
+       puts(")");
+}
+
+static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
+                          struct mop_desc *desc)
+{
+       struct kvm_vcpu *vcpu = info.vcpu;
+
+       if (!vcpu)
+               return __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo);
+       else
+               return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo);
+}
+
+static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
+                       struct mop_desc *desc)
+{
+       int r;
+
+       r = err_memop_ioctl(info, ksmo, desc);
+       if (ksmo->op == KVM_S390_MEMOP_ABSOLUTE_CMPXCHG) {
+               if (desc->cmpxchg_success) {
+                       int diff = memcmp(desc->old_value, desc->old, desc->size);
+                       *desc->cmpxchg_success = !diff;
+               }
+       }
+       TEST_ASSERT(!r, __KVM_IOCTL_ERROR("KVM_S390_MEM_OP", r));
+}
+
+#define MEMOP(err, info_p, mop_target_p, access_mode_p, buf_p, size_p, ...)    \
+({                                                                             \
+       struct test_info __info = (info_p);                                     \
+       struct mop_desc __desc = {                                              \
+               .target = (mop_target_p),                                       \
+               .mode = (access_mode_p),                                        \
+               .buf = (buf_p),                                                 \
+               .size = (size_p),                                               \
+               __VA_ARGS__                                                     \
+       };                                                                      \
+       struct kvm_s390_mem_op __ksmo;                                          \
+                                                                               \
+       if (__desc._gaddr_v) {                                                  \
+               if (__desc.target == ABSOLUTE)                                  \
+                       __desc.gaddr = addr_gva2gpa(__info.vm, __desc.gaddr_v); \
+               else                                                            \
+                       __desc.gaddr = __desc.gaddr_v;                          \
+       }                                                                       \
+       __ksmo = ksmo_from_desc(&__desc);                                       \
+       print_memop(__info.vcpu, &__ksmo);                                      \
+       err##memop_ioctl(__info, &__ksmo, &__desc);                             \
+})
+
+#define MOP(...) MEMOP(, __VA_ARGS__)
+#define ERR_MOP(...) MEMOP(err_, __VA_ARGS__)
+
+#define GADDR(a) .gaddr = ((uintptr_t)a)
+#define GADDR_V(v) ._gaddr_v = 1, .gaddr_v = ((uintptr_t)v)
+#define CHECK_ONLY .f_check = 1
+#define SET_FLAGS(f) ._set_flags = 1, .set_flags = (f)
+#define SIDA_OFFSET(o) ._sida_offset = 1, .sida_offset = (o)
+#define AR(a) ._ar = 1, .ar = (a)
+#define KEY(a) .f_key = 1, .key = (a)
+#define INJECT .f_inject = 1
+#define CMPXCHG_OLD(o) .old = (o)
+#define CMPXCHG_SUCCESS(s) .cmpxchg_success = (s)
+
+#define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); })
+
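+/*
+ * Typical usage: MOP(t.vcpu, LOGICAL, WRITE, mem1, size, GADDR_V(mem1), KEY(key))
+ * issues a key-checked logical write via the vcpu ioctl and asserts success,
+ * while ERR_MOP() returns the raw ioctl result instead. CHECK_N_DO(MOP, ...)
+ * first performs the operation with CHECK_ONLY set and then for real.
+ */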
+#define CR0_FETCH_PROTECTION_OVERRIDE  (1UL << (63 - 38))
+#define CR0_STORAGE_PROTECTION_OVERRIDE        (1UL << (63 - 39))
+
+static uint8_t __aligned(PAGE_SIZE) mem1[65536];
+static uint8_t __aligned(PAGE_SIZE) mem2[65536];
+
+struct test_default {
+       struct kvm_vm *kvm_vm;
+       struct test_info vm;
+       struct test_info vcpu;
+       struct kvm_run *run;
+       int size;
+};
+
+static struct test_default test_default_init(void *guest_code)
+{
+       struct kvm_vcpu *vcpu;
+       struct test_default t;
+
+       t.size = min((size_t)kvm_check_cap(KVM_CAP_S390_MEM_OP), sizeof(mem1));
+       t.kvm_vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       t.vm = (struct test_info) { t.kvm_vm, NULL };
+       t.vcpu = (struct test_info) { t.kvm_vm, vcpu };
+       t.run = vcpu->run;
+       return t;
+}
+
+enum stage {
+       /* Synced state set by host, e.g. DAT */
+       STAGE_INITED,
+       /* Guest did nothing */
+       STAGE_IDLED,
+       /* Guest set storage keys (specifics up to test case) */
+       STAGE_SKEYS_SET,
+       /* Guest copied memory (locations up to test case) */
+       STAGE_COPIED,
+       /* End of guest code reached */
+       STAGE_DONE,
+};
+
+#define HOST_SYNC(info_p, stage)                                       \
+({                                                                     \
+       struct test_info __info = (info_p);                             \
+       struct kvm_vcpu *__vcpu = __info.vcpu;                          \
+       struct ucall uc;                                                \
+       int __stage = (stage);                                          \
+                                                                       \
+       vcpu_run(__vcpu);                                               \
+       get_ucall(__vcpu, &uc);                                         \
+       if (uc.cmd == UCALL_ABORT) {                                    \
+               REPORT_GUEST_ASSERT(uc);                                \
+       }                                                               \
+       TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC);                             \
+       TEST_ASSERT_EQ(uc.args[1], __stage);                            \
+})                                                                     \
+
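+/* Fill mem1 with random bytes and mem2 with a fixed pattern so copies are detectable. */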
+static void prepare_mem12(void)
+{
+       int i;
+
+       for (i = 0; i < sizeof(mem1); i++)
+               mem1[i] = rand();
+       memset(mem2, 0xaa, sizeof(mem2));
+}
+
+#define ASSERT_MEM_EQ(p1, p2, size) \
+       TEST_ASSERT(!memcmp(p1, p2, size), "Memory contents do not match!")
+
+static void default_write_read(struct test_info copy_cpu, struct test_info mop_cpu,
+                              enum mop_target mop_target, uint32_t size, uint8_t key)
+{
+       prepare_mem12();
+       CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size,
+                  GADDR_V(mem1), KEY(key));
+       HOST_SYNC(copy_cpu, STAGE_COPIED);
+       CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size,
+                  GADDR_V(mem2), KEY(key));
+       ASSERT_MEM_EQ(mem1, mem2, size);
+}
+
+static void default_read(struct test_info copy_cpu, struct test_info mop_cpu,
+                        enum mop_target mop_target, uint32_t size, uint8_t key)
+{
+       prepare_mem12();
+       CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, GADDR_V(mem1));
+       HOST_SYNC(copy_cpu, STAGE_COPIED);
+       CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size,
+                  GADDR_V(mem2), KEY(key));
+       ASSERT_MEM_EQ(mem1, mem2, size);
+}
+
+static void default_cmpxchg(struct test_default *test, uint8_t key)
+{
+       for (int size = 1; size <= 16; size *= 2) {
+               for (int offset = 0; offset < 16; offset += size) {
+                       uint8_t __aligned(16) new[16] = {};
+                       uint8_t __aligned(16) old[16];
+                       bool succ;
+
+                       prepare_mem12();
+                       default_write_read(test->vcpu, test->vcpu, LOGICAL, 16, NO_KEY);
+
+                       memcpy(&old, mem1, 16);
+                       MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset,
+                           size, GADDR_V(mem1 + offset),
+                           CMPXCHG_OLD(old + offset),
+                           CMPXCHG_SUCCESS(&succ), KEY(key));
+                       HOST_SYNC(test->vcpu, STAGE_COPIED);
+                       MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2));
+                       TEST_ASSERT(succ, "exchange of values should succeed");
+                       memcpy(mem1 + offset, new + offset, size);
+                       ASSERT_MEM_EQ(mem1, mem2, 16);
+
+                       memcpy(&old, mem1, 16);
+                       new[offset]++;
+                       old[offset]++;
+                       MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset,
+                           size, GADDR_V(mem1 + offset),
+                           CMPXCHG_OLD(old + offset),
+                           CMPXCHG_SUCCESS(&succ), KEY(key));
+                       HOST_SYNC(test->vcpu, STAGE_COPIED);
+                       MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2));
+                       TEST_ASSERT(!succ, "exchange of values should not succeed");
+                       ASSERT_MEM_EQ(mem1, mem2, 16);
+                       ASSERT_MEM_EQ(&old, mem1, 16);
+               }
+       }
+}
+
+static void guest_copy(void)
+{
+       GUEST_SYNC(STAGE_INITED);
+       memcpy(&mem2, &mem1, sizeof(mem2));
+       GUEST_SYNC(STAGE_COPIED);
+}
+
+static void test_copy(void)
+{
+       struct test_default t = test_default_init(guest_copy);
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+
+       default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, NO_KEY);
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void test_copy_access_register(void)
+{
+       struct test_default t = test_default_init(guest_copy);
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+
+       prepare_mem12();
+       t.run->psw_mask &= ~(3UL << (63 - 17));
+       t.run->psw_mask |= 1UL << (63 - 17);  /* Enable AR mode */
+
+       /*
+        * Primary address space gets used if an access register
+        * contains zero. The host makes use of AR[1], so it is a good
+        * candidate for ensuring the guest AR (of zero) is used.
+        */
+       CHECK_N_DO(MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size,
+                  GADDR_V(mem1), AR(1));
+       HOST_SYNC(t.vcpu, STAGE_COPIED);
+
+       CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, t.size,
+                  GADDR_V(mem2), AR(1));
+       ASSERT_MEM_EQ(mem1, mem2, t.size);
+
+       kvm_vm_free(t.kvm_vm);
+}
+
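+/*
+ * Set the storage key for every page in [addr, addr + len): LRA translates
+ * the virtual address and SSKE sets the key on the resulting frame; the
+ * guest asserts that each page is actually mapped.
+ */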
+static void set_storage_key_range(void *addr, size_t len, uint8_t key)
+{
+       uintptr_t _addr, abs, i;
+       int not_mapped = 0;
+
+       _addr = (uintptr_t)addr;
+       for (i = _addr & PAGE_MASK; i < _addr + len; i += PAGE_SIZE) {
+               abs = i;
+               asm volatile (
+                              "lra     %[abs], 0(0,%[abs])\n"
+                       "       jz      0f\n"
+                       "       llill   %[not_mapped],1\n"
+                       "       j       1f\n"
+                       "0:     sske    %[key], %[abs]\n"
+                       "1:"
+                       : [abs] "+&a" (abs), [not_mapped] "+r" (not_mapped)
+                       : [key] "r" (key)
+                       : "cc"
+               );
+               GUEST_ASSERT_EQ(not_mapped, 0);
+       }
+}
+
+static void guest_copy_key(void)
+{
+       set_storage_key_range(mem1, sizeof(mem1), 0x90);
+       set_storage_key_range(mem2, sizeof(mem2), 0x90);
+       GUEST_SYNC(STAGE_SKEYS_SET);
+
+       for (;;) {
+               memcpy(&mem2, &mem1, sizeof(mem2));
+               GUEST_SYNC(STAGE_COPIED);
+       }
+}
+
+static void test_copy_key(void)
+{
+       struct test_default t = test_default_init(guest_copy_key);
+
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       /* vm, no key */
+       default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, NO_KEY);
+
+       /* vm/vcpu, matching key or key 0 */
+       default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 0);
+       default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9);
+       default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 0);
+       default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9);
+       /*
+        * There used to be different code paths for key handling depending
+        * on whether the region crossed a page boundary.
+        * There currently are not, but the more tests the merrier.
+        */
+       default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 0);
+       default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 9);
+       default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 0);
+       default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 9);
+
+       /* vm/vcpu, mismatching keys on read, but no fetch protection */
+       default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2);
+       default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 2);
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void test_cmpxchg_key(void)
+{
+       struct test_default t = test_default_init(guest_copy_key);
+
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       default_cmpxchg(&t, NO_KEY);
+       default_cmpxchg(&t, 0);
+       default_cmpxchg(&t, 9);
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+static __uint128_t cut_to_size(int size, __uint128_t val)
+{
+       switch (size) {
+       case 1:
+               return (uint8_t)val;
+       case 2:
+               return (uint16_t)val;
+       case 4:
+               return (uint32_t)val;
+       case 8:
+               return (uint64_t)val;
+       case 16:
+               return val;
+       }
+       GUEST_FAIL("Invalid size = %u", size);
+       return 0;
+}
+
+static bool popcount_eq(__uint128_t a, __uint128_t b)
+{
+       unsigned int count_a, count_b;
+
+       count_a = __builtin_popcountl((uint64_t)(a >> 64)) +
+                 __builtin_popcountl((uint64_t)a);
+       count_b = __builtin_popcountl((uint64_t)(b >> 64)) +
+                 __builtin_popcountl((uint64_t)b);
+       return count_a == count_b;
+}
+
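+/*
+ * Rotate the low 'size' bytes of val right by 'amount' bits; callers
+ * truncate any spill into higher bits of the 128-bit result.
+ */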
+static __uint128_t rotate(int size, __uint128_t val, int amount)
+{
+       unsigned int bits = size * 8;
+
+       amount = (amount + bits) % bits;
+       val = cut_to_size(size, val);
+       if (!amount)
+               return val;
+       return (val << (bits - amount)) | (val >> amount);
+}
+
+const unsigned int max_block = 16;
+
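+/*
+ * Derive a pseudo-random, power-of-two sized and naturally aligned block
+ * within the first max_block bytes; guest and host use different
+ * multipliers so that their access patterns differ.
+ */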
+static void choose_block(bool guest, int i, int *size, int *offset)
+{
+       unsigned int rand;
+
+       rand = i;
+       if (guest) {
+               rand = rand * 19 + 11;
+               *size = 1 << ((rand % 3) + 2);
+               rand = rand * 19 + 11;
+               *offset = (rand % max_block) & ~(*size - 1);
+       } else {
+               rand = rand * 17 + 5;
+               *size = 1 << (rand % 5);
+               rand = rand * 17 + 5;
+               *offset = (rand % max_block) & ~(*size - 1);
+       }
+}
+
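+/*
+ * Mutate 'old' by either swapping two of its bytes or rotating it; both
+ * transformations preserve the number of set bits, which the host checks
+ * at the end of the concurrent cmpxchg test via popcount_eq().
+ */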
+static __uint128_t permutate_bits(bool guest, int i, int size, __uint128_t old)
+{
+       unsigned int rand;
+       int amount;
+       bool swap;
+
+       rand = i;
+       rand = rand * 3 + 1;
+       if (guest)
+               rand = rand * 3 + 1;
+       swap = rand % 2 == 0;
+       if (swap) {
+               int i, j;
+               __uint128_t new;
+               uint8_t byte0, byte1;
+
+               rand = rand * 3 + 1;
+               i = rand % size;
+               rand = rand * 3 + 1;
+               j = rand % size;
+               if (i == j)
+                       return old;
+               new = rotate(16, old, i * 8);
+               byte0 = new & 0xff;
+               new &= ~0xff;
+               new = rotate(16, new, -i * 8);
+               new = rotate(16, new, j * 8);
+               byte1 = new & 0xff;
+               new = (new & ~0xff) | byte0;
+               new = rotate(16, new, -j * 8);
+               new = rotate(16, new, i * 8);
+               new = new | byte1;
+               new = rotate(16, new, -i * 8);
+               return new;
+       }
+       rand = rand * 3 + 1;
+       amount = rand % (size * 8);
+       return rotate(size, old, amount);
+}
+
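+/*
+ * Guest-side compare-and-swap using CS/CSG/CDSG. Returns true on success;
+ * on failure, *old_addr is updated with the value found in memory.
+ */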
+static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t new)
+{
+       bool ret;
+
+       switch (size) {
+       case 4: {
+                       uint32_t old = *old_addr;
+
+                       asm volatile ("cs %[old],%[new],%[address]"
+                           : [old] "+d" (old),
+                             [address] "+Q" (*(uint32_t *)(target))
+                           : [new] "d" ((uint32_t)new)
+                           : "cc"
+                       );
+                       ret = old == (uint32_t)*old_addr;
+                       *old_addr = old;
+                       return ret;
+               }
+       case 8: {
+                       uint64_t old = *old_addr;
+
+                       asm volatile ("csg %[old],%[new],%[address]"
+                           : [old] "+d" (old),
+                             [address] "+Q" (*(uint64_t *)(target))
+                           : [new] "d" ((uint64_t)new)
+                           : "cc"
+                       );
+                       ret = old == (uint64_t)*old_addr;
+                       *old_addr = old;
+                       return ret;
+               }
+       case 16: {
+                       __uint128_t old = *old_addr;
+
+                       asm volatile ("cdsg %[old],%[new],%[address]"
+                           : [old] "+d" (old),
+                             [address] "+Q" (*(__uint128_t *)(target))
+                           : [new] "d" (new)
+                           : "cc"
+                       );
+                       ret = old == *old_addr;
+                       *old_addr = old;
+                       return ret;
+               }
+       }
+       GUEST_FAIL("Invalid size = %u", size);
+       return 0;
+}
+
+const unsigned int cmpxchg_iter_outer = 100, cmpxchg_iter_inner = 10000;
+
+static void guest_cmpxchg_key(void)
+{
+       int size, offset;
+       __uint128_t old, new;
+
+       set_storage_key_range(mem1, max_block, 0x10);
+       set_storage_key_range(mem2, max_block, 0x10);
+       GUEST_SYNC(STAGE_SKEYS_SET);
+
+       for (int i = 0; i < cmpxchg_iter_outer; i++) {
+               do {
+                       old = 1;
+               } while (!_cmpxchg(16, mem1, &old, 0));
+               for (int j = 0; j < cmpxchg_iter_inner; j++) {
+                       choose_block(true, i + j, &size, &offset);
+                       do {
+                               new = permutate_bits(true, i + j, size, old);
+                       } while (!_cmpxchg(size, mem2 + offset, &old, new));
+               }
+       }
+
+       GUEST_SYNC(STAGE_DONE);
+}
+
+static void *run_guest(void *data)
+{
+       struct test_info *info = data;
+
+       HOST_SYNC(*info, STAGE_DONE);
+       return NULL;
+}
+
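+/* Return a pointer to the 'size' low-order bytes of the (big-endian) 128-bit value. */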
+static char *quad_to_char(__uint128_t *quad, int size)
+{
+       return ((char *)quad) + (sizeof(*quad) - size);
+}
+
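+/*
+ * Stress cmpxchg from host and guest concurrently: the first quadword of
+ * guest mem1 acts as a handover flag (the host sets it to 1, the guest
+ * resets it to 0), while both sides keep applying popcount-preserving
+ * mutations to mem2 via compare-and-swap. A non-atomic implementation
+ * would eventually change the number of set bits, which the final
+ * assertion checks.
+ */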
+static void test_cmpxchg_key_concurrent(void)
+{
+       struct test_default t = test_default_init(guest_cmpxchg_key);
+       int size, offset;
+       __uint128_t old, new;
+       bool success;
+       pthread_t thread;
+
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+       prepare_mem12();
+       MOP(t.vcpu, LOGICAL, WRITE, mem1, max_block, GADDR_V(mem2));
+       pthread_create(&thread, NULL, run_guest, &t.vcpu);
+
+       for (int i = 0; i < cmpxchg_iter_outer; i++) {
+               do {
+                       old = 0;
+                       new = 1;
+                       MOP(t.vm, ABSOLUTE, CMPXCHG, &new,
+                           sizeof(new), GADDR_V(mem1),
+                           CMPXCHG_OLD(&old),
+                           CMPXCHG_SUCCESS(&success), KEY(1));
+               } while (!success);
+               for (int j = 0; j < cmpxchg_iter_inner; j++) {
+                       choose_block(false, i + j, &size, &offset);
+                       do {
+                               new = permutate_bits(false, i + j, size, old);
+                               MOP(t.vm, ABSOLUTE, CMPXCHG, quad_to_char(&new, size),
+                                   size, GADDR_V(mem2 + offset),
+                                   CMPXCHG_OLD(quad_to_char(&old, size)),
+                                   CMPXCHG_SUCCESS(&success), KEY(1));
+                       } while (!success);
+               }
+       }
+
+       pthread_join(thread, NULL);
+
+       MOP(t.vcpu, LOGICAL, READ, mem2, max_block, GADDR_V(mem2));
+       TEST_ASSERT(popcount_eq(*(__uint128_t *)mem1, *(__uint128_t *)mem2),
+                   "Must retain number of set bits");
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void guest_copy_key_fetch_prot(void)
+{
+       /*
+        * For some reason combining the first sync with override enablement
+        * results in an exception when calling HOST_SYNC.
+        */
+       GUEST_SYNC(STAGE_INITED);
+       /* Storage protection override applies to both store and fetch. */
+       set_storage_key_range(mem1, sizeof(mem1), 0x98);
+       set_storage_key_range(mem2, sizeof(mem2), 0x98);
+       GUEST_SYNC(STAGE_SKEYS_SET);
+
+       for (;;) {
+               memcpy(&mem2, &mem1, sizeof(mem2));
+               GUEST_SYNC(STAGE_COPIED);
+       }
+}
+
+static void test_copy_key_storage_prot_override(void)
+{
+       struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+       t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
+       t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       /* vcpu, mismatching keys, storage protection override in effect */
+       default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2);
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void test_copy_key_fetch_prot(void)
+{
+       struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       /* vm/vcpu, matching key, fetch protection in effect */
+       default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9);
+       default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9);
+
+       kvm_vm_free(t.kvm_vm);
+}
+
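+/*
+ * Key-protected accesses are expected to fail with a positive return value
+ * of 4, the program interruption code for a protection exception.
+ */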
+#define ERR_PROT_MOP(...)                                                      \
+({                                                                             \
+       int rv;                                                                 \
+                                                                               \
+       rv = ERR_MOP(__VA_ARGS__);                                              \
+       TEST_ASSERT(rv == 4, "Should result in protection exception");          \
+})
+
+static void guest_error_key(void)
+{
+       GUEST_SYNC(STAGE_INITED);
+       set_storage_key_range(mem1, PAGE_SIZE, 0x18);
+       set_storage_key_range(mem1 + PAGE_SIZE, sizeof(mem1) - PAGE_SIZE, 0x98);
+       GUEST_SYNC(STAGE_SKEYS_SET);
+       GUEST_SYNC(STAGE_IDLED);
+}
+
+static void test_errors_key(void)
+{
+       struct test_default t = test_default_init(guest_error_key);
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       /* vm/vcpu, mismatching keys, fetch protection in effect */
+       CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
+       CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem1), KEY(2));
+       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
+       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem1), KEY(2));
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_cmpxchg_key(void)
+{
+       struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+       int i;
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       for (i = 1; i <= 16; i *= 2) {
+               __uint128_t old = 0;
+
+               ERR_PROT_MOP(t.vm, ABSOLUTE, CMPXCHG, mem2, i, GADDR_V(mem2),
+                            CMPXCHG_OLD(&old), KEY(2));
+       }
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void test_termination(void)
+{
+       struct test_default t = test_default_init(guest_error_key);
+       uint64_t prefix;
+       uint64_t teid;
+       uint64_t teid_mask = BIT(63 - 56) | BIT(63 - 60) | BIT(63 - 61);
+       uint64_t psw[2];
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       /* vcpu, mismatching keys after first page */
+       ERR_PROT_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(1), INJECT);
+       /*
+        * The memop injected a program exception and the test needs to check the
+        * Translation-Exception Identification (TEID). It is necessary to run
+        * the guest in order to be able to read the TEID from guest memory.
+        * Set the guest's program-new PSW, so the guest state is not clobbered.
+        */
+       prefix = t.run->s.regs.prefix;
+       psw[0] = t.run->psw_mask;
+       psw[1] = t.run->psw_addr;
+       MOP(t.vm, ABSOLUTE, WRITE, psw, sizeof(psw), GADDR(prefix + 464));
+       HOST_SYNC(t.vcpu, STAGE_IDLED);
+       MOP(t.vm, ABSOLUTE, READ, &teid, sizeof(teid), GADDR(prefix + 168));
+       /* Bits 56, 60, 61 form a code, 0 being the only one allowing for termination */
+       TEST_ASSERT_EQ(teid & teid_mask, 0);
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_key_storage_prot_override(void)
+{
+       struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+       t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
+       t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       /* vm, mismatching keys, storage protection override not applicable to vm */
+       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
+       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem2), KEY(2));
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+const uint64_t last_page_addr = -PAGE_SIZE;
+
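+/*
+ * Fetch protection override, when enabled via CR0, allows fetches from
+ * addresses 0..2047 to ignore fetch protection. The tests below therefore
+ * need guest pages mapped at address 0 and at the last page (for the
+ * wraparound case).
+ */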
+static void guest_copy_key_fetch_prot_override(void)
+{
+       int i;
+       char *page_0 = 0;
+
+       GUEST_SYNC(STAGE_INITED);
+       set_storage_key_range(0, PAGE_SIZE, 0x18);
+       set_storage_key_range((void *)last_page_addr, PAGE_SIZE, 0x0);
+       asm volatile ("sske %[key],%[addr]\n" :: [addr] "r"(0L), [key] "r"(0x18) : "cc");
+       GUEST_SYNC(STAGE_SKEYS_SET);
+
+       for (;;) {
+               for (i = 0; i < PAGE_SIZE; i++)
+                       page_0[i] = mem1[i];
+               GUEST_SYNC(STAGE_COPIED);
+       }
+}
+
+static void test_copy_key_fetch_prot_override(void)
+{
+       struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
+       vm_vaddr_t guest_0_page, guest_last_page;
+
+       guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
+       guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+       if (guest_0_page != 0 || guest_last_page != last_page_addr) {
+               print_skip("did not allocate guest pages at required positions");
+               goto out;
+       }
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+       t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
+       t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       /* vcpu, mismatching keys on fetch, fetch protection override applies */
+       prepare_mem12();
+       MOP(t.vcpu, LOGICAL, WRITE, mem1, PAGE_SIZE, GADDR_V(mem1));
+       HOST_SYNC(t.vcpu, STAGE_COPIED);
+       CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2));
+       ASSERT_MEM_EQ(mem1, mem2, 2048);
+
+       /*
+        * vcpu, mismatching keys on fetch, fetch protection override applies,
+        * wraparound
+        */
+       prepare_mem12();
+       MOP(t.vcpu, LOGICAL, WRITE, mem1, 2 * PAGE_SIZE, GADDR_V(guest_last_page));
+       HOST_SYNC(t.vcpu, STAGE_COPIED);
+       CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048,
+                  GADDR_V(guest_last_page), KEY(2));
+       ASSERT_MEM_EQ(mem1, mem2, 2048);
+
+out:
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_key_fetch_prot_override_not_enabled(void)
+{
+       struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
+       vm_vaddr_t guest_0_page, guest_last_page;
+
+       guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
+       guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+       if (guest_0_page != 0 || guest_last_page != last_page_addr) {
+               print_skip("did not allocate guest pages at required positions");
+               goto out;
+       }
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       /* vcpu, mismatching keys on fetch, fetch protection override not enabled */
+       CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(0), KEY(2));
+
+out:
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_key_fetch_prot_override_enabled(void)
+{
+       struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
+       vm_vaddr_t guest_0_page, guest_last_page;
+
+       guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
+       guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+       if (guest_0_page != 0 || guest_last_page != last_page_addr) {
+               print_skip("did not allocate guest pages at required positions");
+               goto out;
+       }
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+       t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
+       t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       /*
+        * vcpu, mismatching keys on fetch; fetch protection override does
+        * not apply because the access exceeds the 2048-byte override range
+        */
+       CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048 + 1, GADDR_V(0), KEY(2));
+       CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048 + 1,
+                  GADDR_V(guest_last_page), KEY(2));
+       /* vm, fetch protection override does not apply */
+       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR(0), KEY(2));
+       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2));
+
+out:
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void guest_idle(void)
+{
+       GUEST_SYNC(STAGE_INITED); /* for consistency's sake */
+       for (;;)
+               GUEST_SYNC(STAGE_IDLED);
+}
+
+static void _test_errors_common(struct test_info info, enum mop_target target, int size)
+{
+       int rv;
+
+       /* Bad size: */
+       rv = ERR_MOP(info, target, WRITE, mem1, -1, GADDR_V(mem1));
+       TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes");
+
+       /* Zero size: */
+       rv = ERR_MOP(info, target, WRITE, mem1, 0, GADDR_V(mem1));
+       TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM),
+                   "ioctl allows 0 as size");
+
+       /* Bad flags: */
+       rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), SET_FLAGS(-1));
+       TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags");
+
+       /* Bad guest address: */
+       rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL), CHECK_ONLY);
+       TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address with CHECK_ONLY");
+       rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL));
+       TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address on write");
+
+       /* Bad host address: */
+       rv = ERR_MOP(info, target, WRITE, 0, size, GADDR_V(mem1));
+       TEST_ASSERT(rv == -1 && errno == EFAULT,
+                   "ioctl does not report bad host memory address");
+
+       /* Bad key: */
+       rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), KEY(17));
+       TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows invalid key");
+}
+
+static void test_errors(void)
+{
+       struct test_default t = test_default_init(guest_idle);
+       int rv;
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+
+       _test_errors_common(t.vcpu, LOGICAL, t.size);
+       _test_errors_common(t.vm, ABSOLUTE, t.size);
+
+       /* Bad operation: */
+       rv = ERR_MOP(t.vcpu, INVALID, WRITE, mem1, t.size, GADDR_V(mem1));
+       TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
+       /* virtual addresses are not translated when passing INVALID */
+       rv = ERR_MOP(t.vm, INVALID, WRITE, mem1, PAGE_SIZE, GADDR(0));
+       TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
+
+       /* Bad access register: */
+       t.run->psw_mask &= ~(3UL << (63 - 17));
+       t.run->psw_mask |= 1UL << (63 - 17);  /* Enable AR mode */
+       HOST_SYNC(t.vcpu, STAGE_IDLED); /* To sync new state to SIE block */
+       rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), AR(17));
+       TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15");
+       t.run->psw_mask &= ~(3UL << (63 - 17));   /* Disable AR mode */
+       HOST_SYNC(t.vcpu, STAGE_IDLED); /* Run to sync new state */
+
+       /* Check that the SIDA calls are rejected for non-protected guests */
+       rv = ERR_MOP(t.vcpu, SIDA, READ, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0));
+       TEST_ASSERT(rv == -1 && errno == EINVAL,
+                   "ioctl does not reject SIDA_READ in non-protected mode");
+       rv = ERR_MOP(t.vcpu, SIDA, WRITE, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0));
+       TEST_ASSERT(rv == -1 && errno == EINVAL,
+                   "ioctl does not reject SIDA_WRITE in non-protected mode");
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_cmpxchg(void)
+{
+       struct test_default t = test_default_init(guest_idle);
+       __uint128_t old;
+       int rv, i, power = 1;
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+
+       for (i = 0; i < 32; i++) {
+               if (i == power) {
+                       power *= 2;
+                       continue;
+               }
+               rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1),
+                            CMPXCHG_OLD(&old));
+               TEST_ASSERT(rv == -1 && errno == EINVAL,
+                           "ioctl allows bad size for cmpxchg");
+       }
+       for (i = 1; i <= 16; i *= 2) {
+               rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR((void *)~0xfffUL),
+                            CMPXCHG_OLD(&old));
+               TEST_ASSERT(rv > 0, "ioctl allows bad guest address for cmpxchg");
+       }
+       for (i = 2; i <= 16; i *= 2) {
+               rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1 + 1),
+                            CMPXCHG_OLD(&old));
+               TEST_ASSERT(rv == -1 && errno == EINVAL,
+                           "ioctl allows bad alignment for cmpxchg");
+       }
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+int main(int argc, char *argv[])
+{
+       int extension_cap, idx;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_MEM_OP));
+       extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION);
+
+       struct testdef {
+               const char *name;
+               void (*test)(void);
+               bool requirements_met;
+       } testlist[] = {
+               {
+                       .name = "simple copy",
+                       .test = test_copy,
+                       .requirements_met = true,
+               },
+               {
+                       .name = "generic error checks",
+                       .test = test_errors,
+                       .requirements_met = true,
+               },
+               {
+                       .name = "copy with storage keys",
+                       .test = test_copy_key,
+                       .requirements_met = extension_cap > 0,
+               },
+               {
+                       .name = "cmpxchg with storage keys",
+                       .test = test_cmpxchg_key,
+                       .requirements_met = extension_cap & 0x2,
+               },
+               {
+                       .name = "concurrently cmpxchg with storage keys",
+                       .test = test_cmpxchg_key_concurrent,
+                       .requirements_met = extension_cap & 0x2,
+               },
+               {
+                       .name = "copy with key storage protection override",
+                       .test = test_copy_key_storage_prot_override,
+                       .requirements_met = extension_cap > 0,
+               },
+               {
+                       .name = "copy with key fetch protection",
+                       .test = test_copy_key_fetch_prot,
+                       .requirements_met = extension_cap > 0,
+               },
+               {
+                       .name = "copy with key fetch protection override",
+                       .test = test_copy_key_fetch_prot_override,
+                       .requirements_met = extension_cap > 0,
+               },
+               {
+                       .name = "copy with access register mode",
+                       .test = test_copy_access_register,
+                       .requirements_met = true,
+               },
+               {
+                       .name = "error checks with key",
+                       .test = test_errors_key,
+                       .requirements_met = extension_cap > 0,
+               },
+               {
+                       .name = "error checks for cmpxchg with key",
+                       .test = test_errors_cmpxchg_key,
+                       .requirements_met = extension_cap & 0x2,
+               },
+               {
+                       .name = "error checks for cmpxchg",
+                       .test = test_errors_cmpxchg,
+                       .requirements_met = extension_cap & 0x2,
+               },
+               {
+                       .name = "termination",
+                       .test = test_termination,
+                       .requirements_met = extension_cap > 0,
+               },
+               {
+                       .name = "error checks with key storage protection override",
+                       .test = test_errors_key_storage_prot_override,
+                       .requirements_met = extension_cap > 0,
+               },
+               {
+                       .name = "error checks without key fetch prot override",
+                       .test = test_errors_key_fetch_prot_override_not_enabled,
+                       .requirements_met = extension_cap > 0,
+               },
+               {
+                       .name = "error checks with key fetch prot override",
+                       .test = test_errors_key_fetch_prot_override_enabled,
+                       .requirements_met = extension_cap > 0,
+               },
+       };
+
+       ksft_print_header();
+       ksft_set_plan(ARRAY_SIZE(testlist));
+
+       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+               if (testlist[idx].requirements_met) {
+                       testlist[idx].test();
+                       ksft_test_result_pass("%s\n", testlist[idx].name);
+               } else {
+                       ksft_test_result_skip("%s - requirements not met (kernel has extension cap %#x)\n",
+                                             testlist[idx].name, extension_cap);
+               }
+       }
+
+       ksft_finished();        /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/s390/resets.c b/tools/testing/selftests/kvm/s390/resets.c
new file mode 100644 (file)
index 0000000..b58f75b
--- /dev/null
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test for s390x CPU resets
+ *
+ * Copyright (C) 2020, IBM
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+
+#define LOCAL_IRQS 32
+
+#define ARBITRARY_NON_ZERO_VCPU_ID 3
+
+struct kvm_s390_irq buf[ARBITRARY_NON_ZERO_VCPU_ID + LOCAL_IRQS];
+
+static uint8_t regs_null[512];
+
+static void guest_code_initial(void)
+{
+       /* set several CRs to "safe" value */
+       unsigned long cr2_59 = 0x10;    /* enable guarded storage */
+       unsigned long cr8_63 = 0x1;     /* monitor mask = 1 */
+       unsigned long cr10 = 1;         /* PER START */
+       unsigned long cr11 = -1;        /* PER END */
+
+
+       /* Dirty registers */
+       asm volatile (
+               "       lghi    2,0x11\n"       /* Round toward 0 */
+               "       sfpc    2\n"            /* set fpc to !=0 */
+               "       lctlg   2,2,%0\n"
+               "       lctlg   8,8,%1\n"
+               "       lctlg   10,10,%2\n"
+               "       lctlg   11,11,%3\n"
+               /* now clobber some general purpose regs */
+               "       llihh   0,0xffff\n"
+               "       llihl   1,0x5555\n"
+               "       llilh   2,0xaaaa\n"
+               "       llill   3,0x0000\n"
+               /* now clobber a floating point reg */
+               "       lghi    4,0x1\n"
+               "       cdgbr   0,4\n"
+               /* now clobber an access reg */
+               "       sar     9,4\n"
+               /* We embed diag 501 here to control register content */
+               "       diag 0,0,0x501\n"
+               :
+               : "m" (cr2_59), "m" (cr8_63), "m" (cr10), "m" (cr11)
+               /* no clobber list as this should not return */
+               );
+}
+
+static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value)
+{
+       uint64_t eval_reg;
+
+       eval_reg = vcpu_get_reg(vcpu, id);
+       TEST_ASSERT(eval_reg == value, "value == 0x%lx", value);
+}
+
+static void assert_noirq(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_irq_state irq_state;
+       int irqs;
+
+       irq_state.len = sizeof(buf);
+       irq_state.buf = (unsigned long)buf;
+       irqs = __vcpu_ioctl(vcpu, KVM_S390_GET_IRQ_STATE, &irq_state);
+       /*
+        * irqs contains the number of retrieved interrupts. Any interrupt
+        * (notably, the emergency call interrupt we have injected) should
+        * be cleared by the resets, so this should be 0.
+        */
+       TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d", errno);
+       TEST_ASSERT(!irqs, "IRQ pending");
+}
+
+static void assert_clear(struct kvm_vcpu *vcpu)
+{
+       struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
+       struct kvm_sregs sregs;
+       struct kvm_regs regs;
+       struct kvm_fpu fpu;
+
+       vcpu_regs_get(vcpu, &regs);
+       TEST_ASSERT(!memcmp(&regs.gprs, regs_null, sizeof(regs.gprs)), "grs == 0");
+
+       vcpu_sregs_get(vcpu, &sregs);
+       TEST_ASSERT(!memcmp(&sregs.acrs, regs_null, sizeof(sregs.acrs)), "acrs == 0");
+
+       vcpu_fpu_get(vcpu, &fpu);
+       TEST_ASSERT(!memcmp(&fpu.fprs, regs_null, sizeof(fpu.fprs)), "fprs == 0");
+
+       /* sync regs */
+       TEST_ASSERT(!memcmp(sync_regs->gprs, regs_null, sizeof(sync_regs->gprs)),
+                   "gprs0-15 == 0 (sync_regs)");
+
+       TEST_ASSERT(!memcmp(sync_regs->acrs, regs_null, sizeof(sync_regs->acrs)),
+                   "acrs0-15 == 0 (sync_regs)");
+
+       TEST_ASSERT(!memcmp(sync_regs->vrs, regs_null, sizeof(sync_regs->vrs)),
+                   "vrs0-15 == 0 (sync_regs)");
+}
+
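+/* Register values dirtied by the guest that a normal or initial reset must not clear. */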
+static void assert_initial_noclear(struct kvm_vcpu *vcpu)
+{
+       struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
+
+       TEST_ASSERT(sync_regs->gprs[0] == 0xffff000000000000UL,
+                   "gpr0 == 0xffff000000000000 (sync_regs)");
+       TEST_ASSERT(sync_regs->gprs[1] == 0x0000555500000000UL,
+                   "gpr1 == 0x0000555500000000 (sync_regs)");
+       TEST_ASSERT(sync_regs->gprs[2] == 0x00000000aaaa0000UL,
+                   "gpr2 == 0x00000000aaaa0000 (sync_regs)");
+       TEST_ASSERT(sync_regs->gprs[3] == 0x0000000000000000UL,
+                   "gpr3 == 0x0000000000000000 (sync_regs)");
+       TEST_ASSERT(sync_regs->fprs[0] == 0x3ff0000000000000UL,
+                   "fpr0 == 1.0 (sync_regs)");
+       TEST_ASSERT(sync_regs->acrs[9] == 1, "ar9 == 1 (sync_regs)");
+}
+
+static void assert_initial(struct kvm_vcpu *vcpu)
+{
+       struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
+       struct kvm_sregs sregs;
+       struct kvm_fpu fpu;
+
+       /* KVM_GET_SREGS */
+       vcpu_sregs_get(vcpu, &sregs);
+       TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0 (KVM_GET_SREGS)");
+       TEST_ASSERT(sregs.crs[14] == 0xC2000000UL,
+                   "cr14 == 0xC2000000 (KVM_GET_SREGS)");
+       TEST_ASSERT(!memcmp(&sregs.crs[1], regs_null, sizeof(sregs.crs[1]) * 12),
+                   "cr1-13 == 0 (KVM_GET_SREGS)");
+       TEST_ASSERT(sregs.crs[15] == 0, "cr15 == 0 (KVM_GET_SREGS)");
+
+       /* sync regs */
+       TEST_ASSERT(sync_regs->crs[0] == 0xE0UL, "cr0 == 0xE0 (sync_regs)");
+       TEST_ASSERT(sync_regs->crs[14] == 0xC2000000UL,
+                   "cr14 == 0xC2000000 (sync_regs)");
+       TEST_ASSERT(!memcmp(&sync_regs->crs[1], regs_null, 8 * 12),
+                   "cr1-13 == 0 (sync_regs)");
+       TEST_ASSERT(sync_regs->crs[15] == 0, "cr15 == 0 (sync_regs)");
+       TEST_ASSERT(sync_regs->fpc == 0, "fpc == 0 (sync_regs)");
+       TEST_ASSERT(sync_regs->todpr == 0, "todpr == 0 (sync_regs)");
+       TEST_ASSERT(sync_regs->cputm == 0, "cputm == 0 (sync_regs)");
+       TEST_ASSERT(sync_regs->ckc == 0, "ckc == 0 (sync_regs)");
+       TEST_ASSERT(sync_regs->pp == 0, "pp == 0 (sync_regs)");
+       TEST_ASSERT(sync_regs->gbea == 1, "gbea == 1 (sync_regs)");
+
+       /* kvm_run */
+       TEST_ASSERT(vcpu->run->psw_addr == 0, "psw_addr == 0 (kvm_run)");
+       TEST_ASSERT(vcpu->run->psw_mask == 0, "psw_mask == 0 (kvm_run)");
+
+       vcpu_fpu_get(vcpu, &fpu);
+       TEST_ASSERT(!fpu.fpc, "fpc == 0");
+
+       test_one_reg(vcpu, KVM_REG_S390_GBEA, 1);
+       test_one_reg(vcpu, KVM_REG_S390_PP, 0);
+       test_one_reg(vcpu, KVM_REG_S390_TODPR, 0);
+       test_one_reg(vcpu, KVM_REG_S390_CPU_TIMER, 0);
+       test_one_reg(vcpu, KVM_REG_S390_CLOCK_COMP, 0);
+}
+
+static void assert_normal_noclear(struct kvm_vcpu *vcpu)
+{
+       struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
+
+       TEST_ASSERT(sync_regs->crs[2] == 0x10, "cr2 == 0x10 (sync_regs)");
+       TEST_ASSERT(sync_regs->crs[8] == 1, "cr8 == 1 (sync_regs)");
+       TEST_ASSERT(sync_regs->crs[10] == 1, "cr10 == 1 (sync_regs)");
+       TEST_ASSERT(sync_regs->crs[11] == -1, "cr11 == -1 (sync_regs)");
+}
+
+static void assert_normal(struct kvm_vcpu *vcpu)
+{
+       test_one_reg(vcpu, KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID);
+       TEST_ASSERT(vcpu->run->s.regs.pft == KVM_S390_PFAULT_TOKEN_INVALID,
+                       "pft == 0xff.....  (sync_regs)");
+       assert_noirq(vcpu);
+}
+
+static void inject_irq(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_irq_state irq_state;
+       struct kvm_s390_irq *irq = &buf[0];
+       int irqs;
+
+       /* Inject IRQ */
+       irq_state.len = sizeof(struct kvm_s390_irq);
+       irq_state.buf = (unsigned long)buf;
+       irq->type = KVM_S390_INT_EMERGENCY;
+       irq->u.emerg.code = vcpu->id;
+       irqs = __vcpu_ioctl(vcpu, KVM_S390_SET_IRQ_STATE, &irq_state);
+       TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d", errno);
+}
+
+static struct kvm_vm *create_vm(struct kvm_vcpu **vcpu)
+{
+       struct kvm_vm *vm;
+
+       vm = vm_create(1);
+
+       *vcpu = vm_vcpu_add(vm, ARBITRARY_NON_ZERO_VCPU_ID, guest_code_initial);
+
+       return vm;
+}
+
+static void test_normal(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       ksft_print_msg("Testing normal reset\n");
+       vm = create_vm(&vcpu);
+
+       vcpu_run(vcpu);
+
+       inject_irq(vcpu);
+
+       vcpu_ioctl(vcpu, KVM_S390_NORMAL_RESET, NULL);
+
+       /* must clear */
+       assert_normal(vcpu);
+       /* must not clear */
+       assert_normal_noclear(vcpu);
+       assert_initial_noclear(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+static void test_initial(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       ksft_print_msg("Testing initial reset\n");
+       vm = create_vm(&vcpu);
+
+       vcpu_run(vcpu);
+
+       inject_irq(vcpu);
+
+       vcpu_ioctl(vcpu, KVM_S390_INITIAL_RESET, NULL);
+
+       /* must clear */
+       assert_normal(vcpu);
+       assert_initial(vcpu);
+       /* must not clear */
+       assert_initial_noclear(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+static void test_clear(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       ksft_print_msg("Testing clear reset\n");
+       vm = create_vm(&vcpu);
+
+       vcpu_run(vcpu);
+
+       inject_irq(vcpu);
+
+       vcpu_ioctl(vcpu, KVM_S390_CLEAR_RESET, NULL);
+
+       /* must clear */
+       assert_normal(vcpu);
+       assert_initial(vcpu);
+       assert_clear(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+struct testdef {
+       const char *name;
+       void (*test)(void);
+       bool needs_cap;
+} testlist[] = {
+       { "initial", test_initial, false },
+       { "normal", test_normal, true },
+       { "clear", test_clear, true },
+};
+
+int main(int argc, char *argv[])
+{
+       bool has_s390_vcpu_resets = kvm_check_cap(KVM_CAP_S390_VCPU_RESETS);
+       int idx;
+
+       ksft_print_header();
+       ksft_set_plan(ARRAY_SIZE(testlist));
+
+       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+               if (!testlist[idx].needs_cap || has_s390_vcpu_resets) {
+                       testlist[idx].test();
+                       ksft_test_result_pass("%s\n", testlist[idx].name);
+               } else {
+                       ksft_test_result_skip("%s - no VCPU_RESETS capability\n",
+                                             testlist[idx].name);
+               }
+       }
+
+       ksft_finished();        /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/s390/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c
new file mode 100644 (file)
index 0000000..bba0d9a
--- /dev/null
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test shared zeropage handling (with/without storage keys)
+ *
+ * Copyright (C) 2024, Red Hat, Inc.
+ */
+#include <sys/mman.h>
+
+#include <linux/fs.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+
+static void set_storage_key(void *addr, uint8_t skey)
+{
+       asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
+}
+
+static void guest_code(void)
+{
+       /* Issue some storage key instruction. */
+       set_storage_key((void *)0, 0x98);
+       GUEST_DONE();
+}
+
+/*
+ * Returns 1 if the shared zeropage is mapped, 0 if something else is mapped.
+ * Returns < 0 on error or if nothing is mapped.
+ */
+static int maps_shared_zeropage(int pagemap_fd, void *addr)
+{
+       struct page_region region;
+       struct pm_scan_arg arg = {
+               .start = (uintptr_t)addr,
+               .end = (uintptr_t)addr + 4096,
+               .vec = (uintptr_t)&region,
+               .vec_len = 1,
+               .size = sizeof(struct pm_scan_arg),
+               .category_mask = PAGE_IS_PFNZERO,
+               .category_anyof_mask = PAGE_IS_PRESENT,
+               .return_mask = PAGE_IS_PFNZERO,
+       };
+       return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
+}
+
+int main(int argc, char *argv[])
+{
+       char *mem, *page0, *page1, *page2, tmp;
+       const size_t pagesize = getpagesize();
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       int pagemap_fd;
+
+       ksft_print_header();
+       ksft_set_plan(3);
+
+       /*
+        * We'll use memory that is not mapped into the VM for simplicity.
+        * Shared zeropages are enabled/disabled per-process.
+        */
+       mem = mmap(0, 3 * pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
+       TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");
+
+       /* Disable THP. Ignore errors on older kernels. */
+       madvise(mem, 3 * pagesize, MADV_NOHUGEPAGE);
+
+       page0 = mem;
+       page1 = page0 + pagesize;
+       page2 = page1 + pagesize;
+
+       /* Can we even detect shared zeropages? */
+       pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+       TEST_REQUIRE(pagemap_fd >= 0);
+
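+       /*
+        * Touch the page with a read so the anonymous read-only mapping is
+        * backed by the shared zeropage; the empty asm prevents the compiler
+        * from optimizing the read away.
+        */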
+       tmp = *page0;
+       asm volatile("" : "+r" (tmp));
+       TEST_REQUIRE(maps_shared_zeropage(pagemap_fd, page0) == 1);
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       /* Verify that we get the shared zeropage after VM creation. */
+       tmp = *page1;
+       asm volatile("" : "+r" (tmp));
+       ksft_test_result(maps_shared_zeropage(pagemap_fd, page1) == 1,
+                        "Shared zeropages should be enabled\n");
+
+       /*
+        * Let our VM execute a storage key instruction that should
+        * unshare all shared zeropages.
+        */
+       vcpu_run(vcpu);
+       get_ucall(vcpu, &uc);
+       TEST_ASSERT_EQ(uc.cmd, UCALL_DONE);
+
+       /* Verify that we don't have a shared zeropage anymore. */
+       ksft_test_result(!maps_shared_zeropage(pagemap_fd, page1),
+                        "Shared zeropage should be gone\n");
+
+       /* Verify that we don't get any new shared zeropages. */
+       tmp = *page2;
+       asm volatile("" : "+r" (tmp));
+       ksft_test_result(!maps_shared_zeropage(pagemap_fd, page2),
+                        "Shared zeropages should be disabled\n");
+
+       kvm_vm_free(vm);
+
+       ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/s390/sync_regs_test.c b/tools/testing/selftests/kvm/s390/sync_regs_test.c
new file mode 100644 (file)
index 0000000..53def35
--- /dev/null
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for s390x KVM_CAP_SYNC_REGS
+ *
+ * Based on the same test for x86:
+ * Copyright (C) 2018, Google LLC.
+ *
+ * Adaptations for s390x:
+ * Copyright (C) 2019, Red Hat, Inc.
+ *
+ * Test expected behavior of the KVM_CAP_SYNC_REGS functionality.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "diag318_test_handler.h"
+#include "kselftest.h"
+
+static void guest_code(void)
+{
+       /*
+        * We embed diag 501 here instead of doing a ucall to make sure the
+        * compiler has not clobbered r11 by the time of the hypercall.
+        */
+       asm volatile (
+               "0:     diag 0,0,0x501\n"
+               "       ahi 11,1\n"
+               "       j 0b\n"
+       );
+}
+
+#define REG_COMPARE(reg) \
+       TEST_ASSERT(left->reg == right->reg, \
+                   "Register " #reg \
+                   " values did not match: 0x%llx, 0x%llx", \
+                   left->reg, right->reg)
+
+#define REG_COMPARE32(reg) \
+       TEST_ASSERT(left->reg == right->reg, \
+                   "Register " #reg \
+                   " values did not match: 0x%x, 0x%x", \
+                   left->reg, right->reg)
+
+
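+/* Compare the GPRs from KVM_GET_REGS against the sync_regs area. */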
+static void compare_regs(struct kvm_regs *left, struct kvm_sync_regs *right)
+{
+       int i;
+
+       for (i = 0; i < 16; i++)
+               REG_COMPARE(gprs[i]);
+}
+
+static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right)
+{
+       int i;
+
+       for (i = 0; i < 16; i++)
+               REG_COMPARE32(acrs[i]);
+
+       for (i = 0; i < 16; i++)
+               REG_COMPARE(crs[i]);
+}
+
+#undef REG_COMPARE
+
+#define TEST_SYNC_FIELDS   (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS|KVM_SYNC_DIAG318)
+#define INVALID_SYNC_FIELD 0x80000000
+
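+/* KVM_RUN must fail with EINVAL when kvm_valid_regs contains unknown bits. */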
+void test_read_invalid(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       int rv;
+
+       /* Request reading invalid register set from VCPU. */
+       run->kvm_valid_regs = INVALID_SYNC_FIELD;
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv < 0 && errno == EINVAL,
+                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
+                   rv);
+       run->kvm_valid_regs = 0;
+
+       run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv < 0 && errno == EINVAL,
+                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
+                   rv);
+       run->kvm_valid_regs = 0;
+}
+
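+/* KVM_RUN must fail with EINVAL when kvm_dirty_regs contains unknown bits. */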
+void test_set_invalid(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       int rv;
+
+       /* Request setting invalid register set into VCPU. */
+       run->kvm_dirty_regs = INVALID_SYNC_FIELD;
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv < 0 && errno == EINVAL,
+                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
+                   rv);
+       run->kvm_dirty_regs = 0;
+
+       run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv < 0 && errno == EINVAL,
+                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
+                   rv);
+       run->kvm_dirty_regs = 0;
+}
+
+void test_req_and_verify_all_valid_regs(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_sregs sregs;
+       struct kvm_regs regs;
+       int rv;
+
+       /* Request and verify all valid register sets. */
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+       TEST_ASSERT(run->s390_sieic.icptcode == 4 &&
+                   (run->s390_sieic.ipa >> 8) == 0x83 &&
+                   (run->s390_sieic.ipb >> 16) == 0x501,
+                   "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x",
+                   run->s390_sieic.icptcode, run->s390_sieic.ipa,
+                   run->s390_sieic.ipb);
+
+       vcpu_regs_get(vcpu, &regs);
+       compare_regs(&regs, &run->s.regs);
+
+       vcpu_sregs_get(vcpu, &sregs);
+       compare_sregs(&sregs, &run->s.regs);
+}
+
+void test_set_and_verify_various_reg_values(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_sregs sregs;
+       struct kvm_regs regs;
+       int rv;
+
+       /* Set and verify various register values */
+       run->s.regs.gprs[11] = 0xBAD1DEA;
+       run->s.regs.acrs[0] = 1 << 11;
+
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS;
+
+       if (get_diag318_info() > 0) {
+               run->s.regs.diag318 = get_diag318_info();
+               run->kvm_dirty_regs |= KVM_SYNC_DIAG318;
+       }
+
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+       TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1,
+                   "r11 sync regs value incorrect 0x%llx.",
+                   run->s.regs.gprs[11]);
+       TEST_ASSERT(run->s.regs.acrs[0]  == 1 << 11,
+                   "acr0 sync regs value incorrect 0x%x.",
+                   run->s.regs.acrs[0]);
+       TEST_ASSERT(run->s.regs.diag318 == get_diag318_info(),
+                   "diag318 sync regs value incorrect 0x%llx.",
+                   run->s.regs.diag318);
+
+       vcpu_regs_get(vcpu, &regs);
+       compare_regs(&regs, &run->s.regs);
+
+       vcpu_sregs_get(vcpu, &sregs);
+       compare_sregs(&sregs, &run->s.regs);
+}
+
+void test_clear_kvm_dirty_regs_bits(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       int rv;
+
+       /* Clear kvm_dirty_regs bits, verify new s.regs values are
+        * overwritten with existing guest values.
+        */
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       run->kvm_dirty_regs = 0;
+       run->s.regs.gprs[11] = 0xDEADBEEF;
+       run->s.regs.diag318 = 0x4B1D;
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+       TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF,
+                   "r11 sync regs value incorrect 0x%llx.",
+                   run->s.regs.gprs[11]);
+       TEST_ASSERT(run->s.regs.diag318 != 0x4B1D,
+                   "diag318 sync regs value incorrect 0x%llx.",
+                   run->s.regs.diag318);
+}
+
+struct testdef {
+       const char *name;
+       void (*test)(struct kvm_vcpu *vcpu);
+} testlist[] = {
+       { "read invalid", test_read_invalid },
+       { "set invalid", test_set_invalid },
+       { "request+verify all valid regs", test_req_and_verify_all_valid_regs },
+       { "set+verify various regs", test_set_and_verify_various_reg_values },
+       { "clear kvm_dirty_regs bits", test_clear_kvm_dirty_regs_bits },
+};
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       int idx;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
+
+       ksft_print_header();
+
+       ksft_set_plan(ARRAY_SIZE(testlist));
+
+       /* Create VM */
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+               testlist[idx].test(vcpu);
+               ksft_test_result_pass("%s\n", testlist[idx].name);
+       }
+
+       kvm_vm_free(vm);
+
+       ksft_finished();        /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/s390/tprot.c b/tools/testing/selftests/kvm/s390/tprot.c
new file mode 100644 (file)
index 0000000..12d5e1c
--- /dev/null
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test TEST PROTECTION emulation.
+ *
+ * Copyright IBM Corp. 2021
+ */
+#include <sys/mman.h>
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+#include "processor.h"
+
+#define CR0_FETCH_PROTECTION_OVERRIDE  (1UL << (63 - 38))
+#define CR0_STORAGE_PROTECTION_OVERRIDE        (1UL << (63 - 39))
+
+static __aligned(PAGE_SIZE) uint8_t pages[2][PAGE_SIZE];
+static uint8_t *const page_store_prot = pages[0];
+static uint8_t *const page_fetch_prot = pages[1];
+
+/* A nonzero return value indicates that the address is not mapped */
+static int set_storage_key(void *addr, uint8_t key)
+{
+       int not_mapped = 0;
+
+       asm volatile (
+                      "lra     %[addr], 0(0,%[addr])\n"
+               "       jz      0f\n"
+               "       llill   %[not_mapped],1\n"
+               "       j       1f\n"
+               "0:     sske    %[key], %[addr]\n"
+               "1:"
+               : [addr] "+&a" (addr), [not_mapped] "+r" (not_mapped)
+               : [key] "r" (key)
+               : "cc"
+       );
+       return -not_mapped;
+}
+
+enum permission {
+       READ_WRITE = 0,
+       READ = 1,
+       RW_PROTECTED = 2,
+       TRANSL_UNAVAIL = 3,
+};
+
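+/*
+ * Execute TEST PROTECTION on addr with the given access key and return the
+ * resulting condition code, which maps directly onto enum permission.
+ */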
+static enum permission test_protection(void *addr, uint8_t key)
+{
+       uint64_t mask;
+
+       asm volatile (
+                      "tprot   %[addr], 0(%[key])\n"
+               "       ipm     %[mask]\n"
+               : [mask] "=r" (mask)
+               : [addr] "Q" (*(char *)addr),
+                 [key] "a" (key)
+               : "cc"
+       );
+
+       return (enum permission)(mask >> 28);
+}
+
+enum stage {
+       STAGE_INIT_SIMPLE,
+       TEST_SIMPLE,
+       STAGE_INIT_FETCH_PROT_OVERRIDE,
+       TEST_FETCH_PROT_OVERRIDE,
+       TEST_STORAGE_PROT_OVERRIDE,
+       STAGE_END       /* must be the last entry (it's the number of tests) */
+};
+
+struct test {
+       enum stage stage;
+       void *addr;
+       uint8_t key;
+       enum permission expected;
+} tests[] = {
+       /*
+        * We perform each test in the array by executing TEST PROTECTION on
+        * the specified addr with the specified key and checking if the returned
+        * permissions match the expected value.
+        * Both guest and host cooperate to set up the required test conditions.
+        * A central condition is that the page targeted by addr has to be DAT
+        * protected in the host mappings, in order for KVM to emulate the
+        * TEST PROTECTION instruction.
+        * Since the page tables are shared, the host uses mprotect to achieve
+        * this.
+        *
+        * Tests resulting in RW_PROTECTED/TRANSL_UNAVAIL are interpreted
+        * by SIE, not KVM, but there is no harm in testing them as well.
+        * See "Enhanced Suppression-on-Protection Facilities in the
+        * Interpretive-Execution Mode".
+        */
+       /*
+        * guest: set storage key of page_store_prot to 1
+        *        storage key of page_fetch_prot to 9 and enable
+        *        protection for it
+        * STAGE_INIT_SIMPLE
+        * host: write protect both via mprotect
+        */
+       /* access key 0 matches any storage key -> RW */
+       { TEST_SIMPLE, page_store_prot, 0x00, READ_WRITE },
+       /* access key matches storage key -> RW */
+       { TEST_SIMPLE, page_store_prot, 0x10, READ_WRITE },
+       /* mismatched keys, but no fetch protection -> RO */
+       { TEST_SIMPLE, page_store_prot, 0x20, READ },
+       /* access key 0 matches any storage key -> RW */
+       { TEST_SIMPLE, page_fetch_prot, 0x00, READ_WRITE },
+       /* access key matches storage key -> RW */
+       { TEST_SIMPLE, page_fetch_prot, 0x90, READ_WRITE },
+       /* mismatched keys, fetch protection -> inaccessible */
+       { TEST_SIMPLE, page_fetch_prot, 0x10, RW_PROTECTED },
+       /* page 0 not mapped yet -> translation not available */
+       { TEST_SIMPLE, (void *)0x00, 0x10, TRANSL_UNAVAIL },
+       /*
+        * host: try to map page 0
+        * guest: set storage key of page 0 to 9 and enable fetch protection
+        * STAGE_INIT_FETCH_PROT_OVERRIDE
+        * host: write protect page 0
+        *       enable fetch protection override
+        */
+       /* mismatched keys, fetch protection, but override applies -> RO */
+       { TEST_FETCH_PROT_OVERRIDE, (void *)0x00, 0x10, READ },
+       /* mismatched keys, fetch protection, override applies to 0-2048 only -> inaccessible */
+       { TEST_FETCH_PROT_OVERRIDE, (void *)2049, 0x10, RW_PROTECTED },
+       /*
+        * host: enable storage protection override
+        */
+       /* mismatched keys, but override applies (storage key 9) -> RW */
+       { TEST_STORAGE_PROT_OVERRIDE, page_fetch_prot, 0x10, READ_WRITE },
+       /* mismatched keys, no fetch protection, override doesn't apply -> RO */
+       { TEST_STORAGE_PROT_OVERRIDE, page_store_prot, 0x20, READ },
+       /* mismatched keys, but override applies (storage key 9) -> RW */
+       { TEST_STORAGE_PROT_OVERRIDE, (void *)2049, 0x10, READ_WRITE },
+       /* end marker */
+       { STAGE_END, 0, 0, 0 },
+};
+
+static enum stage perform_next_stage(int *i, bool mapped_0)
+{
+       enum stage stage = tests[*i].stage;
+       enum permission result;
+       bool skip;
+
+       for (; tests[*i].stage == stage; (*i)++) {
+               /*
+                * Some fetch protection override tests require that page 0
+                * be mapped. However, when the host tries to map that page via
+                * vm_vaddr_alloc, it may happen that some other page gets mapped
+                * instead.
+                * In order to skip these tests, we detect this inside the guest.
+                */
+               skip = tests[*i].addr < (void *)PAGE_SIZE &&
+                      tests[*i].expected != TRANSL_UNAVAIL &&
+                      !mapped_0;
+               if (!skip) {
+                       result = test_protection(tests[*i].addr, tests[*i].key);
+                       __GUEST_ASSERT(result == tests[*i].expected,
+                                      "Wanted %u, got %u, for i = %u",
+                                      tests[*i].expected, result, *i);
+               }
+       }
+       return stage;
+}
+
+static void guest_code(void)
+{
+       bool mapped_0;
+       int i = 0;
+
+       GUEST_ASSERT_EQ(set_storage_key(page_store_prot, 0x10), 0);
+       GUEST_ASSERT_EQ(set_storage_key(page_fetch_prot, 0x98), 0);
+       GUEST_SYNC(STAGE_INIT_SIMPLE);
+       GUEST_SYNC(perform_next_stage(&i, false));
+
+       /* Fetch-protection override */
+       mapped_0 = !set_storage_key((void *)0, 0x98);
+       GUEST_SYNC(STAGE_INIT_FETCH_PROT_OVERRIDE);
+       GUEST_SYNC(perform_next_stage(&i, mapped_0));
+
+       /* Storage-protection override */
+       GUEST_SYNC(perform_next_stage(&i, mapped_0));
+}
+
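+/*
+ * Run the vCPU until the next GUEST_SYNC and assert that it reports the
+ * expected stage; unlike HOST_SYNC, no TAP result is emitted.
+ */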
+#define HOST_SYNC_NO_TAP(vcpup, stage)                         \
+({                                                             \
+       struct kvm_vcpu *__vcpu = (vcpup);                      \
+       struct ucall uc;                                        \
+       int __stage = (stage);                                  \
+                                                               \
+       vcpu_run(__vcpu);                                       \
+       get_ucall(__vcpu, &uc);                                 \
+       if (uc.cmd == UCALL_ABORT)                              \
+               REPORT_GUEST_ASSERT(uc);                        \
+       TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC);                     \
+       TEST_ASSERT_EQ(uc.args[1], __stage);                    \
+})
+
+#define HOST_SYNC(vcpu, stage)                 \
+({                                             \
+       HOST_SYNC_NO_TAP(vcpu, stage);          \
+       ksft_test_result_pass("" #stage "\n");  \
+})
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct kvm_run *run;
+       vm_vaddr_t guest_0_page;
+
+       ksft_print_header();
+       ksft_set_plan(STAGE_END);
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       run = vcpu->run;
+
+       HOST_SYNC(vcpu, STAGE_INIT_SIMPLE);
+       mprotect(addr_gva2hva(vm, (vm_vaddr_t)pages), PAGE_SIZE * 2, PROT_READ);
+       HOST_SYNC(vcpu, TEST_SIMPLE);
+
+       guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0);
+       if (guest_0_page != 0) {
+               /* Use NO_TAP so we don't get a PASS print */
+               HOST_SYNC_NO_TAP(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE);
+               ksft_test_result_skip("STAGE_INIT_FETCH_PROT_OVERRIDE - "
+                                     "Did not allocate page at 0\n");
+       } else {
+               HOST_SYNC(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE);
+       }
+       if (guest_0_page == 0)
+               mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ);
+       run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
+       run->kvm_dirty_regs = KVM_SYNC_CRS;
+       HOST_SYNC(vcpu, TEST_FETCH_PROT_OVERRIDE);
+
+       run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
+       run->kvm_dirty_regs = KVM_SYNC_CRS;
+       HOST_SYNC(vcpu, TEST_STORAGE_PROT_OVERRIDE);
+
+       kvm_vm_free(vm);
+
+       ksft_finished();        /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c
new file mode 100644 (file)
index 0000000..0c11231
--- /dev/null
@@ -0,0 +1,638 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test code for the s390x kvm ucontrol interface
+ *
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ *  Christoph Schlameuss <schlameuss@linux.ibm.com>
+ */
+#include "debug_print.h"
+#include "kselftest_harness.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "sie.h"
+
+#include <linux/capability.h>
+#include <linux/sizes.h>
+
+#define PGM_SEGMENT_TRANSLATION 0x10
+
+#define VM_MEM_SIZE (4 * SZ_1M)
+#define VM_MEM_EXT_SIZE (2 * SZ_1M)
+#define VM_MEM_MAX_M ((VM_MEM_SIZE + VM_MEM_EXT_SIZE) / SZ_1M)
+
+/* Declare capget directly so we can check caps without libcap */
+int capget(cap_user_header_t header, cap_user_data_t data);
+
+/**
+ * In order to create user-controlled virtual machines on S390, check for
+ * KVM_CAP_S390_UCONTROL and use the KVM_VM_S390_UCONTROL flag as a
+ * privileged user (CAP_SYS_ADMIN).
+ */
+void require_ucontrol_admin(void)
+{
+       struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3];
+       struct __user_cap_header_struct hdr = {
+               .version = _LINUX_CAPABILITY_VERSION_3,
+       };
+       int rc;
+
+       rc = capget(&hdr, data);
+       TEST_ASSERT_EQ(0, rc);
+       TEST_REQUIRE((data->effective & CAP_TO_MASK(CAP_SYS_ADMIN)) > 0);
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL));
+}
+
+/* Test program setting some registers and looping */
+extern char test_gprs_asm[];
+asm("test_gprs_asm:\n"
+       "xgr    %r0, %r0\n"
+       "lgfi   %r1,1\n"
+       "lgfi   %r2,2\n"
+       "lgfi   %r3,3\n"
+       "lgfi   %r4,4\n"
+       "lgfi   %r5,5\n"
+       "lgfi   %r6,6\n"
+       "lgfi   %r7,7\n"
+       "0:\n"
+       "       diag    0,0,0x44\n"
+       "       ahi     %r0,1\n"
+       "       j       0b\n"
+);
+
+/* Test program manipulating memory */
+extern char test_mem_asm[];
+asm("test_mem_asm:\n"
+       "xgr    %r0, %r0\n"
+
+       "0:\n"
+       "       ahi     %r0,1\n"
+       "       st      %r1,0(%r5,%r6)\n"
+
+       "       xgr     %r1,%r1\n"
+       "       l       %r1,0(%r5,%r6)\n"
+       "       ahi     %r0,1\n"
+       "       diag    0,0,0x44\n"
+
+       "       j       0b\n"
+);
+
+/* Test program manipulating storage keys */
+extern char test_skey_asm[];
+asm("test_skey_asm:\n"
+       "xgr    %r0, %r0\n"
+
+       "0:\n"
+       "       ahi     %r0,1\n"
+       "       st      %r1,0(%r5,%r6)\n"
+
+       "       iske    %r1,%r6\n"
+       "       ahi     %r0,1\n"
+       "       diag    0,0,0x44\n"
+
+       "       sske    %r1,%r6\n"
+       "       xgr     %r1,%r1\n"
+       "       iske    %r1,%r6\n"
+       "       ahi     %r0,1\n"
+       "       diag    0,0,0x44\n"
+
+       "       rrbe    %r1,%r6\n"
+       "       iske    %r1,%r6\n"
+       "       ahi     %r0,1\n"
+       "       diag    0,0,0x44\n"
+
+       "       j       0b\n"
+);
+
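+/* Per-test state for the user-controlled (ucontrol) VM fixture. */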
+FIXTURE(uc_kvm)
+{
+       struct kvm_s390_sie_block *sie_block;
+       struct kvm_run *run;
+       uintptr_t base_gpa;
+       uintptr_t code_gpa;
+       uintptr_t base_hva;
+       uintptr_t code_hva;
+       int kvm_run_size;
+       vm_paddr_t pgd;
+       void *vm_mem;
+       int vcpu_fd;
+       int kvm_fd;
+       int vm_fd;
+};
+
+/**
+ * Create a VM with a single vCPU, and map kvm_run and the SIE control block
+ * for easy access.
+ */
+FIXTURE_SETUP(uc_kvm)
+{
+       struct kvm_s390_vm_cpu_processor info;
+       int rc;
+
+       require_ucontrol_admin();
+
+       self->kvm_fd = open_kvm_dev_path_or_exit();
+       self->vm_fd = ioctl(self->kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
+       ASSERT_GE(self->vm_fd, 0);
+
+       kvm_device_attr_get(self->vm_fd, KVM_S390_VM_CPU_MODEL,
+                           KVM_S390_VM_CPU_PROCESSOR, &info);
+       TH_LOG("create VM 0x%llx", info.cpuid);
+
+       self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0);
+       ASSERT_GE(self->vcpu_fd, 0);
+
+       self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
+       ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run))
+                 TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size));
+       self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size,
+                   PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
+       ASSERT_NE(self->run, MAP_FAILED);
+       /**
+        * For virtual cpus that have been created with S390 user controlled
+        * virtual machines, the resulting vcpu fd can be memory mapped at page
+        * offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of
+        * the virtual cpu's hardware control block.
+        */
+       self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE,
+                         PROT_READ | PROT_WRITE, MAP_SHARED,
+                         self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT);
+       ASSERT_NE(self->sie_block, MAP_FAILED);
+
+       TH_LOG("VM created %p %p", self->run, self->sie_block);
+
+       self->base_gpa = 0;
+       self->code_gpa = self->base_gpa + (3 * SZ_1M);
+
+       self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_MAX_M * SZ_1M);
+       ASSERT_NE(NULL, self->vm_mem) TH_LOG("aligned_alloc failed %u", errno);
+       self->base_hva = (uintptr_t)self->vm_mem;
+       self->code_hva = self->base_hva - self->base_gpa + self->code_gpa;
+       struct kvm_s390_ucas_mapping map = {
+               .user_addr = self->base_hva,
+               .vcpu_addr = self->base_gpa,
+               .length = VM_MEM_SIZE,
+       };
+       TH_LOG("ucas map %p %p 0x%llx",
+              (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
+       rc = ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map);
+       ASSERT_EQ(0, rc) TH_LOG("ucas map result %d not expected, %s",
+                               rc, strerror(errno));
+
+       TH_LOG("page in %p", (void *)self->base_gpa);
+       rc = ioctl(self->vcpu_fd, KVM_S390_VCPU_FAULT, self->base_gpa);
+       ASSERT_EQ(0, rc) TH_LOG("vcpu fault (%p) result %d not expected, %s",
+                               (void *)self->base_hva, rc, strerror(errno));
+
+       self->sie_block->cpuflags &= ~CPUSTAT_STOPPED;
+}
+
+FIXTURE_TEARDOWN(uc_kvm)
+{
+       munmap(self->sie_block, PAGE_SIZE);
+       munmap(self->run, self->kvm_run_size);
+       close(self->vcpu_fd);
+       close(self->vm_fd);
+       close(self->kvm_fd);
+       free(self->vm_mem);
+}
+
+TEST_F(uc_kvm, uc_sie_assertions)
+{
+       /* assert interception of Code 08 (Program Interruption) is set */
+       EXPECT_EQ(0, self->sie_block->ecb & ECB_SPECI);
+}
+
+TEST_F(uc_kvm, uc_attr_mem_limit)
+{
+       u64 limit;
+       struct kvm_device_attr attr = {
+               .group = KVM_S390_VM_MEM_CTRL,
+               .attr = KVM_S390_VM_MEM_LIMIT_SIZE,
+               .addr = (unsigned long)&limit,
+       };
+       int rc;
+
+       rc = ioctl(self->vm_fd, KVM_GET_DEVICE_ATTR, &attr);
+       EXPECT_EQ(0, rc);
+       EXPECT_EQ(~0UL, limit);
+
+       /* assert set not supported */
+       rc = ioctl(self->vm_fd, KVM_SET_DEVICE_ATTR, &attr);
+       EXPECT_EQ(-1, rc);
+       EXPECT_EQ(EINVAL, errno);
+}
+
+TEST_F(uc_kvm, uc_no_dirty_log)
+{
+       struct kvm_dirty_log dlog;
+       int rc;
+
+       rc = ioctl(self->vm_fd, KVM_GET_DIRTY_LOG, &dlog);
+       EXPECT_EQ(-1, rc);
+       EXPECT_EQ(EINVAL, errno);
+}
+
+/**
+ * Assert HPAGE CAP cannot be enabled on UCONTROL VM
+ */
+TEST(uc_cap_hpage)
+{
+       int rc, kvm_fd, vm_fd, vcpu_fd;
+       struct kvm_enable_cap cap = {
+               .cap = KVM_CAP_S390_HPAGE_1M,
+       };
+
+       require_ucontrol_admin();
+
+       kvm_fd = open_kvm_dev_path_or_exit();
+       vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
+       ASSERT_GE(vm_fd, 0);
+
+       /* assert hpages are not supported on ucontrol vm */
+       rc = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_HPAGE_1M);
+       EXPECT_EQ(0, rc);
+
+       /* Test that KVM_CAP_S390_HPAGE_1M can't be enabled for a ucontrol vm */
+       rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap);
+       EXPECT_EQ(-1, rc);
+       EXPECT_EQ(EINVAL, errno);
+
+       /* assert HPAGE CAP is rejected after vCPU creation */
+       vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
+       ASSERT_GE(vcpu_fd, 0);
+       rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap);
+       EXPECT_EQ(-1, rc);
+       EXPECT_EQ(EBUSY, errno);
+
+       close(vcpu_fd);
+       close(vm_fd);
+       close(kvm_fd);
+}
+
+/* calculate host virtual addr from guest physical addr */
+static void *gpa2hva(FIXTURE_DATA(uc_kvm) *self, u64 gpa)
+{
+       return (void *)(self->base_hva - self->base_gpa + gpa);
+}
+
+/* map / make additional memory available */
+static int uc_map_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length)
+{
+       struct kvm_s390_ucas_mapping map = {
+               .user_addr = (u64)gpa2hva(self, vcpu_addr),
+               .vcpu_addr = vcpu_addr,
+               .length = length,
+       };
+       pr_info("ucas map %p %p 0x%llx",
+               (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
+       return ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map);
+}
+
+/* unmap previously mapped memory */
+static int uc_unmap_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length)
+{
+       struct kvm_s390_ucas_mapping map = {
+               .user_addr = (u64)gpa2hva(self, vcpu_addr),
+               .vcpu_addr = vcpu_addr,
+               .length = length,
+       };
+       pr_info("ucas unmap %p %p 0x%llx",
+               (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
+       return ioctl(self->vcpu_fd, KVM_S390_UCAS_UNMAP, &map);
+}
+
+/* handle ucontrol exit by mapping the accessed segment */
+static void uc_handle_exit_ucontrol(FIXTURE_DATA(uc_kvm) *self)
+{
+       struct kvm_run *run = self->run;
+       u64 seg_addr;
+       int rc;
+
+       TEST_ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
+       switch (run->s390_ucontrol.pgm_code) {
+       case PGM_SEGMENT_TRANSLATION:
+               seg_addr = run->s390_ucontrol.trans_exc_code & ~(SZ_1M - 1);
+               pr_info("ucontrol pic segment translation 0x%llx, mapping segment 0x%lx\n",
+                       run->s390_ucontrol.trans_exc_code, seg_addr);
+               /* map / make additional memory available */
+               rc = uc_map_ext(self, seg_addr, SZ_1M);
+               TEST_ASSERT_EQ(0, rc);
+               break;
+       default:
+               TEST_FAIL("UNEXPECTED PGM CODE %d", run->s390_ucontrol.pgm_code);
+       }
+}
+
+/*
+ * Enable storage key handling for the guest by clearing the KSS cpuflag and
+ * disabling interception of the storage key instructions.
+ */
+static void uc_skey_enable(FIXTURE_DATA(uc_kvm) *self)
+{
+       struct kvm_s390_sie_block *sie_block = self->sie_block;
+
+       /* disable KSS */
+       sie_block->cpuflags &= ~CPUSTAT_KSS;
+       /* disable skey inst interception */
+       sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+}
+
+/*
+ * Handle the instruction intercept.
+ * Returns true if the interception was handled and execution can continue.
+ */
+static bool uc_handle_insn_ic(FIXTURE_DATA(uc_kvm) *self)
+{
+       struct kvm_s390_sie_block *sie_block = self->sie_block;
+       int ilen = insn_length(sie_block->ipa >> 8);
+       struct kvm_run *run = self->run;
+
+       switch (run->s390_sieic.ipa) {
+       case 0xB229: /* ISKE */
+       case 0xB22b: /* SSKE */
+       case 0xB22a: /* RRBE */
+               uc_skey_enable(self);
+
+               /* rewind to reexecute intercepted instruction */
+               run->psw_addr = run->psw_addr - ilen;
+               pr_info("rewind guest addr to 0x%.16llx\n", run->psw_addr);
+               return true;
+       default:
+               return false;
+       }
+}
+
+/*
+ * Handle the SIEIC exit.
+ * Fails on interception codes not expected in the test cases.
+ * Returns true if the interception was handled and execution can continue.
+ */
+static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) *self)
+{
+       struct kvm_s390_sie_block *sie_block = self->sie_block;
+       struct kvm_run *run = self->run;
+
+       /* check SIE interception code */
+       pr_info("sieic: 0x%.2x 0x%.4x 0x%.8x\n",
+               run->s390_sieic.icptcode,
+               run->s390_sieic.ipa,
+               run->s390_sieic.ipb);
+       switch (run->s390_sieic.icptcode) {
+       case ICPT_INST:
+               /* end execution in caller on intercepted instruction */
+               pr_info("sie instruction interception\n");
+               return uc_handle_insn_ic(self);
+       case ICPT_KSS:
+               uc_skey_enable(self);
+               return true;
+       case ICPT_OPEREXC:
+               /* operation exception */
+               TEST_FAIL("sie exception on %.4x%.8x", sie_block->ipa, sie_block->ipb);
+       default:
+               TEST_FAIL("UNEXPECTED SIEIC CODE %d", run->s390_sieic.icptcode);
+       }
+       return true;
+}
+
+/* verify VM state on exit */
+static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) *self)
+{
+       struct kvm_run *run = self->run;
+
+       switch (run->exit_reason) {
+       case KVM_EXIT_S390_UCONTROL:
+               /*
+                * Check the program interruption code and handle a page
+                * fault by doing a ucas map.
+                */
+               uc_handle_exit_ucontrol(self);
+               break;
+       case KVM_EXIT_S390_SIEIC:
+               return uc_handle_sieic(self);
+       default:
+               pr_info("exit_reason %2d not handled\n", run->exit_reason);
+       }
+       return true;
+}
+
+/* run the VM until interrupted */
+static int uc_run_once(FIXTURE_DATA(uc_kvm) *self)
+{
+       int rc;
+
+       rc = ioctl(self->vcpu_fd, KVM_RUN, NULL);
+       print_run(self->run, self->sie_block);
+       print_regs(self->run);
+       pr_debug("run %d / %d %s\n", rc, errno, strerror(errno));
+       return rc;
+}
+
+static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) *self)
+{
+       struct kvm_s390_sie_block *sie_block = self->sie_block;
+
+       /* assert vm was interrupted by diag 0x0044 */
+       TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason);
+       TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode);
+       TEST_ASSERT_EQ(0x8300, sie_block->ipa);
+       TEST_ASSERT_EQ(0x440000, sie_block->ipb);
+}
+
+TEST_F(uc_kvm, uc_no_user_region)
+{
+       struct kvm_userspace_memory_region region = {
+               .slot = 1,
+               .guest_phys_addr = self->code_gpa,
+               .memory_size = VM_MEM_EXT_SIZE,
+               .userspace_addr = (uintptr_t)self->code_hva,
+       };
+       struct kvm_userspace_memory_region2 region2 = {
+               .slot = 1,
+               .guest_phys_addr = self->code_gpa,
+               .memory_size = VM_MEM_EXT_SIZE,
+               .userspace_addr = (uintptr_t)self->code_hva,
+       };
+
+       ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, &region));
+       ASSERT_EQ(EINVAL, errno);
+
+       ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION2, &region2));
+       ASSERT_EQ(EINVAL, errno);
+}
+
+TEST_F(uc_kvm, uc_map_unmap)
+{
+       struct kvm_sync_regs *sync_regs = &self->run->s.regs;
+       struct kvm_run *run = self->run;
+       const u64 disp = 1;
+       int rc;
+
+       /* copy test_mem_asm to code_hva / code_gpa */
+       TH_LOG("copy code %p to vm mapped memory %p / %p",
+              &test_mem_asm, (void *)self->code_hva, (void *)self->code_gpa);
+       memcpy((void *)self->code_hva, &test_mem_asm, PAGE_SIZE);
+
+       /* DAT disabled + 64 bit mode */
+       run->psw_mask = 0x0000000180000000ULL;
+       run->psw_addr = self->code_gpa;
+
+       /* set register content for test_mem_asm to access unmapped memory */
+       sync_regs->gprs[1] = 0x55;
+       sync_regs->gprs[5] = self->base_gpa;
+       sync_regs->gprs[6] = VM_MEM_SIZE + disp;
+       run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+
+       /* run and expect to fail with ucontrol pic segment translation */
+       ASSERT_EQ(0, uc_run_once(self));
+       ASSERT_EQ(1, sync_regs->gprs[0]);
+       ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
+
+       ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code);
+       ASSERT_EQ(self->base_gpa + VM_MEM_SIZE, run->s390_ucontrol.trans_exc_code);
+
+       /* fail to map memory at an address that is not segment aligned */
+       rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE + disp, VM_MEM_EXT_SIZE);
+       ASSERT_GT(0, rc)
+               TH_LOG("ucas map for non segment address should fail but didn't; "
+                      "result %d not expected, %s", rc, strerror(errno));
+
+       /* map / make additional memory available */
+       rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE);
+       ASSERT_EQ(0, rc)
+               TH_LOG("ucas map result %d not expected, %s", rc, strerror(errno));
+       ASSERT_EQ(0, uc_run_once(self));
+       ASSERT_EQ(false, uc_handle_exit(self));
+       uc_assert_diag44(self);
+
+       /* assert registers and memory are in expected state */
+       ASSERT_EQ(2, sync_regs->gprs[0]);
+       ASSERT_EQ(0x55, sync_regs->gprs[1]);
+       ASSERT_EQ(0x55, *(u32 *)gpa2hva(self, self->base_gpa + VM_MEM_SIZE + disp));
+
+       /* unmap and run loop again */
+       rc = uc_unmap_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE);
+       ASSERT_EQ(0, rc)
+               TH_LOG("ucas unmap result %d not expected, %s", rc, strerror(errno));
+       ASSERT_EQ(0, uc_run_once(self));
+       ASSERT_EQ(3, sync_regs->gprs[0]);
+       ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
+       ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code);
+       /* handle ucontrol exit and remap memory after previous map and unmap */
+       ASSERT_EQ(true, uc_handle_exit(self));
+}
+
+TEST_F(uc_kvm, uc_gprs)
+{
+       struct kvm_sync_regs *sync_regs = &self->run->s.regs;
+       struct kvm_run *run = self->run;
+       struct kvm_regs regs = {};
+
+       /* Set registers to values that are different from the ones that we expect below */
+       for (int i = 0; i < 8; i++)
+               sync_regs->gprs[i] = 8;
+       run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+
+       /* copy test_gprs_asm to code_hva / code_gpa */
+       TH_LOG("copy code %p to vm mapped memory %p / %p",
+              &test_gprs_asm, (void *)self->code_hva, (void *)self->code_gpa);
+       memcpy((void *)self->code_hva, &test_gprs_asm, PAGE_SIZE);
+
+       /* DAT disabled + 64 bit mode */
+       run->psw_mask = 0x0000000180000000ULL;
+       run->psw_addr = self->code_gpa;
+
+       /* run and expect interception of diag 44 */
+       ASSERT_EQ(0, uc_run_once(self));
+       ASSERT_EQ(false, uc_handle_exit(self));
+       uc_assert_diag44(self);
+
+       /* Retrieve and check guest register values */
+       ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, &regs));
+       for (int i = 0; i < 8; i++) {
+               ASSERT_EQ(i, regs.gprs[i]);
+               ASSERT_EQ(i, sync_regs->gprs[i]);
+       }
+
+       /* run and expect interception of diag 44 again */
+       ASSERT_EQ(0, uc_run_once(self));
+       ASSERT_EQ(false, uc_handle_exit(self));
+       uc_assert_diag44(self);
+
+       /* check continued increment of register 0 value */
+       ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, &regs));
+       ASSERT_EQ(1, regs.gprs[0]);
+       ASSERT_EQ(1, sync_regs->gprs[0]);
+}
+
+TEST_F(uc_kvm, uc_skey)
+{
+       struct kvm_s390_sie_block *sie_block = self->sie_block;
+       struct kvm_sync_regs *sync_regs = &self->run->s.regs;
+       u64 test_vaddr = VM_MEM_SIZE - (SZ_1M / 2);
+       struct kvm_run *run = self->run;
+       const u8 skeyvalue = 0x34;
+
+       /* copy test_skey_asm to code_hva / code_gpa */
+       TH_LOG("copy code %p to vm mapped memory %p / %p",
+              &test_skey_asm, (void *)self->code_hva, (void *)self->code_gpa);
+       memcpy((void *)self->code_hva, &test_skey_asm, PAGE_SIZE);
+
+       /* set register content for test_skey_asm to access memory at test_vaddr */
+       sync_regs->gprs[1] = skeyvalue;
+       sync_regs->gprs[5] = self->base_gpa;
+       sync_regs->gprs[6] = test_vaddr;
+       run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+
+       /* DAT disabled + 64 bit mode */
+       run->psw_mask = 0x0000000180000000ULL;
+       run->psw_addr = self->code_gpa;
+
+       ASSERT_EQ(0, uc_run_once(self));
+       ASSERT_EQ(true, uc_handle_exit(self));
+       ASSERT_EQ(1, sync_regs->gprs[0]);
+
+       /* ISKE */
+       ASSERT_EQ(0, uc_run_once(self));
+
+       /*
+        * Bail out and skip the test if uc_skey_enable was executed but ISKE
+        * is still intercepted. Such instructions are not handled by the
+        * kernel, so there is no need to test this here.
+        */
+       TEST_ASSERT_EQ(0, sie_block->cpuflags & CPUSTAT_KSS);
+       TEST_ASSERT_EQ(0, sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE));
+       TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason);
+       TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode);
+       TEST_REQUIRE(sie_block->ipa != 0xb229);
+
+       /* ISKE contd. */
+       ASSERT_EQ(false, uc_handle_exit(self));
+       ASSERT_EQ(2, sync_regs->gprs[0]);
+       /* assert initial skey (ACC = 0, R & C = 1) */
+       ASSERT_EQ(0x06, sync_regs->gprs[1]);
+       uc_assert_diag44(self);
+
+       /* SSKE + ISKE */
+       sync_regs->gprs[1] = skeyvalue;
+       run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+       ASSERT_EQ(0, uc_run_once(self));
+       ASSERT_EQ(false, uc_handle_exit(self));
+       ASSERT_EQ(3, sync_regs->gprs[0]);
+       ASSERT_EQ(skeyvalue, sync_regs->gprs[1]);
+       uc_assert_diag44(self);
+
+       /* RRBE + ISKE */
+       sync_regs->gprs[1] = skeyvalue;
+       run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+       ASSERT_EQ(0, uc_run_once(self));
+       ASSERT_EQ(false, uc_handle_exit(self));
+       ASSERT_EQ(4, sync_regs->gprs[0]);
+       /* assert R reset but rest of skey unchanged */
+       ASSERT_EQ(skeyvalue & 0xfa, sync_regs->gprs[1]);
+       ASSERT_EQ(0, sync_regs->gprs[1] & 0x04);
+       uc_assert_diag44(self);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c
deleted file mode 100644 (file)
index e32dd59..0000000
+++ /dev/null
@@ -1,695 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Test for s390x CMMA migration
- *
- * Copyright IBM Corp. 2023
- *
- * Authors:
- *  Nico Boehr <nrb@linux.ibm.com>
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "kselftest.h"
-#include "ucall_common.h"
-#include "processor.h"
-
-#define MAIN_PAGE_COUNT 512
-
-#define TEST_DATA_PAGE_COUNT 512
-#define TEST_DATA_MEMSLOT 1
-#define TEST_DATA_START_GFN PAGE_SIZE
-
-#define TEST_DATA_TWO_PAGE_COUNT 256
-#define TEST_DATA_TWO_MEMSLOT 2
-#define TEST_DATA_TWO_START_GFN (2 * PAGE_SIZE)
-
-static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT];
-
-/**
- * Dirty CMMA attributes of exactly one page in the TEST_DATA memslot,
- * so use_cmma goes on and the CMMA related ioctls do something.
- */
-static void guest_do_one_essa(void)
-{
-       asm volatile(
-               /* load TEST_DATA_START_GFN into r1 */
-               "       llilf 1,%[start_gfn]\n"
-               /* calculate the address from the gfn */
-               "       sllg 1,1,12(0)\n"
-               /* set the first page in TEST_DATA memslot to STABLE */
-               "       .insn rrf,0xb9ab0000,2,1,1,0\n"
-               /* hypercall */
-               "       diag 0,0,0x501\n"
-               "0:     j 0b"
-               :
-               : [start_gfn] "L"(TEST_DATA_START_GFN)
-               : "r1", "r2", "memory", "cc"
-       );
-}
-
-/**
- * Touch CMMA attributes of all pages in TEST_DATA memslot. Set them to stable
- * state.
- */
-static void guest_dirty_test_data(void)
-{
-       asm volatile(
-               /* r1 = TEST_DATA_START_GFN */
-               "       xgr 1,1\n"
-               "       llilf 1,%[start_gfn]\n"
-               /* r5 = TEST_DATA_PAGE_COUNT */
-               "       lghi 5,%[page_count]\n"
-               /* r5 += r1 */
-               "2:     agfr 5,1\n"
-               /* r2 = r1 << PAGE_SHIFT */
-               "1:     sllg 2,1,12(0)\n"
-               /* essa(r4, r2, SET_STABLE) */
-               "       .insn rrf,0xb9ab0000,4,2,1,0\n"
-               /* i++ */
-               "       agfi 1,1\n"
-               /* if r1 < r5 goto 1 */
-               "       cgrjl 1,5,1b\n"
-               /* hypercall */
-               "       diag 0,0,0x501\n"
-               "0:     j 0b"
-               :
-               : [start_gfn] "L"(TEST_DATA_START_GFN),
-                 [page_count] "L"(TEST_DATA_PAGE_COUNT)
-               :
-                       /* the counter in our loop over the pages */
-                       "r1",
-                       /* the calculated page physical address */
-                       "r2",
-                       /* ESSA output register */
-                       "r4",
-                       /* last page */
-                       "r5",
-                       "cc", "memory"
-       );
-}
-
-static void create_main_memslot(struct kvm_vm *vm)
-{
-       int i;
-
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, MAIN_PAGE_COUNT, 0);
-       /* set the array of memslots to zero like __vm_create does */
-       for (i = 0; i < NR_MEM_REGIONS; i++)
-               vm->memslots[i] = 0;
-}
-
-static void create_test_memslot(struct kvm_vm *vm)
-{
-       vm_userspace_mem_region_add(vm,
-                                   VM_MEM_SRC_ANONYMOUS,
-                                   TEST_DATA_START_GFN << vm->page_shift,
-                                   TEST_DATA_MEMSLOT,
-                                   TEST_DATA_PAGE_COUNT,
-                                   0
-                                  );
-       vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
-}
-
-static void create_memslots(struct kvm_vm *vm)
-{
-       /*
-        * Our VM has the following memory layout:
-        * +------+---------------------------+
-        * | GFN  | Memslot                   |
-        * +------+---------------------------+
-        * | 0    |                           |
-        * | ...  | MAIN (Code, Stack, ...)   |
-        * | 511  |                           |
-        * +------+---------------------------+
-        * | 4096 |                           |
-        * | ...  | TEST_DATA                 |
-        * | 4607 |                           |
-        * +------+---------------------------+
-        */
-       create_main_memslot(vm);
-       create_test_memslot(vm);
-}
-
-static void finish_vm_setup(struct kvm_vm *vm)
-{
-       struct userspace_mem_region *slot0;
-
-       kvm_vm_elf_load(vm, program_invocation_name);
-
-       slot0 = memslot2region(vm, 0);
-       ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
-
-       kvm_arch_vm_post_create(vm);
-}
-
-static struct kvm_vm *create_vm_two_memslots(void)
-{
-       struct kvm_vm *vm;
-
-       vm = vm_create_barebones();
-
-       create_memslots(vm);
-
-       finish_vm_setup(vm);
-
-       return vm;
-}
-
-static void enable_cmma(struct kvm_vm *vm)
-{
-       int r;
-
-       r = __kvm_device_attr_set(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA, NULL);
-       TEST_ASSERT(!r, "enabling cmma failed r=%d errno=%d", r, errno);
-}
-
-static void enable_dirty_tracking(struct kvm_vm *vm)
-{
-       vm_mem_region_set_flags(vm, 0, KVM_MEM_LOG_DIRTY_PAGES);
-       vm_mem_region_set_flags(vm, TEST_DATA_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
-}
-
-static int __enable_migration_mode(struct kvm_vm *vm)
-{
-       return __kvm_device_attr_set(vm->fd,
-                                    KVM_S390_VM_MIGRATION,
-                                    KVM_S390_VM_MIGRATION_START,
-                                    NULL
-                                   );
-}
-
-static void enable_migration_mode(struct kvm_vm *vm)
-{
-       int r = __enable_migration_mode(vm);
-
-       TEST_ASSERT(!r, "enabling migration mode failed r=%d errno=%d", r, errno);
-}
-
-static bool is_migration_mode_on(struct kvm_vm *vm)
-{
-       u64 out;
-       int r;
-
-       r = __kvm_device_attr_get(vm->fd,
-                                 KVM_S390_VM_MIGRATION,
-                                 KVM_S390_VM_MIGRATION_STATUS,
-                                 &out
-                                );
-       TEST_ASSERT(!r, "getting migration mode status failed r=%d errno=%d", r, errno);
-       return out;
-}
-
-static int vm_get_cmma_bits(struct kvm_vm *vm, u64 flags, int *errno_out)
-{
-       struct kvm_s390_cmma_log args;
-       int rc;
-
-       errno = 0;
-
-       args = (struct kvm_s390_cmma_log){
-               .start_gfn = 0,
-               .count = sizeof(cmma_value_buf),
-               .flags = flags,
-               .values = (__u64)&cmma_value_buf[0]
-       };
-       rc = __vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
-
-       *errno_out = errno;
-       return rc;
-}
-
-static void test_get_cmma_basic(void)
-{
-       struct kvm_vm *vm = create_vm_two_memslots();
-       struct kvm_vcpu *vcpu;
-       int rc, errno_out;
-
-       /* GET_CMMA_BITS without CMMA enabled should fail */
-       rc = vm_get_cmma_bits(vm, 0, &errno_out);
-       TEST_ASSERT_EQ(rc, -1);
-       TEST_ASSERT_EQ(errno_out, ENXIO);
-
-       enable_cmma(vm);
-       vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
-
-       vcpu_run(vcpu);
-
-       /* GET_CMMA_BITS without migration mode and without peeking should fail */
-       rc = vm_get_cmma_bits(vm, 0, &errno_out);
-       TEST_ASSERT_EQ(rc, -1);
-       TEST_ASSERT_EQ(errno_out, EINVAL);
-
-       /* GET_CMMA_BITS without migration mode and with peeking should work */
-       rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out);
-       TEST_ASSERT_EQ(rc, 0);
-       TEST_ASSERT_EQ(errno_out, 0);
-
-       enable_dirty_tracking(vm);
-       enable_migration_mode(vm);
-
-       /* GET_CMMA_BITS with invalid flags */
-       rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out);
-       TEST_ASSERT_EQ(rc, -1);
-       TEST_ASSERT_EQ(errno_out, EINVAL);
-
-       kvm_vm_free(vm);
-}
-
-static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu)
-{
-       TEST_ASSERT_EQ(vcpu->run->exit_reason, 13);
-       TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4);
-       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300);
-       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000);
-}
-
-static void test_migration_mode(void)
-{
-       struct kvm_vm *vm = vm_create_barebones();
-       struct kvm_vcpu *vcpu;
-       u64 orig_psw;
-       int rc;
-
-       /* enabling migration mode on a VM without memory should fail */
-       rc = __enable_migration_mode(vm);
-       TEST_ASSERT_EQ(rc, -1);
-       TEST_ASSERT_EQ(errno, EINVAL);
-       TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
-       errno = 0;
-
-       create_memslots(vm);
-       finish_vm_setup(vm);
-
-       enable_cmma(vm);
-       vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
-       orig_psw = vcpu->run->psw_addr;
-
-       /*
-        * Execute one essa instruction in the guest. Otherwise the guest will
-        * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
-        */
-       vcpu_run(vcpu);
-       assert_exit_was_hypercall(vcpu);
-
-       /* migration mode when memslots have dirty tracking off should fail */
-       rc = __enable_migration_mode(vm);
-       TEST_ASSERT_EQ(rc, -1);
-       TEST_ASSERT_EQ(errno, EINVAL);
-       TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
-       errno = 0;
-
-       /* enable dirty tracking */
-       enable_dirty_tracking(vm);
-
-       /* enabling migration mode should work now */
-       rc = __enable_migration_mode(vm);
-       TEST_ASSERT_EQ(rc, 0);
-       TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
-       errno = 0;
-
-       /* execute another ESSA instruction to see this goes fine */
-       vcpu->run->psw_addr = orig_psw;
-       vcpu_run(vcpu);
-       assert_exit_was_hypercall(vcpu);
-
-       /*
-        * With migration mode on, create a new memslot with dirty tracking off.
-        * This should turn off migration mode.
-        */
-       TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
-       vm_userspace_mem_region_add(vm,
-                                   VM_MEM_SRC_ANONYMOUS,
-                                   TEST_DATA_TWO_START_GFN << vm->page_shift,
-                                   TEST_DATA_TWO_MEMSLOT,
-                                   TEST_DATA_TWO_PAGE_COUNT,
-                                   0
-                                  );
-       TEST_ASSERT(!is_migration_mode_on(vm),
-                   "creating memslot without dirty tracking turns off migration mode"
-                  );
-
-       /* ESSA instructions should still execute fine */
-       vcpu->run->psw_addr = orig_psw;
-       vcpu_run(vcpu);
-       assert_exit_was_hypercall(vcpu);
-
-       /*
-        * Turn on dirty tracking on the new memslot.
-        * It should be possible to turn migration mode back on again.
-        */
-       vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
-       rc = __enable_migration_mode(vm);
-       TEST_ASSERT_EQ(rc, 0);
-       TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
-       errno = 0;
-
-       /*
-        * Turn off dirty tracking again, this time with just a flag change.
-        * Again, migration mode should turn off.
-        */
-       TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
-       vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, 0);
-       TEST_ASSERT(!is_migration_mode_on(vm),
-                   "disabling dirty tracking should turn off migration mode"
-                  );
-
-       /* ESSA instructions should still execute fine */
-       vcpu->run->psw_addr = orig_psw;
-       vcpu_run(vcpu);
-       assert_exit_was_hypercall(vcpu);
-
-       kvm_vm_free(vm);
-}
-
-/**
- * Given a VM with the MAIN and TEST_DATA memslot, assert that both slots have
- * CMMA attributes of all pages in both memslots and nothing more dirty.
- * This has the useful side effect of ensuring nothing is CMMA dirty after this
- * function.
- */
-static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
-{
-       struct kvm_s390_cmma_log args;
-
-       /*
-        * First iteration - everything should be dirty.
-        * Start at the main memslot...
-        */
-       args = (struct kvm_s390_cmma_log){
-               .start_gfn = 0,
-               .count = sizeof(cmma_value_buf),
-               .flags = 0,
-               .values = (__u64)&cmma_value_buf[0]
-       };
-       memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
-       vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
-       TEST_ASSERT_EQ(args.count, MAIN_PAGE_COUNT);
-       TEST_ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT);
-       TEST_ASSERT_EQ(args.start_gfn, 0);
-
-       /* ...and then - after a hole - the TEST_DATA memslot should follow */
-       args = (struct kvm_s390_cmma_log){
-               .start_gfn = MAIN_PAGE_COUNT,
-               .count = sizeof(cmma_value_buf),
-               .flags = 0,
-               .values = (__u64)&cmma_value_buf[0]
-       };
-       memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
-       vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
-       TEST_ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT);
-       TEST_ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN);
-       TEST_ASSERT_EQ(args.remaining, 0);
-
-       /* ...and nothing else should be there */
-       args = (struct kvm_s390_cmma_log){
-               .start_gfn = TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT,
-               .count = sizeof(cmma_value_buf),
-               .flags = 0,
-               .values = (__u64)&cmma_value_buf[0]
-       };
-       memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
-       vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
-       TEST_ASSERT_EQ(args.count, 0);
-       TEST_ASSERT_EQ(args.start_gfn, 0);
-       TEST_ASSERT_EQ(args.remaining, 0);
-}
-
-/**
- * Given a VM, assert no pages are CMMA dirty.
- */
-static void assert_no_pages_cmma_dirty(struct kvm_vm *vm)
-{
-       struct kvm_s390_cmma_log args;
-
-       /* If we start from GFN 0 again, nothing should be dirty. */
-       args = (struct kvm_s390_cmma_log){
-               .start_gfn = 0,
-               .count = sizeof(cmma_value_buf),
-               .flags = 0,
-               .values = (__u64)&cmma_value_buf[0]
-       };
-       memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
-       vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
-       if (args.count || args.remaining || args.start_gfn)
-               TEST_FAIL("pages are still dirty start_gfn=0x%llx count=%u remaining=%llu",
-                         args.start_gfn,
-                         args.count,
-                         args.remaining
-                        );
-}
-
-static void test_get_inital_dirty(void)
-{
-       struct kvm_vm *vm = create_vm_two_memslots();
-       struct kvm_vcpu *vcpu;
-
-       enable_cmma(vm);
-       vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
-
-       /*
-        * Execute one essa instruction in the guest. Otherwise the guest will
-        * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
-        */
-       vcpu_run(vcpu);
-       assert_exit_was_hypercall(vcpu);
-
-       enable_dirty_tracking(vm);
-       enable_migration_mode(vm);
-
-       assert_all_slots_cmma_dirty(vm);
-
-       /* Start from the beginning again and make sure nothing else is dirty */
-       assert_no_pages_cmma_dirty(vm);
-
-       kvm_vm_free(vm);
-}
-
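-/*
- * Issue KVM_S390_GET_CMMA_BITS for gfn_count pages starting at start_gfn and
- * store the raw ioctl result in *res_out. The CMMA values land in
- * cmma_value_buf, which is pre-filled with 0xff so untouched entries stand out.
- */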
-static void query_cmma_range(struct kvm_vm *vm,
-                            u64 start_gfn, u64 gfn_count,
-                            struct kvm_s390_cmma_log *res_out)
-{
-       *res_out = (struct kvm_s390_cmma_log){
-               .start_gfn = start_gfn,
-               .count = gfn_count,
-               .flags = 0,
-               .values = (__u64)&cmma_value_buf[0]
-       };
-       memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
-       vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, res_out);
-}
-
-/**
- * Assert the given cmma_log struct that was executed by query_cmma_range()
- * indicates the first dirty gfn is at first_dirty_gfn and contains exactly
- * dirty_gfn_count CMMA values.
- */
-static void assert_cmma_dirty(u64 first_dirty_gfn,
-                             u64 dirty_gfn_count,
-                             const struct kvm_s390_cmma_log *res)
-{
-       TEST_ASSERT_EQ(res->start_gfn, first_dirty_gfn);
-       TEST_ASSERT_EQ(res->count, dirty_gfn_count);
-       for (size_t i = 0; i < dirty_gfn_count; i++)
-               TEST_ASSERT_EQ(cmma_value_buf[i], 0x0); /* stable state */
-       TEST_ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */
-}
-
-static void test_get_skip_holes(void)
-{
-       size_t gfn_offset;
-       struct kvm_vm *vm = create_vm_two_memslots();
-       struct kvm_s390_cmma_log log;
-       struct kvm_vcpu *vcpu;
-       u64 orig_psw;
-
-       enable_cmma(vm);
-       vcpu = vm_vcpu_add(vm, 1, guest_dirty_test_data);
-
-       orig_psw = vcpu->run->psw_addr;
-
-       /*
-        * Execute some essa instructions in the guest. Otherwise the guest will
-        * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
-        */
-       vcpu_run(vcpu);
-       assert_exit_was_hypercall(vcpu);
-
-       enable_dirty_tracking(vm);
-       enable_migration_mode(vm);
-
-       /* un-dirty all pages */
-       assert_all_slots_cmma_dirty(vm);
-
-       /* Then, dirty just the TEST_DATA memslot */
-       vcpu->run->psw_addr = orig_psw;
-       vcpu_run(vcpu);
-
-       gfn_offset = TEST_DATA_START_GFN;
-       /**
-        * Query CMMA attributes of one page, starting at page 0. Since the
-        * main memslot was not touched by the VM, this should yield the first
-        * page of the TEST_DATA memslot.
-        * The dirty bitmap should now look like this:
-        * 0: not dirty
-        * [0x1, 0x200): dirty
-        */
-       query_cmma_range(vm, 0, 1, &log);
-       assert_cmma_dirty(gfn_offset, 1, &log);
-       gfn_offset++;
-
-       /**
-        * Query CMMA attributes of 32 (0x20) pages past the end of the TEST_DATA
-        * memslot. This should wrap back to the beginning of the TEST_DATA
-        * memslot, page 1.
-        * The dirty bitmap should now look like this:
-        * [0, 0x21): not dirty
-        * [0x21, 0x200): dirty
-        */
-       query_cmma_range(vm, TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, 0x20, &log);
-       assert_cmma_dirty(gfn_offset, 0x20, &log);
-       gfn_offset += 0x20;
-
-       /* Skip 32 pages */
-       gfn_offset += 0x20;
-
-       /**
-        * After skipping 32 pages, query the next 32 (0x20) pages.
-        * The dirty bitmap should now look like this:
-        * [0, 0x21): not dirty
-        * [0x21, 0x41): dirty
-        * [0x41, 0x61): not dirty
-        * [0x61, 0x200): dirty
-        */
-       query_cmma_range(vm, gfn_offset, 0x20, &log);
-       assert_cmma_dirty(gfn_offset, 0x20, &log);
-       gfn_offset += 0x20;
-
-       /**
-        * Query 1 page from the beginning of the TEST_DATA memslot. This should
-        * yield page 0x21.
-        * The dirty bitmap should now look like this:
-        * [0, 0x22): not dirty
-        * [0x22, 0x41): dirty
-        * [0x41, 0x61): not dirty
-        * [0x61, 0x200): dirty
-        */
-       query_cmma_range(vm, TEST_DATA_START_GFN, 1, &log);
-       assert_cmma_dirty(TEST_DATA_START_GFN + 0x21, 1, &log);
-       gfn_offset++;
-
-       /**
-        * Query 15 (0xF) pages from page 0x23 in TEST_DATA memslot.
-        * This should yield pages [0x23, 0x33).
-        * The dirty bitmap should now look like this:
-        * [0, 0x22): not dirty
-        * 0x22: dirty
-        * [0x23, 0x33): not dirty
-        * [0x33, 0x41): dirty
-        * [0x41, 0x61): not dirty
-        * [0x61, 0x200): dirty
-        */
-       gfn_offset = TEST_DATA_START_GFN + 0x23;
-       query_cmma_range(vm, gfn_offset, 15, &log);
-       assert_cmma_dirty(gfn_offset, 15, &log);
-
-       /**
-        * Query 17 (0x11) pages from page 0x22 in TEST_DATA memslot.
- * This should yield pages [0x22, 0x33).
-        * The dirty bitmap should now look like this:
-        * [0, 0x33): not dirty
-        * [0x33, 0x41): dirty
-        * [0x41, 0x61): not dirty
-        * [0x61, 0x200): dirty
-        */
-       gfn_offset = TEST_DATA_START_GFN + 0x22;
-       query_cmma_range(vm, gfn_offset, 17, &log);
-       assert_cmma_dirty(gfn_offset, 17, &log);
-
-       /**
-        * Query 25 (0x19) pages from page 0x40 in TEST_DATA memslot.
-        * This should yield page 0x40 and nothing more, since there are more
-        * than 16 non-dirty pages after page 0x40.
-        * The dirty bitmap should now look like this:
-        * [0, 0x33): not dirty
-        * [0x33, 0x40): dirty
-        * [0x40, 0x61): not dirty
-        * [0x61, 0x200): dirty
-        */
-       gfn_offset = TEST_DATA_START_GFN + 0x40;
-       query_cmma_range(vm, gfn_offset, 25, &log);
-       assert_cmma_dirty(gfn_offset, 1, &log);
-
-       /**
-        * Query pages [0x33, 0x40).
-        * The dirty bitmap should now look like this:
-        * [0, 0x61): not dirty
-        * [0x61, 0x200): dirty
-        */
-       gfn_offset = TEST_DATA_START_GFN + 0x33;
-       query_cmma_range(vm, gfn_offset, 0x40 - 0x33, &log);
-       assert_cmma_dirty(gfn_offset, 0x40 - 0x33, &log);
-
-       /**
-        * Query the remaining pages [0x61, 0x200).
-        */
-       gfn_offset = TEST_DATA_START_GFN;
-       query_cmma_range(vm, gfn_offset, TEST_DATA_PAGE_COUNT - 0x61, &log);
-       assert_cmma_dirty(TEST_DATA_START_GFN + 0x61, TEST_DATA_PAGE_COUNT - 0x61, &log);
-
-       assert_no_pages_cmma_dirty(vm);
-}
-
-struct testdef {
-       const char *name;
-       void (*test)(void);
-} testlist[] = {
-       { "migration mode and dirty tracking", test_migration_mode },
-       { "GET_CMMA_BITS: basic calls", test_get_cmma_basic },
-       { "GET_CMMA_BITS: all pages are dirty initially", test_get_initial_dirty },
-       { "GET_CMMA_BITS: holes are skipped", test_get_skip_holes },
-};
-
-/**
- * The kernel may support CMMA, but the machine may not (i.e. if running as
- * guest-3).
- *
- * In this case, the CMMA capabilities are all there, but the CMMA-related
- * ioctls fail. To find out whether the machine supports CMMA, create a
- * temporary VM and then query the CMMA feature of the VM.
- */
-static int machine_has_cmma(void)
-{
-       struct kvm_vm *vm = vm_create_barebones();
-       int r;
-
-       r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA);
-       kvm_vm_free(vm);
-
-       return r;
-}
-
-int main(int argc, char *argv[])
-{
-       int idx;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_CMMA_MIGRATION));
-       TEST_REQUIRE(machine_has_cmma());
-
-       ksft_print_header();
-
-       ksft_set_plan(ARRAY_SIZE(testlist));
-
-       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
-               testlist[idx].test();
-               ksft_test_result_pass("%s\n", testlist[idx].name);
-       }
-
-       ksft_finished();        /* Print results and exit() accordingly */
-}
diff --git a/tools/testing/selftests/kvm/s390x/config b/tools/testing/selftests/kvm/s390x/config
deleted file mode 100644 (file)
index 23270f2..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-CONFIG_KVM=y
-CONFIG_KVM_S390_UCONTROL=y
diff --git a/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c
deleted file mode 100644 (file)
index 2725588..0000000
+++ /dev/null
@@ -1,301 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright IBM Corp. 2024
- *
- * Authors:
- *  Hariharan Mari <hari55@linux.ibm.com>
- *
- * The tests compare the results of the KVM ioctl for obtaining CPU subfunction data with those
- * from an ASM block performing the same CPU subfunction. Currently KVM doesn't mask instruction
- * query data reported via the CPU Model, allowing us to directly compare it with the data
- * acquired through executing the queries in the test.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include "facility.h"
-
-#include "kvm_util.h"
-
-#define PLO_FUNCTION_MAX 256
-
-/* Query available CPU subfunctions */
-struct kvm_s390_vm_cpu_subfunc cpu_subfunc;
-
-static void get_cpu_machine_subfunctions(struct kvm_vm *vm,
-                                       struct kvm_s390_vm_cpu_subfunc *cpu_subfunc)
-{
-       int r;
-
-       r = __kvm_device_attr_get(vm->fd, KVM_S390_VM_CPU_MODEL,
-                                 KVM_S390_VM_CPU_MACHINE_SUBFUNC, cpu_subfunc);
-
-       TEST_ASSERT(!r, "Get cpu subfunctions failed r=%d errno=%d", r, errno);
-}
-
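-/*
- * Check whether PLO function number @nr is installed by executing PLO with
- * the test-bit flag (0x100) set; condition code 0 means it is available.
- */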
-static inline int plo_test_bit(unsigned char nr)
-{
-       unsigned long function = nr | 0x100;
-       int cc;
-
-       asm volatile("  lgr     0,%[function]\n"
-                       /* Parameter registers are ignored for "test bit" */
-                       "       plo     0,0,0,0(0)\n"
-                       "       ipm     %0\n"
-                       "       srl     %0,28\n"
-                       : "=d" (cc)
-                       : [function] "d" (function)
-                       : "cc", "0");
-       return cc == 0;
-}
-
-/* Testing Perform Locked Operation (PLO) CPU subfunction's ASM block */
-static void test_plo_asm_block(u8 (*query)[32])
-{
-       for (int i = 0; i < PLO_FUNCTION_MAX; ++i) {
-               if (plo_test_bit(i))
-                       (*query)[i >> 3] |= 0x80 >> (i & 7);
-       }
-}
-
-/* Testing Crypto Compute Message Authentication Code (KMAC) CPU subfunction's ASM block */
-static void test_kmac_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rre,0xb91e0000,0,2\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Cipher Message with Chaining (KMC) CPU subfunction's ASM block */
-static void test_kmc_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rre,0xb92f0000,2,4\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Cipher Message (KM) CPU subfunction's ASM block */
-static void test_km_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rre,0xb92e0000,2,4\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Compute Intermediate Message Digest (KIMD) CPU subfunction's ASM block */
-static void test_kimd_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rre,0xb93e0000,0,2\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Compute Last Message Digest (KLMD) CPU subfunction's ASM block */
-static void test_klmd_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rre,0xb93f0000,0,2\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Cipher Message with Counter (KMCTR) CPU subfunction's ASM block */
-static void test_kmctr_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rrf,0xb92d0000,2,4,6,0\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Cipher Message with Cipher Feedback (KMF) CPU subfunction's ASM block */
-static void test_kmf_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rre,0xb92a0000,2,4\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Cipher Message with Output Feedback (KMO) CPU subfunction's ASM block */
-static void test_kmo_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rre,0xb92b0000,2,4\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Perform Cryptographic Computation (PCC) CPU subfunction's ASM block */
-static void test_pcc_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rre,0xb92c0000,0,0\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Perform Random Number Operation (PRNO) CPU subfunction's ASM block */
-static void test_prno_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rre,0xb93c0000,2,4\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Cipher Message with Authentication (KMA) CPU subfunction's ASM block */
-static void test_kma_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rrf,0xb9290000,2,4,6,0\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Compute Digital Signature Authentication (KDSA) CPU subfunction's ASM block */
-static void test_kdsa_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rre,0xb93a0000,0,2\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Sort Lists (SORTL) CPU subfunction's ASM block */
-static void test_sortl_asm_block(u8 (*query)[32])
-{
-       asm volatile("  lghi    0,0\n"
-                       "       la      1,%[query]\n"
-                       "       .insn   rre,0xb9380000,2,4\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "0", "1");
-}
-
-/* Testing Deflate Conversion Call (DFLTCC) CPU subfunction's ASM block */
-static void test_dfltcc_asm_block(u8 (*query)[32])
-{
-       asm volatile("  lghi    0,0\n"
-                       "       la      1,%[query]\n"
-                       "       .insn   rrf,0xb9390000,2,4,6,0\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "0", "1");
-}
-
-/*
- * Testing Perform Function with Concurrent Results (PFCR)
- * CPU subfunction's ASM block
- */
-static void test_pfcr_asm_block(u8 (*query)[16])
-{
-       asm volatile("  lghi    0,0\n"
-                       "       .insn   rsy,0xeb0000000016,0,0,%[query]\n"
-                       : [query] "=QS" (*query)
-                       :
-                       : "cc", "0");
-}
-
-typedef void (*testfunc_t)(u8 (*array)[]);
-
-struct testdef {
-       const char *subfunc_name;
-       u8 *subfunc_array;
-       size_t array_size;
-       testfunc_t test;
-       int facility_bit;
-} testlist[] = {
-       /*
-        * PLO was introduced in the very first 64-bit machine generation.
-        * Hence it is assumed PLO is always installed in Z Arch.
-        */
-       { "PLO", cpu_subfunc.plo, sizeof(cpu_subfunc.plo), test_plo_asm_block, 1 },
-       /* MSA - Facility bit 17 */
-       { "KMAC", cpu_subfunc.kmac, sizeof(cpu_subfunc.kmac), test_kmac_asm_block, 17 },
-       { "KMC", cpu_subfunc.kmc, sizeof(cpu_subfunc.kmc), test_kmc_asm_block, 17 },
-       { "KM", cpu_subfunc.km, sizeof(cpu_subfunc.km), test_km_asm_block, 17 },
-       { "KIMD", cpu_subfunc.kimd, sizeof(cpu_subfunc.kimd), test_kimd_asm_block, 17 },
-       { "KLMD", cpu_subfunc.klmd, sizeof(cpu_subfunc.klmd), test_klmd_asm_block, 17 },
-       /* MSA - Facility bit 77 */
-       { "KMCTR", cpu_subfunc.kmctr, sizeof(cpu_subfunc.kmctr), test_kmctr_asm_block, 77 },
-       { "KMF", cpu_subfunc.kmf, sizeof(cpu_subfunc.kmf), test_kmf_asm_block, 77 },
-       { "KMO", cpu_subfunc.kmo, sizeof(cpu_subfunc.kmo), test_kmo_asm_block, 77 },
-       { "PCC", cpu_subfunc.pcc, sizeof(cpu_subfunc.pcc), test_pcc_asm_block, 77 },
-       /* MSA5 - Facility bit 57 */
-       { "PPNO", cpu_subfunc.ppno, sizeof(cpu_subfunc.ppno), test_prno_asm_block, 57 },
-       /* MSA8 - Facility bit 146 */
-       { "KMA", cpu_subfunc.kma, sizeof(cpu_subfunc.kma), test_kma_asm_block, 146 },
-       /* MSA9 - Facility bit 155 */
-       { "KDSA", cpu_subfunc.kdsa, sizeof(cpu_subfunc.kdsa), test_kdsa_asm_block, 155 },
-       /* SORTL - Facility bit 150 */
-       { "SORTL", cpu_subfunc.sortl, sizeof(cpu_subfunc.sortl), test_sortl_asm_block, 150 },
-       /* DFLTCC - Facility bit 151 */
-       { "DFLTCC", cpu_subfunc.dfltcc, sizeof(cpu_subfunc.dfltcc), test_dfltcc_asm_block, 151 },
-       /* Concurrent-function facility - Facility bit 201 */
-       { "PFCR", cpu_subfunc.pfcr, sizeof(cpu_subfunc.pfcr), test_pfcr_asm_block, 201 },
-};
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vm *vm;
-       int idx;
-
-       ksft_print_header();
-
-       vm = vm_create(1);
-
-       memset(&cpu_subfunc, 0, sizeof(cpu_subfunc));
-       get_cpu_machine_subfunctions(vm, &cpu_subfunc);
-
-       ksft_set_plan(ARRAY_SIZE(testlist));
-       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
-               if (test_facility(testlist[idx].facility_bit)) {
-                       u8 *array = malloc(testlist[idx].array_size);
-
-                       testlist[idx].test((u8 (*)[testlist[idx].array_size])array);
-
-                       TEST_ASSERT_EQ(memcmp(testlist[idx].subfunc_array,
-                                             array, testlist[idx].array_size), 0);
-
-                       ksft_test_result_pass("%s\n", testlist[idx].subfunc_name);
-                       free(array);
-               } else {
-                       ksft_test_result_skip("%s feature is not available\n",
-                                             testlist[idx].subfunc_name);
-               }
-       }
-
-       kvm_vm_free(vm);
-       ksft_finished();
-}
diff --git a/tools/testing/selftests/kvm/s390x/debug_test.c b/tools/testing/selftests/kvm/s390x/debug_test.c
deleted file mode 100644 (file)
index ad80959..0000000
+++ /dev/null
@@ -1,160 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Test KVM debugging features. */
-#include "kvm_util.h"
-#include "test_util.h"
-#include "sie.h"
-
-#include <linux/kvm.h>
-
-#define __LC_SVC_NEW_PSW 0x1c0
-#define __LC_PGM_NEW_PSW 0x1d0
-#define IPA0_DIAG 0x8300
-#define PGM_SPECIFICATION 0x06
-
-/* Common code for testing single-stepping interruptions. */
-extern char int_handler[];
-asm("int_handler:\n"
-    "j .\n");
-
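-/*
- * Create a VM with one vCPU running guest_code, install int_handler as the
- * interruption new PSW at new_psw_off in the lowcore, enable single-stepping
- * and run the vCPU once; new_psw receives the PSW the interruption loads.
- */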
-static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code,
-                                     size_t new_psw_off, uint64_t *new_psw)
-{
-       struct kvm_guest_debug debug = {};
-       struct kvm_regs regs;
-       struct kvm_vm *vm;
-       char *lowcore;
-
-       vm = vm_create_with_one_vcpu(vcpu, guest_code);
-       lowcore = addr_gpa2hva(vm, 0);
-       new_psw[0] = (*vcpu)->run->psw_mask;
-       new_psw[1] = (uint64_t)int_handler;
-       memcpy(lowcore + new_psw_off, new_psw, 16);
-       vcpu_regs_get(*vcpu, &regs);
-       regs.gprs[2] = -1;
-       vcpu_regs_set(*vcpu, &regs);
-       debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
-       vcpu_guest_debug_set(*vcpu, &debug);
-       vcpu_run(*vcpu);
-
-       return vm;
-}
-
-static void test_step_int(void *guest_code, size_t new_psw_off)
-{
-       struct kvm_vcpu *vcpu;
-       uint64_t new_psw[2];
-       struct kvm_vm *vm;
-
-       vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
-       TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
-       TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
-       kvm_vm_free(vm);
-}
-
-/* Test single-stepping "boring" program interruptions. */
-extern char test_step_pgm_guest_code[];
-asm("test_step_pgm_guest_code:\n"
-    ".insn rr,0x1d00,%r1,%r0 /* dr %r1,%r0 */\n"
-    "j .\n");
-
-static void test_step_pgm(void)
-{
-       test_step_int(test_step_pgm_guest_code, __LC_PGM_NEW_PSW);
-}
-
-/*
- * Test single-stepping program interruptions caused by DIAG.
- * Userspace emulation must not interfere with single-stepping.
- */
-extern char test_step_pgm_diag_guest_code[];
-asm("test_step_pgm_diag_guest_code:\n"
-    "diag %r0,%r0,0\n"
-    "j .\n");
-
-static void test_step_pgm_diag(void)
-{
-       struct kvm_s390_irq irq = {
-               .type = KVM_S390_PROGRAM_INT,
-               .u.pgm.code = PGM_SPECIFICATION,
-       };
-       struct kvm_vcpu *vcpu;
-       uint64_t new_psw[2];
-       struct kvm_vm *vm;
-
-       vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code,
-                            __LC_PGM_NEW_PSW, new_psw);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
-       TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INST);
-       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG);
-       vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq);
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
-       TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
-       TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
-       kvm_vm_free(vm);
-}
-
-/*
- * Test single-stepping program interruptions caused by ISKE.
- * CPUSTAT_KSS handling must not interfere with single-stepping.
- */
-extern char test_step_pgm_iske_guest_code[];
-asm("test_step_pgm_iske_guest_code:\n"
-    "iske %r2,%r2\n"
-    "j .\n");
-
-static void test_step_pgm_iske(void)
-{
-       test_step_int(test_step_pgm_iske_guest_code, __LC_PGM_NEW_PSW);
-}
-
-/*
- * Test single-stepping program interruptions caused by LCTL.
- * KVM emulation must not interfere with single-stepping.
- */
-extern char test_step_pgm_lctl_guest_code[];
-asm("test_step_pgm_lctl_guest_code:\n"
-    "lctl %c0,%c0,1\n"
-    "j .\n");
-
-static void test_step_pgm_lctl(void)
-{
-       test_step_int(test_step_pgm_lctl_guest_code, __LC_PGM_NEW_PSW);
-}
-
-/* Test single-stepping supervisor-call interruptions. */
-extern char test_step_svc_guest_code[];
-asm("test_step_svc_guest_code:\n"
-    "svc 0\n"
-    "j .\n");
-
-static void test_step_svc(void)
-{
-       test_step_int(test_step_svc_guest_code, __LC_SVC_NEW_PSW);
-}
-
-/* Run all tests above. */
-static struct testdef {
-       const char *name;
-       void (*test)(void);
-} testlist[] = {
-       { "single-step pgm", test_step_pgm },
-       { "single-step pgm caused by diag", test_step_pgm_diag },
-       { "single-step pgm caused by iske", test_step_pgm_iske },
-       { "single-step pgm caused by lctl", test_step_pgm_lctl },
-       { "single-step svc", test_step_svc },
-};
-
-int main(int argc, char *argv[])
-{
-       int idx;
-
-       ksft_print_header();
-       ksft_set_plan(ARRAY_SIZE(testlist));
-       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
-               testlist[idx].test();
-               ksft_test_result_pass("%s\n", testlist[idx].name);
-       }
-       ksft_finished();
-}
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
deleted file mode 100644 (file)
index 4374b4c..0000000
+++ /dev/null
@@ -1,1187 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Test for s390x KVM_S390_MEM_OP
- *
- * Copyright (C) 2019, Red Hat, Inc.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <pthread.h>
-
-#include <linux/bits.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "kselftest.h"
-#include "ucall_common.h"
-#include "processor.h"
-
-enum mop_target {
-       LOGICAL,
-       SIDA,
-       ABSOLUTE,
-       INVALID,
-};
-
-enum mop_access_mode {
-       READ,
-       WRITE,
-       CMPXCHG,
-};
-
-struct mop_desc {
-       uintptr_t gaddr;
-       uintptr_t gaddr_v;
-       uint64_t set_flags;
-       unsigned int f_check : 1;
-       unsigned int f_inject : 1;
-       unsigned int f_key : 1;
-       unsigned int _gaddr_v : 1;
-       unsigned int _set_flags : 1;
-       unsigned int _sida_offset : 1;
-       unsigned int _ar : 1;
-       uint32_t size;
-       enum mop_target target;
-       enum mop_access_mode mode;
-       void *buf;
-       uint32_t sida_offset;
-       void *old;
-       uint8_t old_value[16];
-       bool *cmpxchg_success;
-       uint8_t ar;
-       uint8_t key;
-};
-
-const uint8_t NO_KEY = 0xff;
-
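-/* Translate a test-local mop_desc into the kvm_s390_mem_op ioctl argument. */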
-static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc *desc)
-{
-       struct kvm_s390_mem_op ksmo = {
-               .gaddr = (uintptr_t)desc->gaddr,
-               .size = desc->size,
-               .buf = ((uintptr_t)desc->buf),
-               .reserved = "ignored_ignored_ignored_ignored"
-       };
-
-       switch (desc->target) {
-       case LOGICAL:
-               if (desc->mode == READ)
-                       ksmo.op = KVM_S390_MEMOP_LOGICAL_READ;
-               if (desc->mode == WRITE)
-                       ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
-               break;
-       case SIDA:
-               if (desc->mode == READ)
-                       ksmo.op = KVM_S390_MEMOP_SIDA_READ;
-               if (desc->mode == WRITE)
-                       ksmo.op = KVM_S390_MEMOP_SIDA_WRITE;
-               break;
-       case ABSOLUTE:
-               if (desc->mode == READ)
-                       ksmo.op = KVM_S390_MEMOP_ABSOLUTE_READ;
-               if (desc->mode == WRITE)
-                       ksmo.op = KVM_S390_MEMOP_ABSOLUTE_WRITE;
-               if (desc->mode == CMPXCHG) {
-                       ksmo.op = KVM_S390_MEMOP_ABSOLUTE_CMPXCHG;
-                       ksmo.old_addr = (uint64_t)desc->old;
-                       memcpy(desc->old_value, desc->old, desc->size);
-               }
-               break;
-       case INVALID:
-               ksmo.op = -1;
-       }
-       if (desc->f_check)
-               ksmo.flags |= KVM_S390_MEMOP_F_CHECK_ONLY;
-       if (desc->f_inject)
-               ksmo.flags |= KVM_S390_MEMOP_F_INJECT_EXCEPTION;
-       if (desc->_set_flags)
-               ksmo.flags = desc->set_flags;
-       if (desc->f_key && desc->key != NO_KEY) {
-               ksmo.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION;
-               ksmo.key = desc->key;
-       }
-       if (desc->_ar)
-               ksmo.ar = desc->ar;
-       else
-               ksmo.ar = 0;
-       if (desc->_sida_offset)
-               ksmo.sida_offset = desc->sida_offset;
-
-       return ksmo;
-}
-
-struct test_info {
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu;
-};
-
-#define PRINT_MEMOP false
-static void print_memop(struct kvm_vcpu *vcpu, const struct kvm_s390_mem_op *ksmo)
-{
-       if (!PRINT_MEMOP)
-               return;
-
-       if (!vcpu)
-               printf("vm memop(");
-       else
-               printf("vcpu memop(");
-       switch (ksmo->op) {
-       case KVM_S390_MEMOP_LOGICAL_READ:
-               printf("LOGICAL, READ, ");
-               break;
-       case KVM_S390_MEMOP_LOGICAL_WRITE:
-               printf("LOGICAL, WRITE, ");
-               break;
-       case KVM_S390_MEMOP_SIDA_READ:
-               printf("SIDA, READ, ");
-               break;
-       case KVM_S390_MEMOP_SIDA_WRITE:
-               printf("SIDA, WRITE, ");
-               break;
-       case KVM_S390_MEMOP_ABSOLUTE_READ:
-               printf("ABSOLUTE, READ, ");
-               break;
-       case KVM_S390_MEMOP_ABSOLUTE_WRITE:
-               printf("ABSOLUTE, WRITE, ");
-               break;
-       case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
-               printf("ABSOLUTE, CMPXCHG, ");
-               break;
-       }
-       printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u, old_addr=%llx",
-              ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key,
-              ksmo->old_addr);
-       if (ksmo->flags & KVM_S390_MEMOP_F_CHECK_ONLY)
-               printf(", CHECK_ONLY");
-       if (ksmo->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION)
-               printf(", INJECT_EXCEPTION");
-       if (ksmo->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION)
-               printf(", SKEY_PROTECTION");
-       puts(")");
-}
-
-static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
-                          struct mop_desc *desc)
-{
-       struct kvm_vcpu *vcpu = info.vcpu;
-
-       if (!vcpu)
-               return __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo);
-       else
-               return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo);
-}
-
-static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
-                       struct mop_desc *desc)
-{
-       int r;
-
-       r = err_memop_ioctl(info, ksmo, desc);
-       if (ksmo->op == KVM_S390_MEMOP_ABSOLUTE_CMPXCHG) {
-               if (desc->cmpxchg_success) {
-                       int diff = memcmp(desc->old_value, desc->old, desc->size);
-                       *desc->cmpxchg_success = !diff;
-               }
-       }
-       TEST_ASSERT(!r, __KVM_IOCTL_ERROR("KVM_S390_MEM_OP", r));
-}
-
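-/*
- * Build a mop_desc from the target, access mode, buffer and size plus any
- * optional designated initializers (GADDR, KEY, AR, ...), translate a guest
- * virtual address to absolute when required, and issue KVM_S390_MEM_OP via
- * the asserting (MOP) or error-returning (ERR_MOP) wrapper.
- */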
-#define MEMOP(err, info_p, mop_target_p, access_mode_p, buf_p, size_p, ...)    \
-({                                                                             \
-       struct test_info __info = (info_p);                                     \
-       struct mop_desc __desc = {                                              \
-               .target = (mop_target_p),                                       \
-               .mode = (access_mode_p),                                        \
-               .buf = (buf_p),                                                 \
-               .size = (size_p),                                               \
-               __VA_ARGS__                                                     \
-       };                                                                      \
-       struct kvm_s390_mem_op __ksmo;                                          \
-                                                                               \
-       if (__desc._gaddr_v) {                                                  \
-               if (__desc.target == ABSOLUTE)                                  \
-                       __desc.gaddr = addr_gva2gpa(__info.vm, __desc.gaddr_v); \
-               else                                                            \
-                       __desc.gaddr = __desc.gaddr_v;                          \
-       }                                                                       \
-       __ksmo = ksmo_from_desc(&__desc);                                       \
-       print_memop(__info.vcpu, &__ksmo);                                      \
-       err##memop_ioctl(__info, &__ksmo, &__desc);                             \
-})
-
-#define MOP(...) MEMOP(, __VA_ARGS__)
-#define ERR_MOP(...) MEMOP(err_, __VA_ARGS__)
-
-#define GADDR(a) .gaddr = ((uintptr_t)a)
-#define GADDR_V(v) ._gaddr_v = 1, .gaddr_v = ((uintptr_t)v)
-#define CHECK_ONLY .f_check = 1
-#define SET_FLAGS(f) ._set_flags = 1, .set_flags = (f)
-#define SIDA_OFFSET(o) ._sida_offset = 1, .sida_offset = (o)
-#define AR(a) ._ar = 1, .ar = (a)
-#define KEY(a) .f_key = 1, .key = (a)
-#define INJECT .f_inject = 1
-#define CMPXCHG_OLD(o) .old = (o)
-#define CMPXCHG_SUCCESS(s) .cmpxchg_success = (s)
-
-#define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); })
-
-#define CR0_FETCH_PROTECTION_OVERRIDE  (1UL << (63 - 38))
-#define CR0_STORAGE_PROTECTION_OVERRIDE        (1UL << (63 - 39))
-
-static uint8_t __aligned(PAGE_SIZE) mem1[65536];
-static uint8_t __aligned(PAGE_SIZE) mem2[65536];
-
-struct test_default {
-       struct kvm_vm *kvm_vm;
-       struct test_info vm;
-       struct test_info vcpu;
-       struct kvm_run *run;
-       int size;
-};
-
-static struct test_default test_default_init(void *guest_code)
-{
-       struct kvm_vcpu *vcpu;
-       struct test_default t;
-
-       t.size = min((size_t)kvm_check_cap(KVM_CAP_S390_MEM_OP), sizeof(mem1));
-       t.kvm_vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       t.vm = (struct test_info) { t.kvm_vm, NULL };
-       t.vcpu = (struct test_info) { t.kvm_vm, vcpu };
-       t.run = vcpu->run;
-       return t;
-}
-
-enum stage {
-       /* Synced state set by host, e.g. DAT */
-       STAGE_INITED,
-       /* Guest did nothing */
-       STAGE_IDLED,
-       /* Guest set storage keys (specifics up to test case) */
-       STAGE_SKEYS_SET,
-       /* Guest copied memory (locations up to test case) */
-       STAGE_COPIED,
-       /* End of guest code reached */
-       STAGE_DONE,
-};
-
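-/*
- * Run the vCPU until its next ucall and assert that it reported GUEST_SYNC
- * with the expected stage; a guest assert is reported as a test failure.
- */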
-#define HOST_SYNC(info_p, stage)                                       \
-({                                                                     \
-       struct test_info __info = (info_p);                             \
-       struct kvm_vcpu *__vcpu = __info.vcpu;                          \
-       struct ucall uc;                                                \
-       int __stage = (stage);                                          \
-                                                                       \
-       vcpu_run(__vcpu);                                               \
-       get_ucall(__vcpu, &uc);                                         \
-       if (uc.cmd == UCALL_ABORT) {                                    \
-               REPORT_GUEST_ASSERT(uc);                                \
-       }                                                               \
-       TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC);                             \
-       TEST_ASSERT_EQ(uc.args[1], __stage);                            \
-})                                                                     \
-
-static void prepare_mem12(void)
-{
-       int i;
-
-       for (i = 0; i < sizeof(mem1); i++)
-               mem1[i] = rand();
-       memset(mem2, 0xaa, sizeof(mem2));
-}
-
-#define ASSERT_MEM_EQ(p1, p2, size) \
-       TEST_ASSERT(!memcmp(p1, p2, size), "Memory contents do not match!")
-
-static void default_write_read(struct test_info copy_cpu, struct test_info mop_cpu,
-                              enum mop_target mop_target, uint32_t size, uint8_t key)
-{
-       prepare_mem12();
-       CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size,
-                  GADDR_V(mem1), KEY(key));
-       HOST_SYNC(copy_cpu, STAGE_COPIED);
-       CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size,
-                  GADDR_V(mem2), KEY(key));
-       ASSERT_MEM_EQ(mem1, mem2, size);
-}
-
-static void default_read(struct test_info copy_cpu, struct test_info mop_cpu,
-                        enum mop_target mop_target, uint32_t size, uint8_t key)
-{
-       prepare_mem12();
-       CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, GADDR_V(mem1));
-       HOST_SYNC(copy_cpu, STAGE_COPIED);
-       CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size,
-                  GADDR_V(mem2), KEY(key));
-       ASSERT_MEM_EQ(mem1, mem2, size);
-}
-
-static void default_cmpxchg(struct test_default *test, uint8_t key)
-{
-       for (int size = 1; size <= 16; size *= 2) {
-               for (int offset = 0; offset < 16; offset += size) {
-                       uint8_t __aligned(16) new[16] = {};
-                       uint8_t __aligned(16) old[16];
-                       bool succ;
-
-                       prepare_mem12();
-                       default_write_read(test->vcpu, test->vcpu, LOGICAL, 16, NO_KEY);
-
-                       memcpy(&old, mem1, 16);
-                       MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset,
-                           size, GADDR_V(mem1 + offset),
-                           CMPXCHG_OLD(old + offset),
-                           CMPXCHG_SUCCESS(&succ), KEY(key));
-                       HOST_SYNC(test->vcpu, STAGE_COPIED);
-                       MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2));
-                       TEST_ASSERT(succ, "exchange of values should succeed");
-                       memcpy(mem1 + offset, new + offset, size);
-                       ASSERT_MEM_EQ(mem1, mem2, 16);
-
-                       memcpy(&old, mem1, 16);
-                       new[offset]++;
-                       old[offset]++;
-                       MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset,
-                           size, GADDR_V(mem1 + offset),
-                           CMPXCHG_OLD(old + offset),
-                           CMPXCHG_SUCCESS(&succ), KEY(key));
-                       HOST_SYNC(test->vcpu, STAGE_COPIED);
-                       MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2));
-                       TEST_ASSERT(!succ, "exchange of values should not succeed");
-                       ASSERT_MEM_EQ(mem1, mem2, 16);
-                       ASSERT_MEM_EQ(&old, mem1, 16);
-               }
-       }
-}
-
-static void guest_copy(void)
-{
-       GUEST_SYNC(STAGE_INITED);
-       memcpy(&mem2, &mem1, sizeof(mem2));
-       GUEST_SYNC(STAGE_COPIED);
-}
-
-static void test_copy(void)
-{
-       struct test_default t = test_default_init(guest_copy);
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-
-       default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, NO_KEY);
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void test_copy_access_register(void)
-{
-       struct test_default t = test_default_init(guest_copy);
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-
-       prepare_mem12();
-       t.run->psw_mask &= ~(3UL << (63 - 17));
-       t.run->psw_mask |= 1UL << (63 - 17);  /* Enable AR mode */
-
-       /*
-        * Primary address space gets used if an access register
-        * contains zero. The host makes use of AR[1], so it is a good
-        * candidate for ensuring the guest AR (of zero) is used.
-        */
-       CHECK_N_DO(MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size,
-                  GADDR_V(mem1), AR(1));
-       HOST_SYNC(t.vcpu, STAGE_COPIED);
-
-       CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, t.size,
-                  GADDR_V(mem2), AR(1));
-       ASSERT_MEM_EQ(mem1, mem2, t.size);
-
-       kvm_vm_free(t.kvm_vm);
-}
-
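-/*
- * Guest helper: set the storage key of every page in [addr, addr + len) via
- * SSKE, using LRA to obtain the absolute address and asserting that each
- * page is mapped.
- */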
-static void set_storage_key_range(void *addr, size_t len, uint8_t key)
-{
-       uintptr_t _addr, abs, i;
-       int not_mapped = 0;
-
-       _addr = (uintptr_t)addr;
-       for (i = _addr & PAGE_MASK; i < _addr + len; i += PAGE_SIZE) {
-               abs = i;
-               asm volatile (
-                              "lra     %[abs], 0(0,%[abs])\n"
-                       "       jz      0f\n"
-                       "       llill   %[not_mapped],1\n"
-                       "       j       1f\n"
-                       "0:     sske    %[key], %[abs]\n"
-                       "1:"
-                       : [abs] "+&a" (abs), [not_mapped] "+r" (not_mapped)
-                       : [key] "r" (key)
-                       : "cc"
-               );
-               GUEST_ASSERT_EQ(not_mapped, 0);
-       }
-}
-
-static void guest_copy_key(void)
-{
-       set_storage_key_range(mem1, sizeof(mem1), 0x90);
-       set_storage_key_range(mem2, sizeof(mem2), 0x90);
-       GUEST_SYNC(STAGE_SKEYS_SET);
-
-       for (;;) {
-               memcpy(&mem2, &mem1, sizeof(mem2));
-               GUEST_SYNC(STAGE_COPIED);
-       }
-}
-
-static void test_copy_key(void)
-{
-       struct test_default t = test_default_init(guest_copy_key);
-
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       /* vm, no key */
-       default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, NO_KEY);
-
-       /* vm/vcpu, matching key or key 0 */
-       default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 0);
-       default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9);
-       default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 0);
-       default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9);
-       /*
-        * There used to be different code paths for key handling depending on
-        * whether the region crossed a page boundary.
-        * There currently are not, but the more tests the merrier.
-        */
-       default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 0);
-       default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 9);
-       default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 0);
-       default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 9);
-
-       /* vm/vcpu, mismatching keys on read, but no fetch protection */
-       default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2);
-       default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 2);
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void test_cmpxchg_key(void)
-{
-       struct test_default t = test_default_init(guest_copy_key);
-
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       default_cmpxchg(&t, NO_KEY);
-       default_cmpxchg(&t, 0);
-       default_cmpxchg(&t, 9);
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-static __uint128_t cut_to_size(int size, __uint128_t val)
-{
-       switch (size) {
-       case 1:
-               return (uint8_t)val;
-       case 2:
-               return (uint16_t)val;
-       case 4:
-               return (uint32_t)val;
-       case 8:
-               return (uint64_t)val;
-       case 16:
-               return val;
-       }
-       GUEST_FAIL("Invalid size = %u", size);
-       return 0;
-}
-
-static bool popcount_eq(__uint128_t a, __uint128_t b)
-{
-       unsigned int count_a, count_b;
-
-       count_a = __builtin_popcountl((uint64_t)(a >> 64)) +
-                 __builtin_popcountl((uint64_t)a);
-       count_b = __builtin_popcountl((uint64_t)(b >> 64)) +
-                 __builtin_popcountl((uint64_t)b);
-       return count_a == count_b;
-}
-
-static __uint128_t rotate(int size, __uint128_t val, int amount)
-{
-       unsigned int bits = size * 8;
-
-       amount = (amount + bits) % bits;
-       val = cut_to_size(size, val);
-       if (!amount)
-               return val;
-       return (val << (bits - amount)) | (val >> amount);
-}
-
-const unsigned int max_block = 16;
-
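-/*
- * Derive a pseudo-random, size-aligned block (power-of-two size and offset
- * within max_block) from iteration index i; guest and host use different
- * multipliers so they pick different sequences.
- */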
-static void choose_block(bool guest, int i, int *size, int *offset)
-{
-       unsigned int rand;
-
-       rand = i;
-       if (guest) {
-               rand = rand * 19 + 11;
-               *size = 1 << ((rand % 3) + 2);
-               rand = rand * 19 + 11;
-               *offset = (rand % max_block) & ~(*size - 1);
-       } else {
-               rand = rand * 17 + 5;
-               *size = 1 << (rand % 5);
-               rand = rand * 17 + 5;
-               *offset = (rand % max_block) & ~(*size - 1);
-       }
-}
-
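-/*
- * Pseudo-randomly permute the bits of old: either swap two bytes of the
- * 16-byte value or rotate the low size bytes, preserving the number of set
- * bits for the final popcount check.
- */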
-static __uint128_t permutate_bits(bool guest, int i, int size, __uint128_t old)
-{
-       unsigned int rand;
-       int amount;
-       bool swap;
-
-       rand = i;
-       rand = rand * 3 + 1;
-       if (guest)
-               rand = rand * 3 + 1;
-       swap = rand % 2 == 0;
-       if (swap) {
-               int i, j;
-               __uint128_t new;
-               uint8_t byte0, byte1;
-
-               rand = rand * 3 + 1;
-               i = rand % size;
-               rand = rand * 3 + 1;
-               j = rand % size;
-               if (i == j)
-                       return old;
-               new = rotate(16, old, i * 8);
-               byte0 = new & 0xff;
-               new &= ~0xff;
-               new = rotate(16, new, -i * 8);
-               new = rotate(16, new, j * 8);
-               byte1 = new & 0xff;
-               new = (new & ~0xff) | byte0;
-               new = rotate(16, new, -j * 8);
-               new = rotate(16, new, i * 8);
-               new = new | byte1;
-               new = rotate(16, new, -i * 8);
-               return new;
-       }
-       rand = rand * 3 + 1;
-       amount = rand % (size * 8);
-       return rotate(size, old, amount);
-}
-
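-/*
- * Guest-side compare-and-swap of size bytes at target using CS/CSG/CDSG.
- * Returns true if the exchange succeeded; on failure *old_addr is updated
- * with the value actually found in memory.
- */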
-static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t new)
-{
-       bool ret;
-
-       switch (size) {
-       case 4: {
-                       uint32_t old = *old_addr;
-
-                       asm volatile ("cs %[old],%[new],%[address]"
-                           : [old] "+d" (old),
-                             [address] "+Q" (*(uint32_t *)(target))
-                           : [new] "d" ((uint32_t)new)
-                           : "cc"
-                       );
-                       ret = old == (uint32_t)*old_addr;
-                       *old_addr = old;
-                       return ret;
-               }
-       case 8: {
-                       uint64_t old = *old_addr;
-
-                       asm volatile ("csg %[old],%[new],%[address]"
-                           : [old] "+d" (old),
-                             [address] "+Q" (*(uint64_t *)(target))
-                           : [new] "d" ((uint64_t)new)
-                           : "cc"
-                       );
-                       ret = old == (uint64_t)*old_addr;
-                       *old_addr = old;
-                       return ret;
-               }
-       case 16: {
-                       __uint128_t old = *old_addr;
-
-                       asm volatile ("cdsg %[old],%[new],%[address]"
-                           : [old] "+d" (old),
-                             [address] "+Q" (*(__uint128_t *)(target))
-                           : [new] "d" (new)
-                           : "cc"
-                       );
-                       ret = old == *old_addr;
-                       *old_addr = old;
-                       return ret;
-               }
-       }
-       GUEST_FAIL("Invalid size = %u", size);
-       return 0;
-}
-
-const unsigned int cmpxchg_iter_outer = 100, cmpxchg_iter_inner = 10000;
-
-static void guest_cmpxchg_key(void)
-{
-       int size, offset;
-       __uint128_t old, new;
-
-       set_storage_key_range(mem1, max_block, 0x10);
-       set_storage_key_range(mem2, max_block, 0x10);
-       GUEST_SYNC(STAGE_SKEYS_SET);
-
-       for (int i = 0; i < cmpxchg_iter_outer; i++) {
-               do {
-                       old = 1;
-               } while (!_cmpxchg(16, mem1, &old, 0));
-               for (int j = 0; j < cmpxchg_iter_inner; j++) {
-                       choose_block(true, i + j, &size, &offset);
-                       do {
-                               new = permutate_bits(true, i + j, size, old);
-                       } while (!_cmpxchg(size, mem2 + offset, &old, new));
-               }
-       }
-
-       GUEST_SYNC(STAGE_DONE);
-}
-
-static void *run_guest(void *data)
-{
-       struct test_info *info = data;
-
-       HOST_SYNC(*info, STAGE_DONE);
-       return NULL;
-}
-
-static char *quad_to_char(__uint128_t *quad, int size)
-{
-       return ((char *)quad) + (sizeof(*quad) - size);
-}
-
-static void test_cmpxchg_key_concurrent(void)
-{
-       struct test_default t = test_default_init(guest_cmpxchg_key);
-       int size, offset;
-       __uint128_t old, new;
-       bool success;
-       pthread_t thread;
-
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-       prepare_mem12();
-       MOP(t.vcpu, LOGICAL, WRITE, mem1, max_block, GADDR_V(mem2));
-       pthread_create(&thread, NULL, run_guest, &t.vcpu);
-
-       for (int i = 0; i < cmpxchg_iter_outer; i++) {
-               do {
-                       old = 0;
-                       new = 1;
-                       MOP(t.vm, ABSOLUTE, CMPXCHG, &new,
-                           sizeof(new), GADDR_V(mem1),
-                           CMPXCHG_OLD(&old),
-                           CMPXCHG_SUCCESS(&success), KEY(1));
-               } while (!success);
-               for (int j = 0; j < cmpxchg_iter_inner; j++) {
-                       choose_block(false, i + j, &size, &offset);
-                       do {
-                               new = permutate_bits(false, i + j, size, old);
-                               MOP(t.vm, ABSOLUTE, CMPXCHG, quad_to_char(&new, size),
-                                   size, GADDR_V(mem2 + offset),
-                                   CMPXCHG_OLD(quad_to_char(&old, size)),
-                                   CMPXCHG_SUCCESS(&success), KEY(1));
-                       } while (!success);
-               }
-       }
-
-       pthread_join(thread, NULL);
-
-       MOP(t.vcpu, LOGICAL, READ, mem2, max_block, GADDR_V(mem2));
-       TEST_ASSERT(popcount_eq(*(__uint128_t *)mem1, *(__uint128_t *)mem2),
-                   "Must retain number of set bits");
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void guest_copy_key_fetch_prot(void)
-{
-       /*
-        * For some reason combining the first sync with override enablement
-        * results in an exception when calling HOST_SYNC.
-        */
-       GUEST_SYNC(STAGE_INITED);
-       /* Storage protection override applies to both store and fetch. */
-       set_storage_key_range(mem1, sizeof(mem1), 0x98);
-       set_storage_key_range(mem2, sizeof(mem2), 0x98);
-       GUEST_SYNC(STAGE_SKEYS_SET);
-
-       for (;;) {
-               memcpy(&mem2, &mem1, sizeof(mem2));
-               GUEST_SYNC(STAGE_COPIED);
-       }
-}
-
-static void test_copy_key_storage_prot_override(void)
-{
-       struct test_default t = test_default_init(guest_copy_key_fetch_prot);
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-       t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
-       t.run->kvm_dirty_regs = KVM_SYNC_CRS;
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       /* vcpu, mismatching keys, storage protection override in effect */
-       default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2);
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void test_copy_key_fetch_prot(void)
-{
-       struct test_default t = test_default_init(guest_copy_key_fetch_prot);
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       /* vm/vcpu, matching key, fetch protection in effect */
-       default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9);
-       default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9);
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-#define ERR_PROT_MOP(...)                                                      \
-({                                                                             \
-       int rv;                                                                 \
-                                                                               \
-       rv = ERR_MOP(__VA_ARGS__);                                              \
-       TEST_ASSERT(rv == 4, "Should result in protection exception");          \
-})
-
-static void guest_error_key(void)
-{
-       GUEST_SYNC(STAGE_INITED);
-       set_storage_key_range(mem1, PAGE_SIZE, 0x18);
-       set_storage_key_range(mem1 + PAGE_SIZE, sizeof(mem1) - PAGE_SIZE, 0x98);
-       GUEST_SYNC(STAGE_SKEYS_SET);
-       GUEST_SYNC(STAGE_IDLED);
-}
-
-static void test_errors_key(void)
-{
-       struct test_default t = test_default_init(guest_error_key);
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       /* vm/vcpu, mismatching keys, fetch protection in effect */
-       CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
-       CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem1), KEY(2));
-       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
-       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem1), KEY(2));
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void test_errors_cmpxchg_key(void)
-{
-       struct test_default t = test_default_init(guest_copy_key_fetch_prot);
-       int i;
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       for (i = 1; i <= 16; i *= 2) {
-               __uint128_t old = 0;
-
-               ERR_PROT_MOP(t.vm, ABSOLUTE, CMPXCHG, mem2, i, GADDR_V(mem2),
-                            CMPXCHG_OLD(&old), KEY(2));
-       }
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void test_termination(void)
-{
-       struct test_default t = test_default_init(guest_error_key);
-       uint64_t prefix;
-       uint64_t teid;
-       uint64_t teid_mask = BIT(63 - 56) | BIT(63 - 60) | BIT(63 - 61);
-       uint64_t psw[2];
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       /* vcpu, mismatching keys after first page */
-       ERR_PROT_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(1), INJECT);
-       /*
-        * The memop injected a program exception and the test needs to check the
-        * Translation-Exception Identification (TEID). It is necessary to run
-        * the guest in order to be able to read the TEID from guest memory.
-        * Set the guest program new PSW, so the guest state is not clobbered.
-        */
-       prefix = t.run->s.regs.prefix;
-       psw[0] = t.run->psw_mask;
-       psw[1] = t.run->psw_addr;
-       MOP(t.vm, ABSOLUTE, WRITE, psw, sizeof(psw), GADDR(prefix + 464));
-       HOST_SYNC(t.vcpu, STAGE_IDLED);
-       MOP(t.vm, ABSOLUTE, READ, &teid, sizeof(teid), GADDR(prefix + 168));
-       /* Bits 56, 60, 61 form a code, 0 being the only one allowing for termination */
-       TEST_ASSERT_EQ(teid & teid_mask, 0);
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void test_errors_key_storage_prot_override(void)
-{
-       struct test_default t = test_default_init(guest_copy_key_fetch_prot);
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-       t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
-       t.run->kvm_dirty_regs = KVM_SYNC_CRS;
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       /* vm, mismatching keys, storage protection override not applicable to vm */
-       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
-       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem2), KEY(2));
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-const uint64_t last_page_addr = -PAGE_SIZE;
-
-static void guest_copy_key_fetch_prot_override(void)
-{
-       int i;
-       char *page_0 = 0;
-
-       GUEST_SYNC(STAGE_INITED);
-       set_storage_key_range(0, PAGE_SIZE, 0x18);
-       set_storage_key_range((void *)last_page_addr, PAGE_SIZE, 0x0);
-       asm volatile ("sske %[key],%[addr]\n" :: [addr] "r"(0L), [key] "r"(0x18) : "cc");
-       GUEST_SYNC(STAGE_SKEYS_SET);
-
-       for (;;) {
-               for (i = 0; i < PAGE_SIZE; i++)
-                       page_0[i] = mem1[i];
-               GUEST_SYNC(STAGE_COPIED);
-       }
-}
-
-static void test_copy_key_fetch_prot_override(void)
-{
-       struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
-       vm_vaddr_t guest_0_page, guest_last_page;
-
-       guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
-       guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
-       if (guest_0_page != 0 || guest_last_page != last_page_addr) {
-               print_skip("did not allocate guest pages at required positions");
-               goto out;
-       }
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-       t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
-       t.run->kvm_dirty_regs = KVM_SYNC_CRS;
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       /* vcpu, mismatching keys on fetch, fetch protection override applies */
-       prepare_mem12();
-       MOP(t.vcpu, LOGICAL, WRITE, mem1, PAGE_SIZE, GADDR_V(mem1));
-       HOST_SYNC(t.vcpu, STAGE_COPIED);
-       CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2));
-       ASSERT_MEM_EQ(mem1, mem2, 2048);
-
-       /*
-        * vcpu, mismatching keys on fetch, fetch protection override applies,
-        * wraparound
-        */
-       prepare_mem12();
-       MOP(t.vcpu, LOGICAL, WRITE, mem1, 2 * PAGE_SIZE, GADDR_V(guest_last_page));
-       HOST_SYNC(t.vcpu, STAGE_COPIED);
-       CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048,
-                  GADDR_V(guest_last_page), KEY(2));
-       ASSERT_MEM_EQ(mem1, mem2, 2048);
-
-out:
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void test_errors_key_fetch_prot_override_not_enabled(void)
-{
-       struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
-       vm_vaddr_t guest_0_page, guest_last_page;
-
-       guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
-       guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
-       if (guest_0_page != 0 || guest_last_page != last_page_addr) {
-               print_skip("did not allocate guest pages at required positions");
-               goto out;
-       }
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       /* vcpu, mismatching keys on fetch, fetch protection override not enabled */
-       CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(0), KEY(2));
-
-out:
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void test_errors_key_fetch_prot_override_enabled(void)
-{
-       struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
-       vm_vaddr_t guest_0_page, guest_last_page;
-
-       guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
-       guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
-       if (guest_0_page != 0 || guest_last_page != last_page_addr) {
-               print_skip("did not allocate guest pages at required positions");
-               goto out;
-       }
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-       t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
-       t.run->kvm_dirty_regs = KVM_SYNC_CRS;
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       /*
-        * vcpu, mismatching keys on fetch; fetch protection override does not
-        * apply because the access exceeds the 2048-byte override range
-        */
-       CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048 + 1, GADDR_V(0), KEY(2));
-       CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048 + 1,
-                  GADDR_V(guest_last_page), KEY(2));
-       /* vm, fetch protected override does not apply */
-       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR(0), KEY(2));
-       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2));
-
-out:
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void guest_idle(void)
-{
-       GUEST_SYNC(STAGE_INITED); /* for consistency's sake */
-       for (;;)
-               GUEST_SYNC(STAGE_IDLED);
-}
-
-static void _test_errors_common(struct test_info info, enum mop_target target, int size)
-{
-       int rv;
-
-       /* Bad size: */
-       rv = ERR_MOP(info, target, WRITE, mem1, -1, GADDR_V(mem1));
-       TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes");
-
-       /* Zero size: */
-       rv = ERR_MOP(info, target, WRITE, mem1, 0, GADDR_V(mem1));
-       TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM),
-                   "ioctl allows 0 as size");
-
-       /* Bad flags: */
-       rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), SET_FLAGS(-1));
-       TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags");
-
-       /* Bad guest address: */
-       rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL), CHECK_ONLY);
-       TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address with CHECK_ONLY");
-       rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL));
-       TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address on write");
-
-       /* Bad host address: */
-       rv = ERR_MOP(info, target, WRITE, 0, size, GADDR_V(mem1));
-       TEST_ASSERT(rv == -1 && errno == EFAULT,
-                   "ioctl does not report bad host memory address");
-
-       /* Bad key: */
-       rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), KEY(17));
-       TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows invalid key");
-}
-
-static void test_errors(void)
-{
-       struct test_default t = test_default_init(guest_idle);
-       int rv;
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-
-       _test_errors_common(t.vcpu, LOGICAL, t.size);
-       _test_errors_common(t.vm, ABSOLUTE, t.size);
-
-       /* Bad operation: */
-       rv = ERR_MOP(t.vcpu, INVALID, WRITE, mem1, t.size, GADDR_V(mem1));
-       TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
-       /* virtual addresses are not translated when passing INVALID */
-       rv = ERR_MOP(t.vm, INVALID, WRITE, mem1, PAGE_SIZE, GADDR(0));
-       TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
-
-       /* Bad access register: */
-       t.run->psw_mask &= ~(3UL << (63 - 17));
-       t.run->psw_mask |= 1UL << (63 - 17);  /* Enable AR mode */
-       HOST_SYNC(t.vcpu, STAGE_IDLED); /* To sync new state to SIE block */
-       rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), AR(17));
-       TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15");
-       t.run->psw_mask &= ~(3UL << (63 - 17));   /* Disable AR mode */
-       HOST_SYNC(t.vcpu, STAGE_IDLED); /* Run to sync new state */
-
-       /* Check that the SIDA calls are rejected for non-protected guests */
-       rv = ERR_MOP(t.vcpu, SIDA, READ, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0));
-       TEST_ASSERT(rv == -1 && errno == EINVAL,
-                   "ioctl does not reject SIDA_READ in non-protected mode");
-       rv = ERR_MOP(t.vcpu, SIDA, WRITE, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0));
-       TEST_ASSERT(rv == -1 && errno == EINVAL,
-                   "ioctl does not reject SIDA_WRITE in non-protected mode");
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void test_errors_cmpxchg(void)
-{
-       struct test_default t = test_default_init(guest_idle);
-       __uint128_t old;
-       int rv, i, power = 1;
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-
-       for (i = 0; i < 32; i++) {
-               if (i == power) {
-                       power *= 2;
-                       continue;
-               }
-               rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1),
-                            CMPXCHG_OLD(&old));
-               TEST_ASSERT(rv == -1 && errno == EINVAL,
-                           "ioctl allows bad size for cmpxchg");
-       }
-       for (i = 1; i <= 16; i *= 2) {
-               rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR((void *)~0xfffUL),
-                            CMPXCHG_OLD(&old));
-               TEST_ASSERT(rv > 0, "ioctl allows bad guest address for cmpxchg");
-       }
-       for (i = 2; i <= 16; i *= 2) {
-               rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1 + 1),
-                            CMPXCHG_OLD(&old));
-               TEST_ASSERT(rv == -1 && errno == EINVAL,
-                           "ioctl allows bad alignment for cmpxchg");
-       }
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-int main(int argc, char *argv[])
-{
-       int extension_cap, idx;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_MEM_OP));
-       extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION);
-
-       struct testdef {
-               const char *name;
-               void (*test)(void);
-               bool requirements_met;
-       } testlist[] = {
-               {
-                       .name = "simple copy",
-                       .test = test_copy,
-                       .requirements_met = true,
-               },
-               {
-                       .name = "generic error checks",
-                       .test = test_errors,
-                       .requirements_met = true,
-               },
-               {
-                       .name = "copy with storage keys",
-                       .test = test_copy_key,
-                       .requirements_met = extension_cap > 0,
-               },
-               {
-                       .name = "cmpxchg with storage keys",
-                       .test = test_cmpxchg_key,
-                       .requirements_met = extension_cap & 0x2,
-               },
-               {
-                       .name = "concurrently cmpxchg with storage keys",
-                       .test = test_cmpxchg_key_concurrent,
-                       .requirements_met = extension_cap & 0x2,
-               },
-               {
-                       .name = "copy with key storage protection override",
-                       .test = test_copy_key_storage_prot_override,
-                       .requirements_met = extension_cap > 0,
-               },
-               {
-                       .name = "copy with key fetch protection",
-                       .test = test_copy_key_fetch_prot,
-                       .requirements_met = extension_cap > 0,
-               },
-               {
-                       .name = "copy with key fetch protection override",
-                       .test = test_copy_key_fetch_prot_override,
-                       .requirements_met = extension_cap > 0,
-               },
-               {
-                       .name = "copy with access register mode",
-                       .test = test_copy_access_register,
-                       .requirements_met = true,
-               },
-               {
-                       .name = "error checks with key",
-                       .test = test_errors_key,
-                       .requirements_met = extension_cap > 0,
-               },
-               {
-                       .name = "error checks for cmpxchg with key",
-                       .test = test_errors_cmpxchg_key,
-                       .requirements_met = extension_cap & 0x2,
-               },
-               {
-                       .name = "error checks for cmpxchg",
-                       .test = test_errors_cmpxchg,
-                       .requirements_met = extension_cap & 0x2,
-               },
-               {
-                       .name = "termination",
-                       .test = test_termination,
-                       .requirements_met = extension_cap > 0,
-               },
-               {
-                       .name = "error checks with key storage protection override",
-                       .test = test_errors_key_storage_prot_override,
-                       .requirements_met = extension_cap > 0,
-               },
-               {
-                       .name = "error checks without key fetch prot override",
-                       .test = test_errors_key_fetch_prot_override_not_enabled,
-                       .requirements_met = extension_cap > 0,
-               },
-               {
-                       .name = "error checks with key fetch prot override",
-                       .test = test_errors_key_fetch_prot_override_enabled,
-                       .requirements_met = extension_cap > 0,
-               },
-       };
-
-       ksft_print_header();
-       ksft_set_plan(ARRAY_SIZE(testlist));
-
-       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
-               if (testlist[idx].requirements_met) {
-                       testlist[idx].test();
-                       ksft_test_result_pass("%s\n", testlist[idx].name);
-               } else {
-                       ksft_test_result_skip("%s - requirements not met (kernel has extension cap %#x)\n",
-                                             testlist[idx].name, extension_cap);
-               }
-       }
-
-       ksft_finished();        /* Print results and exit() accordingly */
-}
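The MOP()/ERR_MOP()/CHECK_N_DO() wrappers used in the tests above all reduce to filling a struct kvm_s390_mem_op and calling the KVM_S390_MEM_OP ioctl on the vCPU fd (logical accesses) or the VM fd (absolute accesses). A minimal sketch of a keyed logical write against an already open vcpu_fd follows; this is illustrative only, not the selftest's own helper. Per the KVM API documentation, the ioctl returns 0 on success, -1 with errno set on usage errors, or a positive program interruption code.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int memop_logical_write_key(int vcpu_fd, uint64_t gaddr, void *buf,
                                   uint32_t size, uint8_t key)
{
        struct kvm_s390_mem_op ksmo = {
                .gaddr = gaddr,
                .buf   = (uintptr_t)buf,
                .size  = size,
                .op    = KVM_S390_MEMOP_LOGICAL_WRITE,
                .flags = KVM_S390_MEMOP_F_SKEY_PROTECTION, /* honor the access key */
                .key   = key,                              /* access key 0-15 */
        };

        return ioctl(vcpu_fd, KVM_S390_MEM_OP, &ksmo);
}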
diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c
deleted file mode 100644 (file)
index b58f75b..0000000
+++ /dev/null
@@ -1,313 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Test for s390x CPU resets
- *
- * Copyright (C) 2020, IBM
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "kselftest.h"
-
-#define LOCAL_IRQS 32
-
-#define ARBITRARY_NON_ZERO_VCPU_ID 3
-
-struct kvm_s390_irq buf[ARBITRARY_NON_ZERO_VCPU_ID + LOCAL_IRQS];
-
-static uint8_t regs_null[512];
-
-static void guest_code_initial(void)
-{
-       /* set several CRs to "safe" value */
-       unsigned long cr2_59 = 0x10;    /* enable guarded storage */
-       unsigned long cr8_63 = 0x1;     /* monitor mask = 1 */
-       unsigned long cr10 = 1;         /* PER START */
-       unsigned long cr11 = -1;        /* PER END */
-
-
-       /* Dirty registers */
-       asm volatile (
-               "       lghi    2,0x11\n"       /* Round toward 0 */
-               "       sfpc    2\n"            /* set fpc to !=0 */
-               "       lctlg   2,2,%0\n"
-               "       lctlg   8,8,%1\n"
-               "       lctlg   10,10,%2\n"
-               "       lctlg   11,11,%3\n"
-               /* now clobber some general purpose regs */
-               "       llihh   0,0xffff\n"
-               "       llihl   1,0x5555\n"
-               "       llilh   2,0xaaaa\n"
-               "       llill   3,0x0000\n"
-               /* now clobber a floating point reg */
-               "       lghi    4,0x1\n"
-               "       cdgbr   0,4\n"
-               /* now clobber an access reg */
-               "       sar     9,4\n"
-               /* We embed diag 501 here to control register content */
-               "       diag 0,0,0x501\n"
-               :
-               : "m" (cr2_59), "m" (cr8_63), "m" (cr10), "m" (cr11)
-               /* no clobber list as this should not return */
-               );
-}
-
-static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value)
-{
-       uint64_t eval_reg;
-
-       eval_reg = vcpu_get_reg(vcpu, id);
-       TEST_ASSERT(eval_reg == value, "value == 0x%lx", value);
-}
-
-static void assert_noirq(struct kvm_vcpu *vcpu)
-{
-       struct kvm_s390_irq_state irq_state;
-       int irqs;
-
-       irq_state.len = sizeof(buf);
-       irq_state.buf = (unsigned long)buf;
-       irqs = __vcpu_ioctl(vcpu, KVM_S390_GET_IRQ_STATE, &irq_state);
-       /*
-        * irqs contains the number of retrieved interrupts. Any interrupt
-        * (notably, the emergency call interrupt we have injected) should
-        * be cleared by the resets, so this should be 0.
-        */
-       TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d", errno);
-       TEST_ASSERT(!irqs, "IRQ pending");
-}
-
-static void assert_clear(struct kvm_vcpu *vcpu)
-{
-       struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
-       struct kvm_sregs sregs;
-       struct kvm_regs regs;
-       struct kvm_fpu fpu;
-
-       vcpu_regs_get(vcpu, &regs);
-       TEST_ASSERT(!memcmp(&regs.gprs, regs_null, sizeof(regs.gprs)), "grs == 0");
-
-       vcpu_sregs_get(vcpu, &sregs);
-       TEST_ASSERT(!memcmp(&sregs.acrs, regs_null, sizeof(sregs.acrs)), "acrs == 0");
-
-       vcpu_fpu_get(vcpu, &fpu);
-       TEST_ASSERT(!memcmp(&fpu.fprs, regs_null, sizeof(fpu.fprs)), "fprs == 0");
-
-       /* sync regs */
-       TEST_ASSERT(!memcmp(sync_regs->gprs, regs_null, sizeof(sync_regs->gprs)),
-                   "gprs0-15 == 0 (sync_regs)");
-
-       TEST_ASSERT(!memcmp(sync_regs->acrs, regs_null, sizeof(sync_regs->acrs)),
-                   "acrs0-15 == 0 (sync_regs)");
-
-       TEST_ASSERT(!memcmp(sync_regs->vrs, regs_null, sizeof(sync_regs->vrs)),
-                   "vrs0-15 == 0 (sync_regs)");
-}
-
-static void assert_initial_noclear(struct kvm_vcpu *vcpu)
-{
-       struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
-
-       TEST_ASSERT(sync_regs->gprs[0] == 0xffff000000000000UL,
-                   "gpr0 == 0xffff000000000000 (sync_regs)");
-       TEST_ASSERT(sync_regs->gprs[1] == 0x0000555500000000UL,
-                   "gpr1 == 0x0000555500000000 (sync_regs)");
-       TEST_ASSERT(sync_regs->gprs[2] == 0x00000000aaaa0000UL,
-                   "gpr2 == 0x00000000aaaa0000 (sync_regs)");
-       TEST_ASSERT(sync_regs->gprs[3] == 0x0000000000000000UL,
-                   "gpr3 == 0x0000000000000000 (sync_regs)");
-       TEST_ASSERT(sync_regs->fprs[0] == 0x3ff0000000000000UL,
-                   "fpr0 == 1.0 (sync_regs)");
-       TEST_ASSERT(sync_regs->acrs[9] == 1, "ar9 == 1 (sync_regs)");
-}
-
-static void assert_initial(struct kvm_vcpu *vcpu)
-{
-       struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
-       struct kvm_sregs sregs;
-       struct kvm_fpu fpu;
-
-       /* KVM_GET_SREGS */
-       vcpu_sregs_get(vcpu, &sregs);
-       TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0 (KVM_GET_SREGS)");
-       TEST_ASSERT(sregs.crs[14] == 0xC2000000UL,
-                   "cr14 == 0xC2000000 (KVM_GET_SREGS)");
-       TEST_ASSERT(!memcmp(&sregs.crs[1], regs_null, sizeof(sregs.crs[1]) * 12),
-                   "cr1-13 == 0 (KVM_GET_SREGS)");
-       TEST_ASSERT(sregs.crs[15] == 0, "cr15 == 0 (KVM_GET_SREGS)");
-
-       /* sync regs */
-       TEST_ASSERT(sync_regs->crs[0] == 0xE0UL, "cr0 == 0xE0 (sync_regs)");
-       TEST_ASSERT(sync_regs->crs[14] == 0xC2000000UL,
-                   "cr14 == 0xC2000000 (sync_regs)");
-       TEST_ASSERT(!memcmp(&sync_regs->crs[1], regs_null, 8 * 12),
-                   "cr1-13 == 0 (sync_regs)");
-       TEST_ASSERT(sync_regs->crs[15] == 0, "cr15 == 0 (sync_regs)");
-       TEST_ASSERT(sync_regs->fpc == 0, "fpc == 0 (sync_regs)");
-       TEST_ASSERT(sync_regs->todpr == 0, "todpr == 0 (sync_regs)");
-       TEST_ASSERT(sync_regs->cputm == 0, "cputm == 0 (sync_regs)");
-       TEST_ASSERT(sync_regs->ckc == 0, "ckc == 0 (sync_regs)");
-       TEST_ASSERT(sync_regs->pp == 0, "pp == 0 (sync_regs)");
-       TEST_ASSERT(sync_regs->gbea == 1, "gbea == 1 (sync_regs)");
-
-       /* kvm_run */
-       TEST_ASSERT(vcpu->run->psw_addr == 0, "psw_addr == 0 (kvm_run)");
-       TEST_ASSERT(vcpu->run->psw_mask == 0, "psw_mask == 0 (kvm_run)");
-
-       vcpu_fpu_get(vcpu, &fpu);
-       TEST_ASSERT(!fpu.fpc, "fpc == 0");
-
-       test_one_reg(vcpu, KVM_REG_S390_GBEA, 1);
-       test_one_reg(vcpu, KVM_REG_S390_PP, 0);
-       test_one_reg(vcpu, KVM_REG_S390_TODPR, 0);
-       test_one_reg(vcpu, KVM_REG_S390_CPU_TIMER, 0);
-       test_one_reg(vcpu, KVM_REG_S390_CLOCK_COMP, 0);
-}
-
-static void assert_normal_noclear(struct kvm_vcpu *vcpu)
-{
-       struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
-
-       TEST_ASSERT(sync_regs->crs[2] == 0x10, "cr2 == 0x10 (sync_regs)");
-       TEST_ASSERT(sync_regs->crs[8] == 1, "cr8 == 1 (sync_regs)");
-       TEST_ASSERT(sync_regs->crs[10] == 1, "cr10 == 1 (sync_regs)");
-       TEST_ASSERT(sync_regs->crs[11] == -1, "cr11 == -1 (sync_regs)");
-}
-
-static void assert_normal(struct kvm_vcpu *vcpu)
-{
-       test_one_reg(vcpu, KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID);
-       TEST_ASSERT(vcpu->run->s.regs.pft == KVM_S390_PFAULT_TOKEN_INVALID,
-                       "pft == 0xff.....  (sync_regs)");
-       assert_noirq(vcpu);
-}
-
-static void inject_irq(struct kvm_vcpu *vcpu)
-{
-       struct kvm_s390_irq_state irq_state;
-       struct kvm_s390_irq *irq = &buf[0];
-       int irqs;
-
-       /* Inject IRQ */
-       irq_state.len = sizeof(struct kvm_s390_irq);
-       irq_state.buf = (unsigned long)buf;
-       irq->type = KVM_S390_INT_EMERGENCY;
-       irq->u.emerg.code = vcpu->id;
-       irqs = __vcpu_ioctl(vcpu, KVM_S390_SET_IRQ_STATE, &irq_state);
-       TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d", errno);
-}
-
-static struct kvm_vm *create_vm(struct kvm_vcpu **vcpu)
-{
-       struct kvm_vm *vm;
-
-       vm = vm_create(1);
-
-       *vcpu = vm_vcpu_add(vm, ARBITRARY_NON_ZERO_VCPU_ID, guest_code_initial);
-
-       return vm;
-}
-
-static void test_normal(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       ksft_print_msg("Testing normal reset\n");
-       vm = create_vm(&vcpu);
-
-       vcpu_run(vcpu);
-
-       inject_irq(vcpu);
-
-       vcpu_ioctl(vcpu, KVM_S390_NORMAL_RESET, NULL);
-
-       /* must be cleared */
-       assert_normal(vcpu);
-       /* must not be cleared */
-       assert_normal_noclear(vcpu);
-       assert_initial_noclear(vcpu);
-
-       kvm_vm_free(vm);
-}
-
-static void test_initial(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       ksft_print_msg("Testing initial reset\n");
-       vm = create_vm(&vcpu);
-
-       vcpu_run(vcpu);
-
-       inject_irq(vcpu);
-
-       vcpu_ioctl(vcpu, KVM_S390_INITIAL_RESET, NULL);
-
-       /* must be cleared */
-       assert_normal(vcpu);
-       assert_initial(vcpu);
-       /* must not be cleared */
-       assert_initial_noclear(vcpu);
-
-       kvm_vm_free(vm);
-}
-
-static void test_clear(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       ksft_print_msg("Testing clear reset\n");
-       vm = create_vm(&vcpu);
-
-       vcpu_run(vcpu);
-
-       inject_irq(vcpu);
-
-       vcpu_ioctl(vcpu, KVM_S390_CLEAR_RESET, NULL);
-
-       /* must be cleared */
-       assert_normal(vcpu);
-       assert_initial(vcpu);
-       assert_clear(vcpu);
-
-       kvm_vm_free(vm);
-}
-
-struct testdef {
-       const char *name;
-       void (*test)(void);
-       bool needs_cap;
-} testlist[] = {
-       { "initial", test_initial, false },
-       { "normal", test_normal, true },
-       { "clear", test_clear, true },
-};
-
-int main(int argc, char *argv[])
-{
-       bool has_s390_vcpu_resets = kvm_check_cap(KVM_CAP_S390_VCPU_RESETS);
-       int idx;
-
-       ksft_print_header();
-       ksft_set_plan(ARRAY_SIZE(testlist));
-
-       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
-               if (!testlist[idx].needs_cap || has_s390_vcpu_resets) {
-                       testlist[idx].test();
-                       ksft_test_result_pass("%s\n", testlist[idx].name);
-               } else {
-                       ksft_test_result_skip("%s - no VCPU_RESETS capability\n",
-                                             testlist[idx].name);
-               }
-       }
-
-       ksft_finished();        /* Print results and exit() accordingly */
-}
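The three reset flavours exercised above are plain vCPU ioctls without a payload; KVM_S390_NORMAL_RESET and KVM_S390_CLEAR_RESET additionally require KVM_CAP_S390_VCPU_RESETS, matching the needs_cap flags in the test list. A rough sketch against a raw vCPU fd (the selftest uses the vcpu_ioctl() wrapper instead):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static void issue_resets(int vcpu_fd)
{
        /* Each reset clears a superset of the state cleared by the one before it. */
        ioctl(vcpu_fd, KVM_S390_NORMAL_RESET, NULL);
        ioctl(vcpu_fd, KVM_S390_INITIAL_RESET, NULL);
        ioctl(vcpu_fd, KVM_S390_CLEAR_RESET, NULL);
}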
diff --git a/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c
deleted file mode 100644 (file)
index bba0d9a..0000000
+++ /dev/null
@@ -1,111 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Test shared zeropage handling (with/without storage keys)
- *
- * Copyright (C) 2024, Red Hat, Inc.
- */
-#include <sys/mman.h>
-
-#include <linux/fs.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "kselftest.h"
-#include "ucall_common.h"
-
-static void set_storage_key(void *addr, uint8_t skey)
-{
-       asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
-}
-
-static void guest_code(void)
-{
-       /* Issue some storage key instruction. */
-       set_storage_key((void *)0, 0x98);
-       GUEST_DONE();
-}
-
-/*
- * Returns 1 if the shared zeropage is mapped, 0 if something else is mapped.
- * Returns < 0 on error or if nothing is mapped.
- */
-static int maps_shared_zeropage(int pagemap_fd, void *addr)
-{
-       struct page_region region;
-       struct pm_scan_arg arg = {
-               .start = (uintptr_t)addr,
-               .end = (uintptr_t)addr + 4096,
-               .vec = (uintptr_t)&region,
-               .vec_len = 1,
-               .size = sizeof(struct pm_scan_arg),
-               .category_mask = PAGE_IS_PFNZERO,
-               .category_anyof_mask = PAGE_IS_PRESENT,
-               .return_mask = PAGE_IS_PFNZERO,
-       };
-       return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
-}
-
-int main(int argc, char *argv[])
-{
-       char *mem, *page0, *page1, *page2, tmp;
-       const size_t pagesize = getpagesize();
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       int pagemap_fd;
-
-       ksft_print_header();
-       ksft_set_plan(3);
-
-       /*
-        * We'll use memory that is not mapped into the VM for simplicity.
-        * Shared zeropages are enabled/disabled per-process.
-        */
-       mem = mmap(0, 3 * pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
-       TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");
-
-       /* Disable THP. Ignore errors on older kernels. */
-       madvise(mem, 3 * pagesize, MADV_NOHUGEPAGE);
-
-       page0 = mem;
-       page1 = page0 + pagesize;
-       page2 = page1 + pagesize;
-
-       /* Can we even detect shared zeropages? */
-       pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
-       TEST_REQUIRE(pagemap_fd >= 0);
-
-       tmp = *page0;
-       asm volatile("" : "+r" (tmp));
-       TEST_REQUIRE(maps_shared_zeropage(pagemap_fd, page0) == 1);
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       /* Verify that we get the shared zeropage after VM creation. */
-       tmp = *page1;
-       asm volatile("" : "+r" (tmp));
-       ksft_test_result(maps_shared_zeropage(pagemap_fd, page1) == 1,
-                        "Shared zeropages should be enabled\n");
-
-       /*
-        * Let our VM execute a storage key instruction that should
-        * unshare all shared zeropages.
-        */
-       vcpu_run(vcpu);
-       get_ucall(vcpu, &uc);
-       TEST_ASSERT_EQ(uc.cmd, UCALL_DONE);
-
-       /* Verify that we don't have a shared zeropage anymore. */
-       ksft_test_result(!maps_shared_zeropage(pagemap_fd, page1),
-                        "Shared zeropage should be gone\n");
-
-       /* Verify that we don't get any new shared zeropages. */
-       tmp = *page2;
-       asm volatile("" : "+r" (tmp));
-       ksft_test_result(!maps_shared_zeropage(pagemap_fd, page2),
-                        "Shared zeropages should be disabled\n");
-
-       kvm_vm_free(vm);
-
-       ksft_finished();
-}
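The 0x98 passed to set_storage_key() above follows the usual s390 storage-key byte layout: the access-control key in the top four bits, then the fetch-protection, reference and change bits, so 0x98 means key 9 with fetch protection enabled. Because the shared zeropage cannot carry per-guest storage keys, KVM is expected to break the sharing once the guest starts using key instructions, which is what this test verifies. A small decoding sketch, for illustration only:

#include <stdint.h>
#include <stdio.h>

static void decode_storage_key(uint8_t skey)
{
        printf("acc=%u fetch-prot=%u ref=%u change=%u\n",
               skey >> 4,         /* access-control key: 0x98 -> 9 */
               (skey >> 3) & 1,   /* fetch-protection bit: 0x98 -> 1 */
               (skey >> 2) & 1,   /* reference bit */
               (skey >> 1) & 1);  /* change bit */
}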
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
deleted file mode 100644 (file)
index 53def35..0000000
+++ /dev/null
@@ -1,238 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Test for s390x KVM_CAP_SYNC_REGS
- *
- * Based on the same test for x86:
- * Copyright (C) 2018, Google LLC.
- *
- * Adaptions for s390x:
- * Copyright (C) 2019, Red Hat, Inc.
- *
- * Test expected behavior of the KVM_CAP_SYNC_REGS functionality.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "diag318_test_handler.h"
-#include "kselftest.h"
-
-static void guest_code(void)
-{
-       /*
-        * We embed diag 501 here instead of doing a ucall to avoid the
-        * compiler clobbering r11 at the time of the ucall.
-        */
-       asm volatile (
-               "0:     diag 0,0,0x501\n"
-               "       ahi 11,1\n"
-               "       j 0b\n"
-       );
-}
-
-#define REG_COMPARE(reg) \
-       TEST_ASSERT(left->reg == right->reg, \
-                   "Register " #reg \
-                   " values did not match: 0x%llx, 0x%llx", \
-                   left->reg, right->reg)
-
-#define REG_COMPARE32(reg) \
-       TEST_ASSERT(left->reg == right->reg, \
-                   "Register " #reg \
-                   " values did not match: 0x%x, 0x%x", \
-                   left->reg, right->reg)
-
-
-static void compare_regs(struct kvm_regs *left, struct kvm_sync_regs *right)
-{
-       int i;
-
-       for (i = 0; i < 16; i++)
-               REG_COMPARE(gprs[i]);
-}
-
-static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right)
-{
-       int i;
-
-       for (i = 0; i < 16; i++)
-               REG_COMPARE32(acrs[i]);
-
-       for (i = 0; i < 16; i++)
-               REG_COMPARE(crs[i]);
-}
-
-#undef REG_COMPARE
-
-#define TEST_SYNC_FIELDS   (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS|KVM_SYNC_DIAG318)
-#define INVALID_SYNC_FIELD 0x80000000
-
-void test_read_invalid(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-       int rv;
-
-       /* Request reading invalid register set from VCPU. */
-       run->kvm_valid_regs = INVALID_SYNC_FIELD;
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
-                   rv);
-       run->kvm_valid_regs = 0;
-
-       run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
-                   rv);
-       run->kvm_valid_regs = 0;
-}
-
-void test_set_invalid(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-       int rv;
-
-       /* Request setting invalid register set into VCPU. */
-       run->kvm_dirty_regs = INVALID_SYNC_FIELD;
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
-                   rv);
-       run->kvm_dirty_regs = 0;
-
-       run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
-                   rv);
-       run->kvm_dirty_regs = 0;
-}
-
-void test_req_and_verify_all_valid_regs(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-       struct kvm_sregs sregs;
-       struct kvm_regs regs;
-       int rv;
-
-       /* Request and verify all valid register sets. */
-       run->kvm_valid_regs = TEST_SYNC_FIELDS;
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
-       TEST_ASSERT(run->s390_sieic.icptcode == 4 &&
-                   (run->s390_sieic.ipa >> 8) == 0x83 &&
-                   (run->s390_sieic.ipb >> 16) == 0x501,
-                   "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x",
-                   run->s390_sieic.icptcode, run->s390_sieic.ipa,
-                   run->s390_sieic.ipb);
-
-       vcpu_regs_get(vcpu, &regs);
-       compare_regs(&regs, &run->s.regs);
-
-       vcpu_sregs_get(vcpu, &sregs);
-       compare_sregs(&sregs, &run->s.regs);
-}
-
-void test_set_and_verify_various_reg_values(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-       struct kvm_sregs sregs;
-       struct kvm_regs regs;
-       int rv;
-
-       /* Set and verify various register values */
-       run->s.regs.gprs[11] = 0xBAD1DEA;
-       run->s.regs.acrs[0] = 1 << 11;
-
-       run->kvm_valid_regs = TEST_SYNC_FIELDS;
-       run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS;
-
-       if (get_diag318_info() > 0) {
-               run->s.regs.diag318 = get_diag318_info();
-               run->kvm_dirty_regs |= KVM_SYNC_DIAG318;
-       }
-
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
-       TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1,
-                   "r11 sync regs value incorrect 0x%llx.",
-                   run->s.regs.gprs[11]);
-       TEST_ASSERT(run->s.regs.acrs[0]  == 1 << 11,
-                   "acr0 sync regs value incorrect 0x%x.",
-                   run->s.regs.acrs[0]);
-       TEST_ASSERT(run->s.regs.diag318 == get_diag318_info(),
-                   "diag318 sync regs value incorrect 0x%llx.",
-                   run->s.regs.diag318);
-
-       vcpu_regs_get(vcpu, &regs);
-       compare_regs(&regs, &run->s.regs);
-
-       vcpu_sregs_get(vcpu, &sregs);
-       compare_sregs(&sregs, &run->s.regs);
-}
-
-void test_clear_kvm_dirty_regs_bits(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-       int rv;
-
-       /* Clear kvm_dirty_regs bits, verify new s.regs values are
-        * overwritten with existing guest values.
-        */
-       run->kvm_valid_regs = TEST_SYNC_FIELDS;
-       run->kvm_dirty_regs = 0;
-       run->s.regs.gprs[11] = 0xDEADBEEF;
-       run->s.regs.diag318 = 0x4B1D;
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
-       TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF,
-                   "r11 sync regs value incorrect 0x%llx.",
-                   run->s.regs.gprs[11]);
-       TEST_ASSERT(run->s.regs.diag318 != 0x4B1D,
-                   "diag318 sync regs value incorrect 0x%llx.",
-                   run->s.regs.diag318);
-}
-
-struct testdef {
-       const char *name;
-       void (*test)(struct kvm_vcpu *vcpu);
-} testlist[] = {
-       { "read invalid", test_read_invalid },
-       { "set invalid", test_set_invalid },
-       { "request+verify all valid regs", test_req_and_verify_all_valid_regs },
-       { "set+verify various regs", test_set_and_verify_various_reg_values },
-       { "clear kvm_dirty_regs bits", test_clear_kvm_dirty_regs_bits },
-};
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       int idx;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
-
-       ksft_print_header();
-
-       ksft_set_plan(ARRAY_SIZE(testlist));
-
-       /* Create VM */
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
-               testlist[idx].test(vcpu);
-               ksft_test_result_pass("%s\n", testlist[idx].name);
-       }
-
-       kvm_vm_free(vm);
-
-       ksft_finished();        /* Print results and exit() accordingly */
-}
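The kvm_valid_regs / kvm_dirty_regs handshake these tests exercise works directly on the mmap()ed kvm_run area: valid bits ask KVM to fill run->s.regs on the next exit, dirty bits tell KVM to load values from run->s.regs before entering the guest. A bare-bones sketch on a raw vCPU fd with illustrative values (the selftest goes through _vcpu_run() and the register-compare helpers instead):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static void sync_regs_roundtrip(int vcpu_fd, struct kvm_run *run)
{
        /* Request GPRs, ACRs and CRs in run->s.regs on the next exit. */
        run->kvm_valid_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS;

        /* Hand a modified GPR back to KVM before the next guest entry. */
        run->s.regs.gprs[11] = 0xBAD1DEA;
        run->kvm_dirty_regs = KVM_SYNC_GPRS;

        ioctl(vcpu_fd, KVM_RUN, NULL);
        /* On return, run->s.regs reflects the current guest register state. */
}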
diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c
deleted file mode 100644 (file)
index 12d5e1c..0000000
+++ /dev/null
@@ -1,244 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Test TEST PROTECTION emulation.
- *
- * Copyright IBM Corp. 2021
- */
-#include <sys/mman.h>
-#include "test_util.h"
-#include "kvm_util.h"
-#include "kselftest.h"
-#include "ucall_common.h"
-#include "processor.h"
-
-#define CR0_FETCH_PROTECTION_OVERRIDE  (1UL << (63 - 38))
-#define CR0_STORAGE_PROTECTION_OVERRIDE        (1UL << (63 - 39))
-
-static __aligned(PAGE_SIZE) uint8_t pages[2][PAGE_SIZE];
-static uint8_t *const page_store_prot = pages[0];
-static uint8_t *const page_fetch_prot = pages[1];
-
-/* A nonzero return value indicates that the address is not mapped */
-static int set_storage_key(void *addr, uint8_t key)
-{
-       int not_mapped = 0;
-
-       asm volatile (
-                      "lra     %[addr], 0(0,%[addr])\n"
-               "       jz      0f\n"
-               "       llill   %[not_mapped],1\n"
-               "       j       1f\n"
-               "0:     sske    %[key], %[addr]\n"
-               "1:"
-               : [addr] "+&a" (addr), [not_mapped] "+r" (not_mapped)
-               : [key] "r" (key)
-               : "cc"
-       );
-       return -not_mapped;
-}
-
-enum permission {
-       READ_WRITE = 0,
-       READ = 1,
-       RW_PROTECTED = 2,
-       TRANSL_UNAVAIL = 3,
-};
-
-static enum permission test_protection(void *addr, uint8_t key)
-{
-       uint64_t mask;
-
-       asm volatile (
-                      "tprot   %[addr], 0(%[key])\n"
-               "       ipm     %[mask]\n"
-               : [mask] "=r" (mask)
-               : [addr] "Q" (*(char *)addr),
-                 [key] "a" (key)
-               : "cc"
-       );
-
-       return (enum permission)(mask >> 28);
-}
-
-enum stage {
-       STAGE_INIT_SIMPLE,
-       TEST_SIMPLE,
-       STAGE_INIT_FETCH_PROT_OVERRIDE,
-       TEST_FETCH_PROT_OVERRIDE,
-       TEST_STORAGE_PROT_OVERRIDE,
-       STAGE_END       /* must be the last entry (it's the amount of tests) */
-};
-
-struct test {
-       enum stage stage;
-       void *addr;
-       uint8_t key;
-       enum permission expected;
-} tests[] = {
-       /*
-        * We perform each test in the array by executing TEST PROTECTION on
-        * the specified addr with the specified key and checking if the returned
-        * permissions match the expected value.
-        * Both guest and host cooperate to set up the required test conditions.
-        * A central condition is that the page targeted by addr has to be DAT
-        * protected in the host mappings, in order for KVM to emulate the
-        * TEST PROTECTION instruction.
-        * Since the page tables are shared, the host uses mprotect to achieve
-        * this.
-        *
-        * Test resulting in RW_PROTECTED/TRANSL_UNAVAIL will be interpreted
-        * by SIE, not KVM, but there is no harm in testing them also.
-        * See Enhanced Suppression-on-Protection Facilities in the
-       /* vm, fetch protection override does not apply */
-        */
-       /*
-        * guest: set storage key of page_store_prot to 1
-        *        storage key of page_fetch_prot to 9 and enable
-        *        protection for it
-        * STAGE_INIT_SIMPLE
-        * host: write protect both via mprotect
-        */
-       /* access key 0 matches any storage key -> RW */
-       { TEST_SIMPLE, page_store_prot, 0x00, READ_WRITE },
-       /* access key matches storage key -> RW */
-       { TEST_SIMPLE, page_store_prot, 0x10, READ_WRITE },
-       /* mismatched keys, but no fetch protection -> RO */
-       { TEST_SIMPLE, page_store_prot, 0x20, READ },
-       /* access key 0 matches any storage key -> RW */
-       { TEST_SIMPLE, page_fetch_prot, 0x00, READ_WRITE },
-       /* access key matches storage key -> RW */
-       { TEST_SIMPLE, page_fetch_prot, 0x90, READ_WRITE },
-       /* mismatched keys, fetch protection -> inaccessible */
-       { TEST_SIMPLE, page_fetch_prot, 0x10, RW_PROTECTED },
-       /* page 0 not mapped yet -> translation not available */
-       { TEST_SIMPLE, (void *)0x00, 0x10, TRANSL_UNAVAIL },
-       /*
-        * host: try to map page 0
-        * guest: set storage key of page 0 to 9 and enable fetch protection
-        * STAGE_INIT_FETCH_PROT_OVERRIDE
-        * host: write protect page 0
-        *       enable fetch protection override
-        */
-       /* mismatched keys, fetch protection, but override applies -> RO */
-       { TEST_FETCH_PROT_OVERRIDE, (void *)0x00, 0x10, READ },
-       /* mismatched keys, fetch protection, override applies to 0-2048 only -> inaccessible */
-       { TEST_FETCH_PROT_OVERRIDE, (void *)2049, 0x10, RW_PROTECTED },
-       /*
-        * host: enable storage protection override
-        */
-       /* mismatched keys, but override applies (storage key 9) -> RW */
-       { TEST_STORAGE_PROT_OVERRIDE, page_fetch_prot, 0x10, READ_WRITE },
-       /* mismatched keys, no fetch protection, override doesn't apply -> RO */
-       { TEST_STORAGE_PROT_OVERRIDE, page_store_prot, 0x20, READ },
-       /* mismatched keys, but override applies (storage key 9) -> RW */
-       { TEST_STORAGE_PROT_OVERRIDE, (void *)2049, 0x10, READ_WRITE },
-       /* end marker */
-       { STAGE_END, 0, 0, 0 },
-};
-
-static enum stage perform_next_stage(int *i, bool mapped_0)
-{
-       enum stage stage = tests[*i].stage;
-       enum permission result;
-       bool skip;
-
-       for (; tests[*i].stage == stage; (*i)++) {
-               /*
-                * Some fetch protection override tests require that page 0
-                * be mapped; however, when the host tries to map that page via
-                * vm_vaddr_alloc, it may happen that some other page gets mapped
-                * instead.
-                * In order to skip these tests, we detect this inside the guest.
-                */
-               skip = tests[*i].addr < (void *)PAGE_SIZE &&
-                      tests[*i].expected != TRANSL_UNAVAIL &&
-                      !mapped_0;
-               if (!skip) {
-                       result = test_protection(tests[*i].addr, tests[*i].key);
-                       __GUEST_ASSERT(result == tests[*i].expected,
-                                      "Wanted %u, got %u, for i = %u",
-                                      tests[*i].expected, result, *i);
-               }
-       }
-       return stage;
-}
-
-static void guest_code(void)
-{
-       bool mapped_0;
-       int i = 0;
-
-       GUEST_ASSERT_EQ(set_storage_key(page_store_prot, 0x10), 0);
-       GUEST_ASSERT_EQ(set_storage_key(page_fetch_prot, 0x98), 0);
-       GUEST_SYNC(STAGE_INIT_SIMPLE);
-       GUEST_SYNC(perform_next_stage(&i, false));
-
-       /* Fetch-protection override */
-       mapped_0 = !set_storage_key((void *)0, 0x98);
-       GUEST_SYNC(STAGE_INIT_FETCH_PROT_OVERRIDE);
-       GUEST_SYNC(perform_next_stage(&i, mapped_0));
-
-       /* Storage-protection override */
-       GUEST_SYNC(perform_next_stage(&i, mapped_0));
-}
-
-#define HOST_SYNC_NO_TAP(vcpup, stage)                         \
-({                                                             \
-       struct kvm_vcpu *__vcpu = (vcpup);                      \
-       struct ucall uc;                                        \
-       int __stage = (stage);                                  \
-                                                               \
-       vcpu_run(__vcpu);                                       \
-       get_ucall(__vcpu, &uc);                                 \
-       if (uc.cmd == UCALL_ABORT)                              \
-               REPORT_GUEST_ASSERT(uc);                        \
-       TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC);                     \
-       TEST_ASSERT_EQ(uc.args[1], __stage);                    \
-})
-
-#define HOST_SYNC(vcpu, stage)                 \
-({                                             \
-       HOST_SYNC_NO_TAP(vcpu, stage);          \
-       ksft_test_result_pass("" #stage "\n");  \
-})
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct kvm_run *run;
-       vm_vaddr_t guest_0_page;
-
-       ksft_print_header();
-       ksft_set_plan(STAGE_END);
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       run = vcpu->run;
-
-       HOST_SYNC(vcpu, STAGE_INIT_SIMPLE);
-       mprotect(addr_gva2hva(vm, (vm_vaddr_t)pages), PAGE_SIZE * 2, PROT_READ);
-       HOST_SYNC(vcpu, TEST_SIMPLE);
-
-       guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0);
-       if (guest_0_page != 0) {
-               /* Use NO_TAP so we don't get a PASS print */
-               HOST_SYNC_NO_TAP(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE);
-               ksft_test_result_skip("STAGE_INIT_FETCH_PROT_OVERRIDE - "
-                                     "Did not allocate page at 0\n");
-       } else {
-               HOST_SYNC(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE);
-       }
-       if (guest_0_page == 0)
-               mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ);
-       run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
-       run->kvm_dirty_regs = KVM_SYNC_CRS;
-       HOST_SYNC(vcpu, TEST_FETCH_PROT_OVERRIDE);
-
-       run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
-       run->kvm_dirty_regs = KVM_SYNC_CRS;
-       HOST_SYNC(vcpu, TEST_STORAGE_PROT_OVERRIDE);
-
-       kvm_vm_free(vm);
-
-       ksft_finished();        /* Print results and exit() accordingly */
-}
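Both the memop tests earlier in this patch and tprot.c rely on the same two CR0-controlled overrides: fetch-protection override only covers effective addresses 0-2047, and storage-protection override only lifts key mismatches for storage whose access-control key is 9. A compact restatement of the expectations encoded in the test tables above (a sketch, not selftest code):

#include <stdbool.h>
#include <stdint.h>

#define CR0_FETCH_PROTECTION_OVERRIDE   (1UL << (63 - 38))
#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39))

static bool fetch_prot_override_applies(uint64_t cr0, uint64_t eff_addr)
{
        /* Only the first 2048 bytes of the address space are covered. */
        return (cr0 & CR0_FETCH_PROTECTION_OVERRIDE) && eff_addr < 2048;
}

static bool storage_prot_override_applies(uint64_t cr0, uint8_t storage_key)
{
        /* Only storage with access-control key 9 is covered. */
        return (cr0 & CR0_STORAGE_PROTECTION_OVERRIDE) && (storage_key >> 4) == 9;
}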
diff --git a/tools/testing/selftests/kvm/s390x/ucontrol_test.c b/tools/testing/selftests/kvm/s390x/ucontrol_test.c
deleted file mode 100644 (file)
index 0c11231..0000000
+++ /dev/null
@@ -1,638 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Test code for the s390x kvm ucontrol interface
- *
- * Copyright IBM Corp. 2024
- *
- * Authors:
- *  Christoph Schlameuss <schlameuss@linux.ibm.com>
- */
-#include "debug_print.h"
-#include "kselftest_harness.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "sie.h"
-
-#include <linux/capability.h>
-#include <linux/sizes.h>
-
-#define PGM_SEGMENT_TRANSLATION 0x10
-
-#define VM_MEM_SIZE (4 * SZ_1M)
-#define VM_MEM_EXT_SIZE (2 * SZ_1M)
-#define VM_MEM_MAX_M ((VM_MEM_SIZE + VM_MEM_EXT_SIZE) / SZ_1M)
-
-/* Declare capget() directly so we can check capabilities without libcap */
-int capget(cap_user_header_t header, cap_user_data_t data);
-
-/**
- * In order to create user controlled virtual machines on S390,
- * check KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL
- * as privileged user (SYS_ADMIN).
- */
-void require_ucontrol_admin(void)
-{
-       struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3];
-       struct __user_cap_header_struct hdr = {
-               .version = _LINUX_CAPABILITY_VERSION_3,
-       };
-       int rc;
-
-       rc = capget(&hdr, data);
-       TEST_ASSERT_EQ(0, rc);
-       TEST_REQUIRE((data->effective & CAP_TO_MASK(CAP_SYS_ADMIN)) > 0);
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL));
-}
-
-/* Test program setting some registers and looping */
-extern char test_gprs_asm[];
-asm("test_gprs_asm:\n"
-       "xgr    %r0, %r0\n"
-       "lgfi   %r1,1\n"
-       "lgfi   %r2,2\n"
-       "lgfi   %r3,3\n"
-       "lgfi   %r4,4\n"
-       "lgfi   %r5,5\n"
-       "lgfi   %r6,6\n"
-       "lgfi   %r7,7\n"
-       "0:\n"
-       "       diag    0,0,0x44\n"
-       "       ahi     %r0,1\n"
-       "       j       0b\n"
-);
-
-/* Test program manipulating memory */
-extern char test_mem_asm[];
-asm("test_mem_asm:\n"
-       "xgr    %r0, %r0\n"
-
-       "0:\n"
-       "       ahi     %r0,1\n"
-       "       st      %r1,0(%r5,%r6)\n"
-
-       "       xgr     %r1,%r1\n"
-       "       l       %r1,0(%r5,%r6)\n"
-       "       ahi     %r0,1\n"
-       "       diag    0,0,0x44\n"
-
-       "       j       0b\n"
-);
-
-/* Test program manipulating storage keys */
-extern char test_skey_asm[];
-asm("test_skey_asm:\n"
-       "xgr    %r0, %r0\n"
-
-       "0:\n"
-       "       ahi     %r0,1\n"
-       "       st      %r1,0(%r5,%r6)\n"
-
-       "       iske    %r1,%r6\n"
-       "       ahi     %r0,1\n"
-       "       diag    0,0,0x44\n"
-
-       "       sske    %r1,%r6\n"
-       "       xgr     %r1,%r1\n"
-       "       iske    %r1,%r6\n"
-       "       ahi     %r0,1\n"
-       "       diag    0,0,0x44\n"
-
-       "       rrbe    %r1,%r6\n"
-       "       iske    %r1,%r6\n"
-       "       ahi     %r0,1\n"
-       "       diag    0,0,0x44\n"
-
-       "       j       0b\n"
-);
-
-FIXTURE(uc_kvm)
-{
-       struct kvm_s390_sie_block *sie_block;
-       struct kvm_run *run;
-       uintptr_t base_gpa;
-       uintptr_t code_gpa;
-       uintptr_t base_hva;
-       uintptr_t code_hva;
-       int kvm_run_size;
-       vm_paddr_t pgd;
-       void *vm_mem;
-       int vcpu_fd;
-       int kvm_fd;
-       int vm_fd;
-};
-
-/**
- * create VM with single vcpu, map kvm_run and SIE control block for easy access
- */
-FIXTURE_SETUP(uc_kvm)
-{
-       struct kvm_s390_vm_cpu_processor info;
-       int rc;
-
-       require_ucontrol_admin();
-
-       self->kvm_fd = open_kvm_dev_path_or_exit();
-       self->vm_fd = ioctl(self->kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
-       ASSERT_GE(self->vm_fd, 0);
-
-       kvm_device_attr_get(self->vm_fd, KVM_S390_VM_CPU_MODEL,
-                           KVM_S390_VM_CPU_PROCESSOR, &info);
-       TH_LOG("create VM 0x%llx", info.cpuid);
-
-       self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0);
-       ASSERT_GE(self->vcpu_fd, 0);
-
-       self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
-       ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run))
-                 TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size));
-       self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size,
-                   PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
-       ASSERT_NE(self->run, MAP_FAILED);
-       /**
-        * For virtual cpus that have been created with S390 user controlled
-        * virtual machines, the resulting vcpu fd can be memory mapped at page
-        * offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of
-        * the virtual cpu's hardware control block.
-        */
-       self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE,
-                         PROT_READ | PROT_WRITE, MAP_SHARED,
-                         self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT);
-       ASSERT_NE(self->sie_block, MAP_FAILED);
-
-       TH_LOG("VM created %p %p", self->run, self->sie_block);
-
-       self->base_gpa = 0;
-       self->code_gpa = self->base_gpa + (3 * SZ_1M);
-
-       self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_MAX_M * SZ_1M);
-       ASSERT_NE(NULL, self->vm_mem) TH_LOG("malloc failed %u", errno);
-       self->base_hva = (uintptr_t)self->vm_mem;
-       self->code_hva = self->base_hva - self->base_gpa + self->code_gpa;
-       struct kvm_s390_ucas_mapping map = {
-               .user_addr = self->base_hva,
-               .vcpu_addr = self->base_gpa,
-               .length = VM_MEM_SIZE,
-       };
-       TH_LOG("ucas map %p %p 0x%llx",
-              (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
-       rc = ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map);
-       ASSERT_EQ(0, rc) TH_LOG("ucas map result %d not expected, %s",
-                               rc, strerror(errno));
-
-       TH_LOG("page in %p", (void *)self->base_gpa);
-       rc = ioctl(self->vcpu_fd, KVM_S390_VCPU_FAULT, self->base_gpa);
-       ASSERT_EQ(0, rc) TH_LOG("vcpu fault (%p) result %d not expected, %s",
-                               (void *)self->base_hva, rc, strerror(errno));
-
-       self->sie_block->cpuflags &= ~CPUSTAT_STOPPED;
-}
-
-FIXTURE_TEARDOWN(uc_kvm)
-{
-       munmap(self->sie_block, PAGE_SIZE);
-       munmap(self->run, self->kvm_run_size);
-       close(self->vcpu_fd);
-       close(self->vm_fd);
-       close(self->kvm_fd);
-       free(self->vm_mem);
-}
-
-TEST_F(uc_kvm, uc_sie_assertions)
-{
-       /* assert interception of Code 08 (Program Interruption) is set */
-       EXPECT_EQ(0, self->sie_block->ecb & ECB_SPECI);
-}
-
-TEST_F(uc_kvm, uc_attr_mem_limit)
-{
-       u64 limit;
-       struct kvm_device_attr attr = {
-               .group = KVM_S390_VM_MEM_CTRL,
-               .attr = KVM_S390_VM_MEM_LIMIT_SIZE,
-               .addr = (unsigned long)&limit,
-       };
-       int rc;
-
-       rc = ioctl(self->vm_fd, KVM_GET_DEVICE_ATTR, &attr);
-       EXPECT_EQ(0, rc);
-       EXPECT_EQ(~0UL, limit);
-
-       /* assert set not supported */
-       rc = ioctl(self->vm_fd, KVM_SET_DEVICE_ATTR, &attr);
-       EXPECT_EQ(-1, rc);
-       EXPECT_EQ(EINVAL, errno);
-}
-
-TEST_F(uc_kvm, uc_no_dirty_log)
-{
-       struct kvm_dirty_log dlog;
-       int rc;
-
-       rc = ioctl(self->vm_fd, KVM_GET_DIRTY_LOG, &dlog);
-       EXPECT_EQ(-1, rc);
-       EXPECT_EQ(EINVAL, errno);
-}
-
-/**
- * Assert HPAGE CAP cannot be enabled on UCONTROL VM
- */
-TEST(uc_cap_hpage)
-{
-       int rc, kvm_fd, vm_fd, vcpu_fd;
-       struct kvm_enable_cap cap = {
-               .cap = KVM_CAP_S390_HPAGE_1M,
-       };
-
-       require_ucontrol_admin();
-
-       kvm_fd = open_kvm_dev_path_or_exit();
-       vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
-       ASSERT_GE(vm_fd, 0);
-
-       /* assert hpages are not supported on ucontrol vm */
-       rc = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_HPAGE_1M);
-       EXPECT_EQ(0, rc);
-
-       /* Test that KVM_CAP_S390_HPAGE_1M can't be enabled for a ucontrol vm */
-       rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap);
-       EXPECT_EQ(-1, rc);
-       EXPECT_EQ(EINVAL, errno);
-
-       /* assert HPAGE CAP is rejected after vCPU creation */
-       vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
-       ASSERT_GE(vcpu_fd, 0);
-       rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap);
-       EXPECT_EQ(-1, rc);
-       EXPECT_EQ(EBUSY, errno);
-
-       close(vcpu_fd);
-       close(vm_fd);
-       close(kvm_fd);
-}
-
-/* calculate host virtual addr from guest physical addr */
-static void *gpa2hva(FIXTURE_DATA(uc_kvm) *self, u64 gpa)
-{
-       return (void *)(self->base_hva - self->base_gpa + gpa);
-}
-
-/* map / make additional memory available */
-static int uc_map_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length)
-{
-       struct kvm_s390_ucas_mapping map = {
-               .user_addr = (u64)gpa2hva(self, vcpu_addr),
-               .vcpu_addr = vcpu_addr,
-               .length = length,
-       };
-       pr_info("ucas map %p %p 0x%llx",
-               (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
-       return ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map);
-}
-
-/* unmap previously mapped memory */
-static int uc_unmap_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length)
-{
-       struct kvm_s390_ucas_mapping map = {
-               .user_addr = (u64)gpa2hva(self, vcpu_addr),
-               .vcpu_addr = vcpu_addr,
-               .length = length,
-       };
-       pr_info("ucas unmap %p %p 0x%llx",
-               (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
-       return ioctl(self->vcpu_fd, KVM_S390_UCAS_UNMAP, &map);
-}
-
-/* handle ucontrol exit by mapping the accessed segment */
-static void uc_handle_exit_ucontrol(FIXTURE_DATA(uc_kvm) *self)
-{
-       struct kvm_run *run = self->run;
-       u64 seg_addr;
-       int rc;
-
-       TEST_ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
-       switch (run->s390_ucontrol.pgm_code) {
-       case PGM_SEGMENT_TRANSLATION:
-               seg_addr = run->s390_ucontrol.trans_exc_code & ~(SZ_1M - 1);
-               pr_info("ucontrol pic segment translation 0x%llx, mapping segment 0x%lx\n",
-                       run->s390_ucontrol.trans_exc_code, seg_addr);
-               /* map / make additional memory available */
-               rc = uc_map_ext(self, seg_addr, SZ_1M);
-               TEST_ASSERT_EQ(0, rc);
-               break;
-       default:
-               TEST_FAIL("UNEXPECTED PGM CODE %d", run->s390_ucontrol.pgm_code);
-       }
-}
-
-/*
- * Enable storage-key handling for the guest by clearing the keyless-subset
- * (KSS) control and disabling interception of the storage-key instructions
- * ISKE, SSKE and RRBE.
- */
-static void uc_skey_enable(FIXTURE_DATA(uc_kvm) *self)
-{
-       struct kvm_s390_sie_block *sie_block = self->sie_block;
-
-       /* disable KSS */
-       sie_block->cpuflags &= ~CPUSTAT_KSS;
-       /* disable skey inst interception */
-       sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
-}
-
-/*
- * Handle the instruction intercept
- * Returns true if the interception was handled and execution can continue
- */
-static bool uc_handle_insn_ic(FIXTURE_DATA(uc_kvm) *self)
-{
-       struct kvm_s390_sie_block *sie_block = self->sie_block;
-       int ilen = insn_length(sie_block->ipa >> 8);
-       struct kvm_run *run = self->run;
-
-       switch (run->s390_sieic.ipa) {
-       case 0xB229: /* ISKE */
-       case 0xB22b: /* SSKE */
-       case 0xB22a: /* RRBE */
-               uc_skey_enable(self);
-
-               /* rewind to reexecute intercepted instruction */
-               run->psw_addr = run->psw_addr - ilen;
-               pr_info("rewind guest addr to 0x%.16llx\n", run->psw_addr);
-               return true;
-       default:
-               return false;
-       }
-}
-
-/*
- * Handle the SIEIC exit:
- * fail on interception codes not expected in the test cases.
- * Returns true if the interception was handled and execution can continue.
- */
-static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) *self)
-{
-       struct kvm_s390_sie_block *sie_block = self->sie_block;
-       struct kvm_run *run = self->run;
-
-       /* check SIE interception code */
-       pr_info("sieic: 0x%.2x 0x%.4x 0x%.8x\n",
-               run->s390_sieic.icptcode,
-               run->s390_sieic.ipa,
-               run->s390_sieic.ipb);
-       switch (run->s390_sieic.icptcode) {
-       case ICPT_INST:
-               /* end execution in caller on intercepted instruction */
-               pr_info("sie instruction interception\n");
-               return uc_handle_insn_ic(self);
-       case ICPT_KSS:
-               uc_skey_enable(self);
-               return true;
-       case ICPT_OPEREXC:
-               /* operation exception */
-               TEST_FAIL("sie exception on %.4x%.8x", sie_block->ipa, sie_block->ipb);
-       default:
-               TEST_FAIL("UNEXPECTED SIEIC CODE %d", run->s390_sieic.icptcode);
-       }
-       return true;
-}
-
-/* verify VM state on exit */
-static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) *self)
-{
-       struct kvm_run *run = self->run;
-
-       switch (run->exit_reason) {
-       case KVM_EXIT_S390_UCONTROL:
-               /*
-                * Check the program interruption code and handle a page
-                * fault by mapping the accessed segment (ucas map).
-                */
-               uc_handle_exit_ucontrol(self);
-               break;
-       case KVM_EXIT_S390_SIEIC:
-               return uc_handle_sieic(self);
-       default:
-               pr_info("exit_reason %2d not handled\n", run->exit_reason);
-       }
-       return true;
-}
-
-/* run the VM until interrupted */
-static int uc_run_once(FIXTURE_DATA(uc_kvm) *self)
-{
-       int rc;
-
-       rc = ioctl(self->vcpu_fd, KVM_RUN, NULL);
-       print_run(self->run, self->sie_block);
-       print_regs(self->run);
-       pr_debug("run %d / %d %s\n", rc, errno, strerror(errno));
-       return rc;
-}
-
-static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) *self)
-{
-       struct kvm_s390_sie_block *sie_block = self->sie_block;
-
-       /* assert vm was interrupted by diag 0x0044 */
-       TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason);
-       TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode);
-       TEST_ASSERT_EQ(0x8300, sie_block->ipa);
-       TEST_ASSERT_EQ(0x440000, sie_block->ipb);
-}
-
-TEST_F(uc_kvm, uc_no_user_region)
-{
-       struct kvm_userspace_memory_region region = {
-               .slot = 1,
-               .guest_phys_addr = self->code_gpa,
-               .memory_size = VM_MEM_EXT_SIZE,
-               .userspace_addr = (uintptr_t)self->code_hva,
-       };
-       struct kvm_userspace_memory_region2 region2 = {
-               .slot = 1,
-               .guest_phys_addr = self->code_gpa,
-               .memory_size = VM_MEM_EXT_SIZE,
-               .userspace_addr = (uintptr_t)self->code_hva,
-       };
-
-       ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, &region));
-       ASSERT_EQ(EINVAL, errno);
-
-       ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION2, &region2));
-       ASSERT_EQ(EINVAL, errno);
-}
-
-TEST_F(uc_kvm, uc_map_unmap)
-{
-       struct kvm_sync_regs *sync_regs = &self->run->s.regs;
-       struct kvm_run *run = self->run;
-       const u64 disp = 1;
-       int rc;
-
-       /* copy test_mem_asm to code_hva / code_gpa */
-       TH_LOG("copy code %p to vm mapped memory %p / %p",
-              &test_mem_asm, (void *)self->code_hva, (void *)self->code_gpa);
-       memcpy((void *)self->code_hva, &test_mem_asm, PAGE_SIZE);
-
-       /* DAT disabled + 64 bit mode */
-       run->psw_mask = 0x0000000180000000ULL;
-       run->psw_addr = self->code_gpa;
-
-       /* set register content for test_mem_asm to access not mapped memory */
-       sync_regs->gprs[1] = 0x55;
-       sync_regs->gprs[5] = self->base_gpa;
-       sync_regs->gprs[6] = VM_MEM_SIZE + disp;
-       run->kvm_dirty_regs |= KVM_SYNC_GPRS;
-
-       /* run and expect to fail with ucontrol pic segment translation */
-       ASSERT_EQ(0, uc_run_once(self));
-       ASSERT_EQ(1, sync_regs->gprs[0]);
-       ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
-
-       ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code);
-       ASSERT_EQ(self->base_gpa + VM_MEM_SIZE, run->s390_ucontrol.trans_exc_code);
-
-       /* fail to map memory with a non-segment-aligned address */
-       rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE + disp, VM_MEM_EXT_SIZE);
-       ASSERT_GT(0, rc)
-               TH_LOG("ucas map for a non-segment-aligned address should fail but didn't; "
-                      "result %d not expected, %s", rc, strerror(errno));
-
-       /* map / make additional memory available */
-       rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE);
-       ASSERT_EQ(0, rc)
-               TH_LOG("ucas map result %d not expected, %s", rc, strerror(errno));
-       ASSERT_EQ(0, uc_run_once(self));
-       ASSERT_EQ(false, uc_handle_exit(self));
-       uc_assert_diag44(self);
-
-       /* assert registers and memory are in expected state */
-       ASSERT_EQ(2, sync_regs->gprs[0]);
-       ASSERT_EQ(0x55, sync_regs->gprs[1]);
-       ASSERT_EQ(0x55, *(u32 *)gpa2hva(self, self->base_gpa + VM_MEM_SIZE + disp));
-
-       /* unmap and run loop again */
-       rc = uc_unmap_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE);
-       ASSERT_EQ(0, rc)
-               TH_LOG("ucas unmap result %d not expected, %s", rc, strerror(errno));
-       ASSERT_EQ(0, uc_run_once(self));
-       ASSERT_EQ(3, sync_regs->gprs[0]);
-       ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
-       ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code);
-       /* handle ucontrol exit and remap memory after previous map and unmap */
-       ASSERT_EQ(true, uc_handle_exit(self));
-}
-
-TEST_F(uc_kvm, uc_gprs)
-{
-       struct kvm_sync_regs *sync_regs = &self->run->s.regs;
-       struct kvm_run *run = self->run;
-       struct kvm_regs regs = {};
-
-       /* Set registers to values that are different from the ones that we expect below */
-       for (int i = 0; i < 8; i++)
-               sync_regs->gprs[i] = 8;
-       run->kvm_dirty_regs |= KVM_SYNC_GPRS;
-
-       /* copy test_gprs_asm to code_hva / code_gpa */
-       TH_LOG("copy code %p to vm mapped memory %p / %p",
-              &test_gprs_asm, (void *)self->code_hva, (void *)self->code_gpa);
-       memcpy((void *)self->code_hva, &test_gprs_asm, PAGE_SIZE);
-
-       /* DAT disabled + 64 bit mode */
-       run->psw_mask = 0x0000000180000000ULL;
-       run->psw_addr = self->code_gpa;
-
-       /* run and expect interception of diag 44 */
-       ASSERT_EQ(0, uc_run_once(self));
-       ASSERT_EQ(false, uc_handle_exit(self));
-       uc_assert_diag44(self);
-
-       /* Retrieve and check guest register values */
-       ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, &regs));
-       for (int i = 0; i < 8; i++) {
-               ASSERT_EQ(i, regs.gprs[i]);
-               ASSERT_EQ(i, sync_regs->gprs[i]);
-       }
-
-       /* run and expect interception of diag 44 again */
-       ASSERT_EQ(0, uc_run_once(self));
-       ASSERT_EQ(false, uc_handle_exit(self));
-       uc_assert_diag44(self);
-
-       /* check continued increment of register 0 value */
-       ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, &regs));
-       ASSERT_EQ(1, regs.gprs[0]);
-       ASSERT_EQ(1, sync_regs->gprs[0]);
-}
-
-TEST_F(uc_kvm, uc_skey)
-{
-       struct kvm_s390_sie_block *sie_block = self->sie_block;
-       struct kvm_sync_regs *sync_regs = &self->run->s.regs;
-       u64 test_vaddr = VM_MEM_SIZE - (SZ_1M / 2);
-       struct kvm_run *run = self->run;
-       const u8 skeyvalue = 0x34;
-
-       /* copy test_skey_asm to code_hva / code_gpa */
-       TH_LOG("copy code %p to vm mapped memory %p / %p",
-              &test_skey_asm, (void *)self->code_hva, (void *)self->code_gpa);
-       memcpy((void *)self->code_hva, &test_skey_asm, PAGE_SIZE);
-
-       /* set register content for test_skey_asm to access not mapped memory */
-       sync_regs->gprs[1] = skeyvalue;
-       sync_regs->gprs[5] = self->base_gpa;
-       sync_regs->gprs[6] = test_vaddr;
-       run->kvm_dirty_regs |= KVM_SYNC_GPRS;
-
-       /* DAT disabled + 64 bit mode */
-       run->psw_mask = 0x0000000180000000ULL;
-       run->psw_addr = self->code_gpa;
-
-       ASSERT_EQ(0, uc_run_once(self));
-       ASSERT_EQ(true, uc_handle_exit(self));
-       ASSERT_EQ(1, sync_regs->gprs[0]);
-
-       /* ISKE */
-       ASSERT_EQ(0, uc_run_once(self));
-
-       /*
-        * Bail out and skip the test after uc_skey_enable was executed but iske
-        * is still intercepted. Instructions are not handled by the kernel.
-        * Thus there is no need to test this here.
-        */
-       TEST_ASSERT_EQ(0, sie_block->cpuflags & CPUSTAT_KSS);
-       TEST_ASSERT_EQ(0, sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE));
-       TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason);
-       TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode);
-       TEST_REQUIRE(sie_block->ipa != 0xb229);
-
-       /* ISKE contd. */
-       ASSERT_EQ(false, uc_handle_exit(self));
-       ASSERT_EQ(2, sync_regs->gprs[0]);
-       /* assert initial skey (ACC = 0, R & C = 1) */
-       ASSERT_EQ(0x06, sync_regs->gprs[1]);
-       uc_assert_diag44(self);
-
-       /* SSKE + ISKE */
-       sync_regs->gprs[1] = skeyvalue;
-       run->kvm_dirty_regs |= KVM_SYNC_GPRS;
-       ASSERT_EQ(0, uc_run_once(self));
-       ASSERT_EQ(false, uc_handle_exit(self));
-       ASSERT_EQ(3, sync_regs->gprs[0]);
-       ASSERT_EQ(skeyvalue, sync_regs->gprs[1]);
-       uc_assert_diag44(self);
-
-       /* RRBE + ISKE */
-       sync_regs->gprs[1] = skeyvalue;
-       run->kvm_dirty_regs |= KVM_SYNC_GPRS;
-       ASSERT_EQ(0, uc_run_once(self));
-       ASSERT_EQ(false, uc_handle_exit(self));
-       ASSERT_EQ(4, sync_regs->gprs[0]);
-       /* assert R reset but rest of skey unchanged */
-       ASSERT_EQ(skeyvalue & 0xfa, sync_regs->gprs[1]);
-       ASSERT_EQ(0, sync_regs->gprs[1] & 0x04);
-       uc_assert_diag44(self);
-}
-
-TEST_HARNESS_MAIN
index a8267628e9ed130cffd24ad525dd3cad1c679328..86ee3385e860be47bf37b7bd8ba7d11533751459 100644 (file)
@@ -17,9 +17,9 @@
 #include <processor.h>
 
 /*
- * s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a
- * 2MB sized and aligned region so that the initial region corresponds to
- * exactly one large page.
+ * s390 needs at least 1MB alignment, and the x86 MOVE/DELETE tests need a 2MB
+ * sized and aligned region so that the initial region corresponds to exactly
+ * one large page.
  */
 #define MEM_REGION_SIZE                0x200000
 
diff --git a/tools/testing/selftests/kvm/x86/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c
new file mode 100644 (file)
index 0000000..f4ce5a1
--- /dev/null
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * amx tests
+ *
+ * Copyright (C) 2021, Intel, Inc.
+ *
+ * Tests for amx #NM exception and save/restore.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+#define NUM_TILES                      8
+#define TILE_SIZE                      1024
+#define XSAVE_SIZE                     ((NUM_TILES * TILE_SIZE) + PAGE_SIZE)
+
+/* Tile configuration associated: */
+#define PALETTE_TABLE_INDEX            1
+#define MAX_TILES                      16
+#define RESERVED_BYTES                 14
+
+#define XSAVE_HDR_OFFSET               512
+
+struct tile_config {
+       u8  palette_id;
+       u8  start_row;
+       u8  reserved[RESERVED_BYTES];
+       u16 colsb[MAX_TILES];
+       u8  rows[MAX_TILES];
+};
+
+struct tile_data {
+       u8 data[NUM_TILES * TILE_SIZE];
+};
+
+struct xtile_info {
+       u16 bytes_per_tile;
+       u16 bytes_per_row;
+       u16 max_names;
+       u16 max_rows;
+       u32 xsave_offset;
+       u32 xsave_size;
+};
+
+static struct xtile_info xtile;
+
+static inline void __ldtilecfg(void *cfg)
+{
+       asm volatile(".byte 0xc4,0xe2,0x78,0x49,0x00"
+                    : : "a"(cfg));
+}
+
+static inline void __tileloadd(void *tile)
+{
+       asm volatile(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10"
+                    : : "a"(tile), "d"(0));
+}
+
+static inline void __tilerelease(void)
+{
+       asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::);
+}
+
+static inline void __xsavec(struct xstate *xstate, uint64_t rfbm)
+{
+       uint32_t rfbm_lo = rfbm;
+       uint32_t rfbm_hi = rfbm >> 32;
+
+       asm volatile("xsavec (%%rdi)"
+                    : : "D" (xstate), "a" (rfbm_lo), "d" (rfbm_hi)
+                    : "memory");
+}
+
+static void check_xtile_info(void)
+{
+       GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE);
+
+       GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0));
+       GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE);
+
+       xtile.xsave_offset = this_cpu_property(X86_PROPERTY_XSTATE_TILE_OFFSET);
+       GUEST_ASSERT(xtile.xsave_offset == 2816);
+       xtile.xsave_size = this_cpu_property(X86_PROPERTY_XSTATE_TILE_SIZE);
+       GUEST_ASSERT(xtile.xsave_size == 8192);
+       GUEST_ASSERT(sizeof(struct tile_data) >= xtile.xsave_size);
+
+       GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_MAX_PALETTE_TABLES));
+       GUEST_ASSERT(this_cpu_property(X86_PROPERTY_AMX_MAX_PALETTE_TABLES) >=
+                    PALETTE_TABLE_INDEX);
+
+       GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_NR_TILE_REGS));
+       xtile.max_names = this_cpu_property(X86_PROPERTY_AMX_NR_TILE_REGS);
+       GUEST_ASSERT(xtile.max_names == 8);
+       xtile.bytes_per_tile = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_TILE);
+       GUEST_ASSERT(xtile.bytes_per_tile == 1024);
+       xtile.bytes_per_row = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_ROW);
+       GUEST_ASSERT(xtile.bytes_per_row == 64);
+       xtile.max_rows = this_cpu_property(X86_PROPERTY_AMX_MAX_ROWS);
+       GUEST_ASSERT(xtile.max_rows == 16);
+}
+
+static void set_tilecfg(struct tile_config *cfg)
+{
+       int i;
+
+       /* Only palette id 1 */
+       cfg->palette_id = 1;
+       for (i = 0; i < xtile.max_names; i++) {
+               cfg->colsb[i] = xtile.bytes_per_row;
+               cfg->rows[i] = xtile.max_rows;
+       }
+}
+
+static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
+                                                   struct tile_data *tiledata,
+                                                   struct xstate *xstate)
+{
+       GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE) &&
+                    this_cpu_has(X86_FEATURE_OSXSAVE));
+       check_xtile_info();
+       GUEST_SYNC(1);
+
+       /* xfd=0, enable amx */
+       wrmsr(MSR_IA32_XFD, 0);
+       GUEST_SYNC(2);
+       GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0);
+       set_tilecfg(amx_cfg);
+       __ldtilecfg(amx_cfg);
+       GUEST_SYNC(3);
+       /* Check save/restore when trap to userspace */
+       __tileloadd(tiledata);
+       GUEST_SYNC(4);
+       __tilerelease();
+       GUEST_SYNC(5);
+       /*
+        * After XSAVEC, XTILEDATA is cleared in the xstate_bv but is set in
+        * the xcomp_bv.
+        */
+       xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA;
+       __xsavec(xstate, XFEATURE_MASK_XTILE_DATA);
+       GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
+       GUEST_ASSERT(xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA);
+
+       /* xfd=0x40000, disable amx tiledata */
+       wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA);
+
+       /*
+        * XTILEDATA is cleared in xstate_bv but set in xcomp_bv, this property
+        * remains the same even when amx tiledata is disabled by IA32_XFD.
+        */
+       xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA;
+       __xsavec(xstate, XFEATURE_MASK_XTILE_DATA);
+       GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
+       GUEST_ASSERT((xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA));
+
+       GUEST_SYNC(6);
+       GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
+       set_tilecfg(amx_cfg);
+       __ldtilecfg(amx_cfg);
+       /* Trigger #NM exception */
+       __tileloadd(tiledata);
+       GUEST_SYNC(10);
+
+       GUEST_DONE();
+}
+
+void guest_nm_handler(struct ex_regs *regs)
+{
+       /* Check if #NM is triggered by XFEATURE_MASK_XTILE_DATA */
+       GUEST_SYNC(7);
+       GUEST_ASSERT(!(get_cr0() & X86_CR0_TS));
+       GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
+       GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
+       GUEST_SYNC(8);
+       GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
+       GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
+       /* Clear xfd_err */
+       wrmsr(MSR_IA32_XFD_ERR, 0);
+       /* xfd=0, enable amx */
+       wrmsr(MSR_IA32_XFD, 0);
+       GUEST_SYNC(9);
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_regs regs1, regs2;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct kvm_x86_state *state;
+       int xsave_restore_size;
+       vm_vaddr_t amx_cfg, tiledata, xstate;
+       struct ucall uc;
+       u32 amx_offset;
+       int ret;
+
+       /*
+        * Note, all off-by-default features must be enabled before anything
+        * caches KVM_GET_SUPPORTED_CPUID, e.g. before using kvm_cpu_has().
+        */
+       vm_xsave_require_permission(XFEATURE_MASK_XTILE_DATA);
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD));
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE));
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG));
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA));
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA_XFD));
+
+       /* Create VM */
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       TEST_ASSERT(kvm_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE),
+                   "KVM should enumerate max XSAVE size when XSAVE is supported");
+       xsave_restore_size = kvm_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE);
+
+       vcpu_regs_get(vcpu, &regs1);
+
+       /* Register #NM handler */
+       vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler);
+
+       /* amx cfg for guest_code */
+       amx_cfg = vm_vaddr_alloc_page(vm);
+       memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize());
+
+       /* amx tiledata for guest_code */
+       tiledata = vm_vaddr_alloc_pages(vm, 2);
+       memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize());
+
+       /* XSAVE state for guest_code */
+       xstate = vm_vaddr_alloc_pages(vm, DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
+       memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
+       vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate);
+
+       for (;;) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       switch (uc.args[1]) {
+                       case 1:
+                       case 2:
+                       case 3:
+                       case 5:
+                       case 6:
+                       case 7:
+                       case 8:
+                               fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]);
+                               break;
+                       case 4:
+                       case 10:
+                               fprintf(stderr,
+                               "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]);
+
+                               /*
+                                * In compacted format, the tile data is the
+                                * last component, so the AMX offset is the
+                                * XSAVE area size minus the 8K of tile data.
+                                */
+                               amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
+                               state = vcpu_save_state(vcpu);
+                               void *amx_start = (void *)state->xsave + amx_offset;
+                               void *tiles_data = (void *)addr_gva2hva(vm, tiledata);
+                               /* Only check TMM0 register, 1 tile */
+                               ret = memcmp(amx_start, tiles_data, TILE_SIZE);
+                               TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret);
+                               kvm_x86_state_cleanup(state);
+                               break;
+                       case 9:
+                               fprintf(stderr,
+                               "GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]);
+                               break;
+                       }
+                       break;
+               case UCALL_DONE:
+                       fprintf(stderr, "UCALL_DONE\n");
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+
+               state = vcpu_save_state(vcpu);
+               memset(&regs1, 0, sizeof(regs1));
+               vcpu_regs_get(vcpu, &regs1);
+
+               kvm_vm_release(vm);
+
+               /* Restore state in a new VM.  */
+               vcpu = vm_recreate_with_one_vcpu(vm);
+               vcpu_load_state(vcpu, state);
+               kvm_x86_state_cleanup(state);
+
+               memset(&regs2, 0, sizeof(regs2));
+               vcpu_regs_get(vcpu, &regs2);
+               TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+                           "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+                           (ulong) regs2.rdi, (ulong) regs2.rsi);
+       }
+done:
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c
new file mode 100644 (file)
index 0000000..f8916bb
--- /dev/null
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024 Intel Corporation
+ *
+ * Verify KVM correctly emulates the APIC bus frequency when the VMM configures
+ * the frequency via KVM_CAP_X86_APIC_BUS_CYCLES_NS.  Start the APIC timer by
+ * programming TMICT (timer initial count) to the largest value possible (so
+ * that the timer will not expire during the test).  Then, after an arbitrary
+ * amount of time has elapsed, verify TMCCT (timer current count) is within 5%
+ * of the expected value based on the time elapsed, the APIC bus frequency, and
+ * the programmed TDCR (timer divide configuration register).
+ */
+
+#include "apic.h"
+#include "test_util.h"
+
+/*
+ * Possible TDCR values with matching divide count. Used to modify APIC
+ * timer frequency.
+ */
+static const struct {
+       const uint32_t tdcr;
+       const uint32_t divide_count;
+} tdcrs[] = {
+       {0x0, 2},
+       {0x1, 4},
+       {0x2, 8},
+       {0x3, 16},
+       {0x8, 32},
+       {0x9, 64},
+       {0xa, 128},
+       {0xb, 1},
+};
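
The hex values above follow the architectural TDCR encoding, in which the divide value is split across bits 0, 1 and 3 of the register. As a purely illustrative sketch (the helper below is made up for this note, not something the test defines), the same table can be derived programmatically:

#include <stdint.h>

/*
 * Sketch of the TDCR divide encoding: bits 0, 1 and 3 form a 3-bit value v,
 * where v = 0..6 means divide-by-2^(v+1) and v = 7 means divide-by-1.
 */
static inline uint32_t tdcr_to_divide_count(uint32_t tdcr)
{
	uint32_t v = (tdcr & 0x3) | ((tdcr & 0x8) >> 1);

	return v == 7 ? 1 : 2u << v;
}
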
+
+static bool is_x2apic;
+
+static void apic_enable(void)
+{
+       if (is_x2apic)
+               x2apic_enable();
+       else
+               xapic_enable();
+}
+
+static uint32_t apic_read_reg(unsigned int reg)
+{
+       return is_x2apic ? x2apic_read_reg(reg) : xapic_read_reg(reg);
+}
+
+static void apic_write_reg(unsigned int reg, uint32_t val)
+{
+       if (is_x2apic)
+               x2apic_write_reg(reg, val);
+       else
+               xapic_write_reg(reg, val);
+}
+
+static void apic_guest_code(uint64_t apic_hz, uint64_t delay_ms)
+{
+       uint64_t tsc_hz = guest_tsc_khz * 1000;
+       const uint32_t tmict = ~0u;
+       uint64_t tsc0, tsc1, freq;
+       uint32_t tmcct;
+       int i;
+
+       apic_enable();
+
+       /*
+        * Setup one-shot timer.  The vector does not matter because the
+        * interrupt should not fire.
+        */
+       apic_write_reg(APIC_LVTT, APIC_LVT_TIMER_ONESHOT | APIC_LVT_MASKED);
+
+       for (i = 0; i < ARRAY_SIZE(tdcrs); i++) {
+               apic_write_reg(APIC_TDCR, tdcrs[i].tdcr);
+               apic_write_reg(APIC_TMICT, tmict);
+
+               tsc0 = rdtsc();
+               udelay(delay_ms * 1000);
+               tmcct = apic_read_reg(APIC_TMCCT);
+               tsc1 = rdtsc();
+
+               /*
+                * Stop the timer _after_ reading the current, final count, as
+                * writing the initial counter also modifies the current count.
+                */
+               apic_write_reg(APIC_TMICT, 0);
+
+               freq = (tmict - tmcct) * tdcrs[i].divide_count * tsc_hz / (tsc1 - tsc0);
+               /* Check if measured frequency is within 5% of configured frequency. */
+               __GUEST_ASSERT(freq < apic_hz * 105 / 100 && freq > apic_hz * 95 / 100,
+                              "Frequency = %lu (wanted %lu - %lu), bus = %lu, div = %u, tsc = %lu",
+                              freq, apic_hz * 95 / 100, apic_hz * 105 / 100,
+                              apic_hz, tdcrs[i].divide_count, tsc_hz);
+       }
+
+       GUEST_DONE();
+}
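
The guest reconstructs the bus frequency as freq = (tmict - tmcct) * divide_count * tsc_hz / (tsc1 - tsc0). A standalone sketch of that arithmetic with assumed example numbers (25 MHz bus, divide-by-2, roughly a 100 ms delay, a 1 GHz TSC; none of these values come from the test itself) shows how the 5% bounds are applied:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Assumed example values, chosen only to illustrate the formula. */
	const uint64_t apic_hz = 25 * 1000 * 1000;	/* emulated bus: 25 MHz */
	const uint64_t tsc_hz = 1000 * 1000 * 1000;	/* guest TSC: 1 GHz */
	const uint32_t divide_count = 2;		/* TDCR = 0x0 */
	const uint32_t tmict = ~0u;
	const uint64_t tsc_delta = tsc_hz / 10;		/* ~100 ms of TSC cycles */

	/* With a 25 MHz bus and divide-by-2, ~1.25M timer ticks elapse in 100 ms. */
	const uint32_t tmcct = tmict - (uint32_t)(apic_hz / 10 / divide_count);

	/* Same reconstruction the guest performs. */
	uint64_t freq = (uint64_t)(tmict - tmcct) * divide_count * tsc_hz / tsc_delta;

	printf("measured %lu Hz, accepted range [%lu, %lu] Hz\n",
	       (unsigned long)freq,
	       (unsigned long)(apic_hz * 95 / 100),
	       (unsigned long)(apic_hz * 105 / 100));
	return 0;
}
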
+
+static void test_apic_bus_clock(struct kvm_vcpu *vcpu)
+{
+       bool done = false;
+       struct ucall uc;
+
+       while (!done) {
+               vcpu_run(vcpu);
+
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_DONE:
+                       done = true;
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+                       break;
+               }
+       }
+}
+
+static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms,
+                                   bool x2apic)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       int ret;
+
+       is_x2apic = x2apic;
+
+       vm = vm_create(1);
+
+       sync_global_to_guest(vm, is_x2apic);
+
+       vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
+                     NSEC_PER_SEC / apic_hz);
+
+       vcpu = vm_vcpu_add(vm, 0, apic_guest_code);
+       vcpu_args_set(vcpu, 2, apic_hz, delay_ms);
+
+       ret = __vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
+                             NSEC_PER_SEC / apic_hz);
+       TEST_ASSERT(ret < 0 && errno == EINVAL,
+                   "Setting the APIC bus frequency after a vCPU has been created should fail.");
+
+       if (!is_x2apic)
+               virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+       test_apic_bus_clock(vcpu);
+       kvm_vm_free(vm);
+}
+
+static void help(char *name)
+{
+       puts("");
+       printf("usage: %s [-h] [-d delay] [-f APIC bus freq]\n", name);
+       puts("");
+       printf("-d: Delay (in msec) guest uses to measure APIC bus frequency.\n");
+       printf("-f: The APIC bus frequency (in MHz) to be configured for the guest.\n");
+       puts("");
+}
+
+int main(int argc, char *argv[])
+{
+       /*
+        * Arbitrarily default to 25MHz for the APIC bus frequency, which is
+        * different enough from the default 1GHz to be interesting.
+        */
+       uint64_t apic_hz = 25 * 1000 * 1000;
+       uint64_t delay_ms = 100;
+       int opt;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS));
+
+       while ((opt = getopt(argc, argv, "d:f:h")) != -1) {
+               switch (opt) {
+               case 'f':
+                       apic_hz = atoi_positive("APIC bus frequency", optarg) * 1000 * 1000;
+                       break;
+               case 'd':
+                       delay_ms = atoi_positive("Delay in milliseconds", optarg);
+                       break;
+               case 'h':
+               default:
+                       help(argv[0]);
+                       exit(KSFT_SKIP);
+               }
+       }
+
+       run_apic_bus_clock_test(apic_hz, delay_ms, false);
+       run_apic_bus_clock_test(apic_hz, delay_ms, true);
+}
diff --git a/tools/testing/selftests/kvm/x86/cpuid_test.c b/tools/testing/selftests/kvm/x86/cpuid_test.c
new file mode 100644 (file)
index 0000000..7b3fda6
--- /dev/null
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat Inc.
+ *
+ * Generic tests for KVM CPUID set/get ioctls
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct cpuid_mask {
+       union {
+               struct {
+                       u32 eax;
+                       u32 ebx;
+                       u32 ecx;
+                       u32 edx;
+               };
+               u32 regs[4];
+       };
+};
+
+static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
+{
+       int i;
+       u32 eax, ebx, ecx, edx;
+
+       for (i = 0; i < guest_cpuid->nent; i++) {
+               __cpuid(guest_cpuid->entries[i].function,
+                       guest_cpuid->entries[i].index,
+                       &eax, &ebx, &ecx, &edx);
+
+               GUEST_ASSERT_EQ(eax, guest_cpuid->entries[i].eax);
+               GUEST_ASSERT_EQ(ebx, guest_cpuid->entries[i].ebx);
+               GUEST_ASSERT_EQ(ecx, guest_cpuid->entries[i].ecx);
+               GUEST_ASSERT_EQ(edx, guest_cpuid->entries[i].edx);
+       }
+
+}
+
+static void guest_main(struct kvm_cpuid2 *guest_cpuid)
+{
+       GUEST_SYNC(1);
+
+       test_guest_cpuids(guest_cpuid);
+
+       GUEST_SYNC(2);
+
+       GUEST_ASSERT_EQ(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF), 0x40000001);
+
+       GUEST_DONE();
+}
+
+static struct cpuid_mask get_const_cpuid_mask(const struct kvm_cpuid_entry2 *entry)
+{
+       struct cpuid_mask mask;
+
+       memset(&mask, 0xff, sizeof(mask));
+
+       switch (entry->function) {
+       case 0x1:
+               mask.regs[X86_FEATURE_OSXSAVE.reg] &= ~BIT(X86_FEATURE_OSXSAVE.bit);
+               break;
+       case 0x7:
+               mask.regs[X86_FEATURE_OSPKE.reg] &= ~BIT(X86_FEATURE_OSPKE.bit);
+               break;
+       case 0xd:
+               /*
+                * CPUID.0xD.{0,1}.EBX enumerate XSAVE size based on the current
+                * XCR0 and IA32_XSS MSR values.
+                */
+               if (entry->index < 2)
+                       mask.ebx = 0;
+               break;
+       }
+       return mask;
+}
+
+static void compare_cpuids(const struct kvm_cpuid2 *cpuid1,
+                          const struct kvm_cpuid2 *cpuid2)
+{
+       const struct kvm_cpuid_entry2 *e1, *e2;
+       int i;
+
+       TEST_ASSERT(cpuid1->nent == cpuid2->nent,
+                   "CPUID nent mismatch: %d vs. %d", cpuid1->nent, cpuid2->nent);
+
+       for (i = 0; i < cpuid1->nent; i++) {
+               struct cpuid_mask mask;
+
+               e1 = &cpuid1->entries[i];
+               e2 = &cpuid2->entries[i];
+
+               TEST_ASSERT(e1->function == e2->function &&
+                           e1->index == e2->index && e1->flags == e2->flags,
+                           "CPUID entries[%d] mismatch: 0x%x.%d.%x vs. 0x%x.%d.%x",
+                           i, e1->function, e1->index, e1->flags,
+                           e2->function, e2->index, e2->flags);
+
+               /* Mask off dynamic bits, e.g. OSXSAVE, when comparing entries. */
+               mask = get_const_cpuid_mask(e1);
+
+               TEST_ASSERT((e1->eax & mask.eax) == (e2->eax & mask.eax) &&
+                           (e1->ebx & mask.ebx) == (e2->ebx & mask.ebx) &&
+                           (e1->ecx & mask.ecx) == (e2->ecx & mask.ecx) &&
+                           (e1->edx & mask.edx) == (e2->edx & mask.edx),
+                           "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x",
+                           e1->function, e1->index,
+                           e1->eax & mask.eax, e1->ebx & mask.ebx,
+                           e1->ecx & mask.ecx, e1->edx & mask.edx,
+                           e2->eax & mask.eax, e2->ebx & mask.ebx,
+                           e2->ecx & mask.ecx, e2->edx & mask.edx);
+       }
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
+{
+       struct ucall uc;
+
+       vcpu_run(vcpu);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_SYNC:
+               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+                           uc.args[1] == stage + 1,
+                           "Stage %d: Unexpected register values vmexit, got %lx",
+                           stage + 1, (ulong)uc.args[1]);
+               return;
+       case UCALL_DONE:
+               return;
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+       default:
+               TEST_ASSERT(false, "Unexpected exit: %s",
+                           exit_reason_str(vcpu->run->exit_reason));
+       }
+}
+
+struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid)
+{
+       int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]);
+       vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR);
+       struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva);
+
+       memcpy(guest_cpuids, cpuid, size);
+
+       *p_gva = gva;
+       return guest_cpuids;
+}
+
+static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
+{
+       struct kvm_cpuid_entry2 *ent;
+       int rc;
+       u32 eax, ebx, x;
+
+       /* Setting unmodified CPUID is allowed */
+       rc = __vcpu_set_cpuid(vcpu);
+       TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
+
+       /* Changing CPU features is forbidden */
+       ent = vcpu_get_cpuid_entry(vcpu, 0x7);
+       ebx = ent->ebx;
+       ent->ebx--;
+       rc = __vcpu_set_cpuid(vcpu);
+       TEST_ASSERT(rc, "Changing CPU features should fail");
+       ent->ebx = ebx;
+
+       /* Changing MAXPHYADDR is forbidden */
+       ent = vcpu_get_cpuid_entry(vcpu, 0x80000008);
+       eax = ent->eax;
+       x = eax & 0xff;
+       ent->eax = (eax & ~0xffu) | (x - 1);
+       rc = __vcpu_set_cpuid(vcpu);
+       TEST_ASSERT(rc, "Changing MAXPHYADDR should fail");
+       ent->eax = eax;
+}
+
+static void test_get_cpuid2(struct kvm_vcpu *vcpu)
+{
+       struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent + 1);
+       int i, r;
+
+       vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
+       TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent,
+                   "KVM didn't update nent on success, wanted %u, got %u",
+                   vcpu->cpuid->nent, cpuid->nent);
+
+       for (i = 0; i < vcpu->cpuid->nent; i++) {
+               cpuid->nent = i;
+               r = __vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
+               TEST_ASSERT(r && errno == E2BIG, KVM_IOCTL_ERROR(KVM_GET_CPUID2, r));
+               TEST_ASSERT(cpuid->nent == i, "KVM modified nent on failure");
+       }
+       free(cpuid);
+}
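
The E2BIG behavior exercised above is what userspace relies on when it does not know in advance how many entries KVM will return. A minimal sketch of the usual grow-and-retry pattern using the raw ioctl (vcpu_fd is assumed to be an already-open KVM vCPU file descriptor; error handling is trimmed):

#include <errno.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Double the buffer until KVM stops returning E2BIG; on success KVM updates
 * nent to the number of entries it actually filled in.
 */
static struct kvm_cpuid2 *get_vcpu_cpuid(int vcpu_fd)
{
	struct kvm_cpuid2 *cpuid;
	int nent = 8;

	for (;;) {
		cpuid = calloc(1, sizeof(*cpuid) + nent * sizeof(cpuid->entries[0]));
		if (!cpuid)
			return NULL;
		cpuid->nent = nent;
		if (!ioctl(vcpu_fd, KVM_GET_CPUID2, cpuid))
			return cpuid;
		free(cpuid);
		if (errno != E2BIG)
			return NULL;
		nent *= 2;
	}
}
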
+
+int main(void)
+{
+       struct kvm_vcpu *vcpu;
+       vm_vaddr_t cpuid_gva;
+       struct kvm_vm *vm;
+       int stage;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+       compare_cpuids(kvm_get_supported_cpuid(), vcpu->cpuid);
+
+       vcpu_alloc_cpuid(vm, &cpuid_gva, vcpu->cpuid);
+
+       vcpu_args_set(vcpu, 1, cpuid_gva);
+
+       for (stage = 0; stage < 3; stage++)
+               run_vcpu(vcpu, stage);
+
+       set_cpuid_after_run(vcpu);
+
+       test_get_cpuid2(vcpu);
+
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c
new file mode 100644 (file)
index 0000000..28cc664
--- /dev/null
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CR4 and CPUID sync test
+ *
+ * Copyright 2018, Red Hat, Inc. and/or its affiliates.
+ *
+ * Author:
+ *   Wei Huang <wei@redhat.com>
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define MAGIC_HYPERCALL_PORT   0x80
+
+static void guest_code(void)
+{
+       u32 regs[4] = {
+               [KVM_CPUID_EAX] = X86_FEATURE_OSXSAVE.function,
+               [KVM_CPUID_ECX] = X86_FEATURE_OSXSAVE.index,
+       };
+
+       /* CR4.OSXSAVE should be enabled by default (for selftests vCPUs). */
+       GUEST_ASSERT(get_cr4() & X86_CR4_OSXSAVE);
+
+       /* verify CR4.OSXSAVE == CPUID.OSXSAVE */
+       GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
+
+       /*
+        * Notify the hypervisor to clear CR4.OSXSAVE, do CPUID and save the
+        * output, and then restore CR4.  Do this all in assembly to ensure no AVX
+        * instructions are executed while OSXSAVE=0.
+        */
+       asm volatile (
+               "out %%al, $" __stringify(MAGIC_HYPERCALL_PORT) "\n\t"
+               "cpuid\n\t"
+               "mov %%rdi, %%cr4\n\t"
+               : "+a" (regs[KVM_CPUID_EAX]),
+                 "=b" (regs[KVM_CPUID_EBX]),
+                 "+c" (regs[KVM_CPUID_ECX]),
+                 "=d" (regs[KVM_CPUID_EDX])
+               : "D" (get_cr4())
+       );
+
+       /* Verify KVM cleared OSXSAVE in CPUID when it was cleared in CR4. */
+       GUEST_ASSERT(!(regs[X86_FEATURE_OSXSAVE.reg] & BIT(X86_FEATURE_OSXSAVE.bit)));
+
+       /* Verify restoring CR4 also restored OSXSAVE in CPUID. */
+       GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct kvm_sregs sregs;
+       struct ucall uc;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       while (1) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               if (vcpu->run->io.port == MAGIC_HYPERCALL_PORT &&
+                   vcpu->run->io.direction == KVM_EXIT_IO_OUT) {
+                       /* emulate hypervisor clearing CR4.OSXSAVE */
+                       vcpu_sregs_get(vcpu, &sregs);
+                       sregs.cr4 &= ~X86_CR4_OSXSAVE;
+                       vcpu_sregs_set(vcpu, &sregs);
+                       continue;
+               }
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+
+done:
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/debug_regs.c b/tools/testing/selftests/kvm/x86/debug_regs.c
new file mode 100644 (file)
index 0000000..2d814c1
--- /dev/null
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM guest debug register tests
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <string.h>
+#include "kvm_util.h"
+#include "processor.h"
+#include "apic.h"
+
+#define DR6_BD         (1 << 13)
+#define DR7_GD         (1 << 13)
+
+#define IRQ_VECTOR 0xAA
+
+/* For testing data access debug BP */
+uint32_t guest_value;
+
+extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
+
+static void guest_code(void)
+{
+       /* Create a pending interrupt on current vCPU */
+       x2apic_enable();
+       x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT |
+                        APIC_DM_FIXED | IRQ_VECTOR);
+
+       /*
+        * Software BP tests.
+        *
+        * NOTE: sw_bp needs to be placed before the int3 here, because int3 is
+        * an exception rather than a normal trap for KVM_SET_GUEST_DEBUG (we
+        * capture it using the vCPU exception bitmap).
+        */
+       asm volatile("sw_bp: int3");
+
+       /* Hardware instruction BP test */
+       asm volatile("hw_bp: nop");
+
+       /* Hardware data BP test */
+       asm volatile("mov $1234,%%rax;\n\t"
+                    "mov %%rax,%0;\n\t write_data:"
+                    : "=m" (guest_value) : : "rax");
+
+       /*
+        * Single step test, covers 2 basic instructions and 2 emulated
+        *
+        * Enable interrupts during the single stepping to see that pending
+        * interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ.
+        *
+        * Write MSR_IA32_TSC_DEADLINE to verify that KVM's fastpath handler
+        * exits to userspace due to single-step being enabled.
+        */
+       asm volatile("ss_start: "
+                    "sti\n\t"
+                    "xor %%eax,%%eax\n\t"
+                    "cpuid\n\t"
+                    "movl $" __stringify(MSR_IA32_TSC_DEADLINE) ", %%ecx\n\t"
+                    "wrmsr\n\t"
+                    "cli\n\t"
+                    : : : "eax", "ebx", "ecx", "edx");
+
+       /* DR6.BD test */
+       asm volatile("bd_start: mov %%dr0, %%rax" : : : "rax");
+       GUEST_DONE();
+}
+
+#define  CAST_TO_RIP(v)  ((unsigned long long)&(v))
+
+static void vcpu_skip_insn(struct kvm_vcpu *vcpu, int insn_len)
+{
+       struct kvm_regs regs;
+
+       vcpu_regs_get(vcpu, &regs);
+       regs.rip += insn_len;
+       vcpu_regs_set(vcpu, &regs);
+}
+
+int main(void)
+{
+       struct kvm_guest_debug debug;
+       unsigned long long target_dr6, target_rip;
+       struct kvm_vcpu *vcpu;
+       struct kvm_run *run;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       uint64_t cmd;
+       int i;
+       /* Instruction lengths starting at ss_start */
+       int ss_size[6] = {
+               1,              /* sti*/
+               2,              /* xor */
+               2,              /* cpuid */
+               5,              /* mov */
+               2,              /* wrmsr */
+               1,              /* cli */
+       };
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       run = vcpu->run;
+
+       /* Test software BPs - int3 */
+       memset(&debug, 0, sizeof(debug));
+       debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
+       vcpu_guest_debug_set(vcpu, &debug);
+       vcpu_run(vcpu);
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+                   run->debug.arch.exception == BP_VECTOR &&
+                   run->debug.arch.pc == CAST_TO_RIP(sw_bp),
+                   "INT3: exit %d exception %d rip 0x%llx (should be 0x%llx)",
+                   run->exit_reason, run->debug.arch.exception,
+                   run->debug.arch.pc, CAST_TO_RIP(sw_bp));
+       vcpu_skip_insn(vcpu, 1);
+
+       /* Test instruction HW BP over DR[0-3] */
+       for (i = 0; i < 4; i++) {
+               memset(&debug, 0, sizeof(debug));
+               debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+               debug.arch.debugreg[i] = CAST_TO_RIP(hw_bp);
+               debug.arch.debugreg[7] = 0x400 | (1UL << (2*i+1));
+               vcpu_guest_debug_set(vcpu, &debug);
+               vcpu_run(vcpu);
+               target_dr6 = 0xffff0ff0 | (1UL << i);
+               TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+                           run->debug.arch.exception == DB_VECTOR &&
+                           run->debug.arch.pc == CAST_TO_RIP(hw_bp) &&
+                           run->debug.arch.dr6 == target_dr6,
+                           "INS_HW_BP (DR%d): exit %d exception %d rip 0x%llx "
+                           "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+                           i, run->exit_reason, run->debug.arch.exception,
+                           run->debug.arch.pc, CAST_TO_RIP(hw_bp),
+                           run->debug.arch.dr6, target_dr6);
+       }
+       /* Skip "nop" */
+       vcpu_skip_insn(vcpu, 1);
+
+       /* Test data access HW BP over DR[0-3] */
+       for (i = 0; i < 4; i++) {
+               memset(&debug, 0, sizeof(debug));
+               debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+               debug.arch.debugreg[i] = CAST_TO_RIP(guest_value);
+               debug.arch.debugreg[7] = 0x00000400 | (1UL << (2*i+1)) |
+                   (0x000d0000UL << (4*i));
+               vcpu_guest_debug_set(vcpu, &debug);
+               vcpu_run(vcpu);
+               target_dr6 = 0xffff0ff0 | (1UL << i);
+               TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+                           run->debug.arch.exception == DB_VECTOR &&
+                           run->debug.arch.pc == CAST_TO_RIP(write_data) &&
+                           run->debug.arch.dr6 == target_dr6,
+                           "DATA_HW_BP (DR%d): exit %d exception %d rip 0x%llx "
+                           "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+                           i, run->exit_reason, run->debug.arch.exception,
+                           run->debug.arch.pc, CAST_TO_RIP(write_data),
+                           run->debug.arch.dr6, target_dr6);
+               /* Rollback the 4-bytes "mov" */
+               vcpu_skip_insn(vcpu, -7);
+       }
+       /* Skip the 4-bytes "mov" */
+       vcpu_skip_insn(vcpu, 7);
+
+       /* Test single step */
+       target_rip = CAST_TO_RIP(ss_start);
+       target_dr6 = 0xffff4ff0ULL;
+       for (i = 0; i < ARRAY_SIZE(ss_size); i++) {
+               target_rip += ss_size[i];
+               memset(&debug, 0, sizeof(debug));
+               debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP |
+                               KVM_GUESTDBG_BLOCKIRQ;
+               debug.arch.debugreg[7] = 0x00000400;
+               vcpu_guest_debug_set(vcpu, &debug);
+               vcpu_run(vcpu);
+               TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+                           run->debug.arch.exception == DB_VECTOR &&
+                           run->debug.arch.pc == target_rip &&
+                           run->debug.arch.dr6 == target_dr6,
+                           "SINGLE_STEP[%d]: exit %d exception %d rip 0x%llx "
+                           "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+                           i, run->exit_reason, run->debug.arch.exception,
+                           run->debug.arch.pc, target_rip, run->debug.arch.dr6,
+                           target_dr6);
+       }
+
+       /* Finally test global disable */
+       memset(&debug, 0, sizeof(debug));
+       debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+       debug.arch.debugreg[7] = 0x400 | DR7_GD;
+       vcpu_guest_debug_set(vcpu, &debug);
+       vcpu_run(vcpu);
+       target_dr6 = 0xffff0ff0 | DR6_BD;
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+                   run->debug.arch.exception == DB_VECTOR &&
+                   run->debug.arch.pc == CAST_TO_RIP(bd_start) &&
+                   run->debug.arch.dr6 == target_dr6,
+                           "DR7.GD: exit %d exception %d rip 0x%llx "
+                           "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+                           run->exit_reason, run->debug.arch.exception,
+                           run->debug.arch.pc, CAST_TO_RIP(bd_start), run->debug.arch.dr6,
+                           target_dr6);
+
+       /* Disable all debug controls, run to the end */
+       memset(&debug, 0, sizeof(debug));
+       vcpu_guest_debug_set(vcpu, &debug);
+
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+       cmd = get_ucall(vcpu, &uc);
+       TEST_ASSERT(cmd == UCALL_DONE, "UCALL_DONE");
+
+       kvm_vm_free(vm);
+
+       return 0;
+}
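
The DR7 values built inline above pack a per-slot global-enable bit plus per-slot R/W and LEN fields that start at bit 16, on top of the reserved must-be-one bit 10 (0x400). The sketch below spells out the data-write case with assumed helper macros; the names are illustrative and not defined by the test:

#include <stdint.h>

#define DR7_GE_BIT(i)		(1ULL << (2 * (i) + 1))	/* global enable, slot i */
#define DR7_RW_WRITE		0x1			/* break on data writes */
#define DR7_LEN_4BYTES		0x3			/* 4-byte wide breakpoint */
#define DR7_RW_LEN(i, rw, len)	((uint64_t)((len) << 2 | (rw)) << (16 + 4 * (i)))

/* Equivalent to 0x400 | (1UL << (2*i+1)) | (0x000d0000UL << (4*i)) above. */
static inline uint64_t dr7_data_write_bp(int i)
{
	return 0x400 | DR7_GE_BIT(i) | DR7_RW_LEN(i, DR7_RW_WRITE, DR7_LEN_4BYTES);
}
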
diff --git a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
new file mode 100644 (file)
index 0000000..2929c06
--- /dev/null
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty logging page splitting test
+ *
+ * Based on dirty_log_perf.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2023, Google, Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "memstress.h"
+#include "guest_modes.h"
+#include "ucall_common.h"
+
+#define VCPUS          2
+#define SLOTS          2
+#define ITERATIONS     2
+
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+
+static enum vm_mem_backing_src_type backing_src = VM_MEM_SRC_ANONYMOUS_HUGETLB;
+
+static u64 dirty_log_manual_caps;
+static bool host_quit;
+static int iteration;
+static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
+
+struct kvm_page_stats {
+       uint64_t pages_4k;
+       uint64_t pages_2m;
+       uint64_t pages_1g;
+       uint64_t hugepages;
+};
+
+static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage)
+{
+       stats->pages_4k = vm_get_stat(vm, "pages_4k");
+       stats->pages_2m = vm_get_stat(vm, "pages_2m");
+       stats->pages_1g = vm_get_stat(vm, "pages_1g");
+       stats->hugepages = stats->pages_2m + stats->pages_1g;
+
+       pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n",
+                stage, stats->pages_4k, stats->pages_2m, stats->pages_1g,
+                stats->hugepages);
+}
+
+static void run_vcpu_iteration(struct kvm_vm *vm)
+{
+       int i;
+
+       iteration++;
+       for (i = 0; i < VCPUS; i++) {
+               while (READ_ONCE(vcpu_last_completed_iteration[i]) !=
+                      iteration)
+                       ;
+       }
+}
+
+static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
+{
+       struct kvm_vcpu *vcpu = vcpu_args->vcpu;
+       int vcpu_idx = vcpu_args->vcpu_idx;
+
+       while (!READ_ONCE(host_quit)) {
+               int current_iteration = READ_ONCE(iteration);
+
+               vcpu_run(vcpu);
+
+               TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
+
+               vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
+
+               /* Wait for the start of the next iteration to be signaled. */
+               while (current_iteration == READ_ONCE(iteration) &&
+                      READ_ONCE(iteration) >= 0 &&
+                      !READ_ONCE(host_quit))
+                       ;
+       }
+}
+
+static void run_test(enum vm_guest_mode mode, void *unused)
+{
+       struct kvm_vm *vm;
+       unsigned long **bitmaps;
+       uint64_t guest_num_pages;
+       uint64_t host_num_pages;
+       uint64_t pages_per_slot;
+       int i;
+       struct kvm_page_stats stats_populated;
+       struct kvm_page_stats stats_dirty_logging_enabled;
+       struct kvm_page_stats stats_dirty_pass[ITERATIONS];
+       struct kvm_page_stats stats_clear_pass[ITERATIONS];
+       struct kvm_page_stats stats_dirty_logging_disabled;
+       struct kvm_page_stats stats_repopulated;
+
+       vm = memstress_create_vm(mode, VCPUS, guest_percpu_mem_size,
+                                SLOTS, backing_src, false);
+
+       guest_num_pages = (VCPUS * guest_percpu_mem_size) >> vm->page_shift;
+       guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+       host_num_pages = vm_num_host_pages(mode, guest_num_pages);
+       pages_per_slot = host_num_pages / SLOTS;
+       TEST_ASSERT_EQ(host_num_pages, pages_per_slot * SLOTS);
+       TEST_ASSERT(!(host_num_pages % 512),
+                   "Number of pages, '%lu' not a multiple of 2MiB", host_num_pages);
+
+       bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot);
+
+       if (dirty_log_manual_caps)
+               vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
+                             dirty_log_manual_caps);
+
+       /* Start the iterations */
+       iteration = -1;
+       host_quit = false;
+
+       for (i = 0; i < VCPUS; i++)
+               vcpu_last_completed_iteration[i] = -1;
+
+       memstress_start_vcpu_threads(VCPUS, vcpu_worker);
+
+       run_vcpu_iteration(vm);
+       get_page_stats(vm, &stats_populated, "populating memory");
+
+       /* Enable dirty logging */
+       memstress_enable_dirty_logging(vm, SLOTS);
+
+       get_page_stats(vm, &stats_dirty_logging_enabled, "enabling dirty logging");
+
+       while (iteration < ITERATIONS) {
+               run_vcpu_iteration(vm);
+               get_page_stats(vm, &stats_dirty_pass[iteration - 1],
+                              "dirtying memory");
+
+               memstress_get_dirty_log(vm, bitmaps, SLOTS);
+
+               if (dirty_log_manual_caps) {
+                       memstress_clear_dirty_log(vm, bitmaps, SLOTS, pages_per_slot);
+
+                       get_page_stats(vm, &stats_clear_pass[iteration - 1], "clearing dirty log");
+               }
+       }
+
+       /* Disable dirty logging */
+       memstress_disable_dirty_logging(vm, SLOTS);
+
+       get_page_stats(vm, &stats_dirty_logging_disabled, "disabling dirty logging");
+
+       /* Run vCPUs again to fault pages back in. */
+       run_vcpu_iteration(vm);
+       get_page_stats(vm, &stats_repopulated, "repopulating memory");
+
+       /*
+        * Tell the vCPU threads to quit.  No need to manually check that vCPUs
+        * have stopped running after disabling dirty logging, the join will
+        * wait for them to exit.
+        */
+       host_quit = true;
+       memstress_join_vcpu_threads(VCPUS);
+
+       memstress_free_bitmaps(bitmaps, SLOTS);
+       memstress_destroy_vm(vm);
+
+       TEST_ASSERT_EQ((stats_populated.pages_2m * 512 +
+                       stats_populated.pages_1g * 512 * 512), host_num_pages);
+
+       /*
+        * Check that all huge pages were split. Since large pages can only
+        * exist in the data slot, and the vCPUs should have dirtied all pages
+        * in the data slot, there should be no huge pages left after splitting.
+        * Splitting happens at dirty log enable time without
+        * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and after the first clear pass
+        * with that capability.
+        */
+       if (dirty_log_manual_caps) {
+               TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
+               TEST_ASSERT(stats_clear_pass[0].pages_4k >= host_num_pages,
+                           "Expected at least '%lu' 4KiB pages, found only '%lu'",
+                           host_num_pages, stats_clear_pass[0].pages_4k);
+               TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
+       } else {
+               TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
+               TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= host_num_pages,
+                           "Expected at least '%lu' 4KiB pages, found only '%lu'",
+                           host_num_pages, stats_dirty_logging_enabled.pages_4k);
+       }
+
+       /*
+        * Once dirty logging is disabled and the vCPUs have touched all their
+        * memory again, the hugepage counts should be the same as they were
+        * right after initial population of memory.
+        */
+       TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
+       TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
+}
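
The multipliers in the assertions above convert huge-page counts into 4 KiB-page equivalents: a 2 MiB page covers 512 base pages and a 1 GiB page covers 512 * 512 of them, and the populated-stats check expects every page to be huge after initial population with HugeTLB backing. A trivial helper form of the same bookkeeping, purely for illustration:

#include <stdint.h>

/*
 * Total mapped memory expressed in 4 KiB-page units, given per-size counts
 * like the kvm_page_stats fields above (assumed helper, not from the test).
 */
static inline uint64_t total_pages_4k_equiv(uint64_t p4k, uint64_t p2m, uint64_t p1g)
{
	return p4k + p2m * 512 + p1g * 512 * 512;
}
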
+
+static void help(char *name)
+{
+       puts("");
+       printf("usage: %s [-h] [-b vcpu bytes] [-s mem type]\n",
+              name);
+       puts("");
+       printf(" -b: specify the size of the memory region which should be\n"
+              "     dirtied by each vCPU. e.g. 10M or 3G.\n"
+              "     (default: 1G)\n");
+       backing_src_help("-s");
+       puts("");
+}
+
+int main(int argc, char *argv[])
+{
+       int opt;
+
+       TEST_REQUIRE(get_kvm_param_bool("eager_page_split"));
+       TEST_REQUIRE(get_kvm_param_bool("tdp_mmu"));
+
+       while ((opt = getopt(argc, argv, "b:hs:")) != -1) {
+               switch (opt) {
+               case 'b':
+                       guest_percpu_mem_size = parse_size(optarg);
+                       break;
+               case 'h':
+                       help(argv[0]);
+                       exit(0);
+               case 's':
+                       backing_src = parse_backing_src_type(optarg);
+                       break;
+               default:
+                       help(argv[0]);
+                       exit(1);
+               }
+       }
+
+       if (!is_backing_src_hugetlb(backing_src)) {
+               pr_info("This test will only work reliably with HugeTLB memory. "
+                       "It can work with THP, but that is best effort.\n");
+       }
+
+       guest_modes_append_default();
+
+       dirty_log_manual_caps = 0;
+       for_each_guest_mode(run_test, NULL);
+
+       dirty_log_manual_caps =
+               kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+
+       if (dirty_log_manual_caps) {
+               dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
+                                         KVM_DIRTY_LOG_INITIALLY_SET);
+               for_each_guest_mode(run_test, NULL);
+       } else {
+               pr_info("Skipping testing with MANUAL_PROTECT as it is not supported");
+       }
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c
new file mode 100644 (file)
index 0000000..8105547
--- /dev/null
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022, Google LLC.
+ *
+ * Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE.
+ */
+#include "flds_emulation.h"
+#include "test_util.h"
+#include "ucall_common.h"
+
+#define MMIO_GPA       0x700000000
+#define MMIO_GVA       MMIO_GPA
+
+static void guest_code(void)
+{
+       /* Execute flds with an MMIO address to force KVM to emulate it. */
+       flds(MMIO_GVA);
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
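+       /*
+        * Map the MMIO GVA to a GPA that is not backed by a memslot
+        * (assuming 0x700000000 lies outside the memslots created by
+        * vm_create_with_one_vcpu()), so the guest's flds access is handled
+        * as MMIO and forces KVM to emulate the unsupported instruction.
+        */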
+       virt_map(vm, MMIO_GVA, MMIO_GPA, 1);
+
+       vcpu_run(vcpu);
+       handle_flds_emulation_failure_exit(vcpu);
+       vcpu_run(vcpu);
+       TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/feature_msrs_test.c b/tools/testing/selftests/kvm/x86/feature_msrs_test.c
new file mode 100644 (file)
index 0000000..a72f13a
--- /dev/null
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+static bool is_kvm_controlled_msr(uint32_t msr)
+{
+       return msr == MSR_IA32_VMX_CR0_FIXED1 || msr == MSR_IA32_VMX_CR4_FIXED1;
+}
+
+/*
+ * For VMX MSRs with a "true" variant, KVM requires userspace to set the "true"
+ * MSR, and doesn't allow setting the hidden version.
+ */
+static bool is_hidden_vmx_msr(uint32_t msr)
+{
+       switch (msr) {
+       case MSR_IA32_VMX_PINBASED_CTLS:
+       case MSR_IA32_VMX_PROCBASED_CTLS:
+       case MSR_IA32_VMX_EXIT_CTLS:
+       case MSR_IA32_VMX_ENTRY_CTLS:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool is_quirked_msr(uint32_t msr)
+{
+       return msr != MSR_AMD64_DE_CFG;
+}
+
+static void test_feature_msr(uint32_t msr)
+{
+       const uint64_t supported_mask = kvm_get_feature_msr(msr);
+       uint64_t reset_value = is_quirked_msr(msr) ? supported_mask : 0;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       /*
+        * Don't bother testing KVM-controlled MSRs beyond verifying that the
+        * MSR can be read from userspace.  Any value is effectively legal, as
+        * KVM is bound by x86 architecture, not by ABI.
+        */
+       if (is_kvm_controlled_msr(msr))
+               return;
+
+       /*
+        * More goofy behavior.  KVM reports the host CPU's actual revision ID,
+        * but initializes the vCPU's revision ID to an arbitrary value.
+        */
+       if (msr == MSR_IA32_UCODE_REV)
+               reset_value = host_cpu_is_intel ? 0x100000000ULL : 0x01000065;
+
+       /*
+        * For quirked MSRs, KVM's ABI is to initialize the vCPU's value to the
+        * full set of features supported by KVM.  For non-quirked MSRs, and
+        * when the quirk is disabled, KVM must zero-initialize the MSR and let
+        * userspace do the configuration.
+        */
+       vm = vm_create_with_one_vcpu(&vcpu, NULL);
+       TEST_ASSERT(vcpu_get_msr(vcpu, msr) == reset_value,
+                   "Wanted 0x%lx for %squirked MSR 0x%x, got 0x%lx",
+                   reset_value, is_quirked_msr(msr) ? "" : "non-", msr,
+                   vcpu_get_msr(vcpu, msr));
+       if (!is_hidden_vmx_msr(msr))
+               vcpu_set_msr(vcpu, msr, supported_mask);
+       kvm_vm_free(vm);
+
+       if (is_hidden_vmx_msr(msr))
+               return;
+
+       if (!kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2) ||
+           !(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
+               return;
+
+       vm = vm_create(1);
+       vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_STUFF_FEATURE_MSRS);
+
+       vcpu = vm_vcpu_add(vm, 0, NULL);
+       TEST_ASSERT(!vcpu_get_msr(vcpu, msr),
+                   "Quirk disabled, wanted '0' for MSR 0x%x, got 0x%lx",
+                   msr, vcpu_get_msr(vcpu, msr));
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       const struct kvm_msr_list *feature_list;
+       int i;
+
+       /*
+        * Skip the entire test if MSR_FEATURES isn't supported; other tests
+        * will cover the "regular" list of MSRs, and the coverage here is
+        * purely opportunistic and not interesting on its own.
+        */
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES));
+
+       (void)kvm_get_msr_index_list();
+
+       feature_list = kvm_get_feature_msr_index_list();
+       for (i = 0; i < feature_list->nmsrs; i++)
+               test_feature_msr(feature_list->indices[i]);
+}
diff --git a/tools/testing/selftests/kvm/x86/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c
new file mode 100644 (file)
index 0000000..762628f
--- /dev/null
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM paravirtual feature disablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <linux/stringify.h>
+#include <stdint.h>
+
+#include "kvm_test_harness.h"
+#include "apic.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/* VMCALL and VMMCALL are both 3-byte opcodes. */
+#define HYPERCALL_INSN_SIZE    3
+
+static bool quirk_disabled;
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+       regs->rax = -EFAULT;
+       regs->rip += HYPERCALL_INSN_SIZE;
+}
+
+static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE]  = { 0x0f, 0x01, 0xc1 };
+static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 };
+
+extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE];
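+/*
+ * The hypercall "instruction" starts life as three int3 (0xcc) bytes; it is
+ * overwritten with the non-native VMCALL/VMMCALL opcode by guest_main()
+ * before do_sched_yield() executes it.
+ */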
+static uint64_t do_sched_yield(uint8_t apic_id)
+{
+       uint64_t ret;
+
+       asm volatile("hypercall_insn:\n\t"
+                    ".byte 0xcc,0xcc,0xcc\n\t"
+                    : "=a"(ret)
+                    : "a"((uint64_t)KVM_HC_SCHED_YIELD), "b"((uint64_t)apic_id)
+                    : "memory");
+
+       return ret;
+}
+
+static void guest_main(void)
+{
+       const uint8_t *native_hypercall_insn;
+       const uint8_t *other_hypercall_insn;
+       uint64_t ret;
+
+       if (host_cpu_is_intel) {
+               native_hypercall_insn = vmx_vmcall;
+               other_hypercall_insn  = svm_vmmcall;
+       } else if (host_cpu_is_amd) {
+               native_hypercall_insn = svm_vmmcall;
+               other_hypercall_insn  = vmx_vmcall;
+       } else {
+               GUEST_ASSERT(0);
+               /* unreachable */
+               return;
+       }
+
+       memcpy(hypercall_insn, other_hypercall_insn, HYPERCALL_INSN_SIZE);
+
+       ret = do_sched_yield(GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)));
+
+       /*
+        * If the quirk is disabled, verify that guest_ud_handler() "returned"
+        * -EFAULT and that KVM did NOT patch the hypercall.  If the quirk is
+        * enabled, verify that the hypercall succeeded and that KVM patched in
+        * the "right" hypercall.
+        */
+       if (quirk_disabled) {
+               GUEST_ASSERT(ret == (uint64_t)-EFAULT);
+               GUEST_ASSERT(!memcmp(other_hypercall_insn, hypercall_insn,
+                            HYPERCALL_INSN_SIZE));
+       } else {
+               GUEST_ASSERT(!ret);
+               GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn,
+                            HYPERCALL_INSN_SIZE));
+       }
+
+       GUEST_DONE();
+}
+
+KVM_ONE_VCPU_TEST_SUITE(fix_hypercall);
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       struct ucall uc;
+
+       vcpu_run(vcpu);
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_SYNC:
+               pr_info("%s: %016lx\n", (const char *)uc.args[2], uc.args[3]);
+               break;
+       case UCALL_DONE:
+               return;
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+       default:
+               TEST_FAIL("Unhandled ucall: %ld\nexit_reason: %u (%s)",
+                         uc.cmd, run->exit_reason, exit_reason_str(run->exit_reason));
+       }
+}
+
+static void test_fix_hypercall(struct kvm_vcpu *vcpu, bool disable_quirk)
+{
+       struct kvm_vm *vm = vcpu->vm;
+
+       vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler);
+
+       if (disable_quirk)
+               vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2,
+                             KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
+
+       quirk_disabled = disable_quirk;
+       sync_global_to_guest(vm, quirk_disabled);
+
+       virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+       enter_guest(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(fix_hypercall, enable_quirk, guest_main)
+{
+       test_fix_hypercall(vcpu, false);
+}
+
+KVM_ONE_VCPU_TEST(fix_hypercall, disable_quirk, guest_main)
+{
+       test_fix_hypercall(vcpu, true);
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
+
+       return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kvm/x86/flds_emulation.h b/tools/testing/selftests/kvm/x86/flds_emulation.h
new file mode 100644 (file)
index 0000000..37b1a9f
--- /dev/null
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_FLDS_EMULATION_H
+#define SELFTEST_KVM_FLDS_EMULATION_H
+
+#include "kvm_util.h"
+
+#define FLDS_MEM_EAX ".byte 0xd9, 0x00"
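+/*
+ * Opcode 0xd9 /0 with a ModRM byte of 0x00, i.e. an flds load through
+ * RAX/EAX; handle_flds_emulation_failure_exit() checks for these exact bytes.
+ */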
+
+/*
+ * flds is an instruction that the KVM instruction emulator is known not to
+ * support. This can be used in guest code along with a mechanism to force
+ * KVM to emulate the instruction (e.g. by providing an MMIO address) to
+ * exercise emulation failures.
+ */
+static inline void flds(uint64_t address)
+{
+       __asm__ __volatile__(FLDS_MEM_EAX :: "a"(address));
+}
+
+static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_regs regs;
+       uint8_t *insn_bytes;
+       uint64_t flags;
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+
+       TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+                   "Unexpected suberror: %u",
+                   run->emulation_failure.suberror);
+
+       flags = run->emulation_failure.flags;
+       TEST_ASSERT(run->emulation_failure.ndata >= 3 &&
+                   flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES,
+                   "run->emulation_failure is missing instruction bytes");
+
+       TEST_ASSERT(run->emulation_failure.insn_size >= 2,
+                   "Expected a 2-byte opcode for 'flds', got %d bytes",
+                   run->emulation_failure.insn_size);
+
+       insn_bytes = run->emulation_failure.insn_bytes;
+       TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0,
+                   "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x",
+                   insn_bytes[0], insn_bytes[1]);
+
+       vcpu_regs_get(vcpu, &regs);
+       regs.rip += 2;
+       vcpu_regs_set(vcpu, &regs);
+}
+
+#endif /* !SELFTEST_KVM_FLDS_EMULATION_H */
diff --git a/tools/testing/selftests/kvm/x86/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c
new file mode 100644 (file)
index 0000000..10b1b0b
--- /dev/null
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023, Google LLC.
+ */
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit)
+{
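+       /*
+        * Bits KVM silently ignores on writes vs. bits it accepts and
+        * reflects back on reads.  The names are an assumption based on the
+        * APM and KVM's MSR_K7_HWCR handling: bits 3 (reserved), 6 (FFDIS)
+        * and 8 (IGNNE) are ignored, while bits 18 (McStatusWrEn) and
+        * 24 (TscFreqSel) are valid.
+        */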
+       const uint64_t ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8);
+       const uint64_t valid = BIT_ULL(18) | BIT_ULL(24);
+       const uint64_t legal = ignored | valid;
+       uint64_t val = BIT_ULL(bit);
+       uint64_t actual;
+       int r;
+
+       r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val);
+       TEST_ASSERT(val & ~legal ? !r : r == 1,
+                   "Expected KVM_SET_MSRS(MSR_K7_HWCR) = 0x%lx to %s",
+                   val, val & ~legal ? "fail" : "succeed");
+
+       actual = vcpu_get_msr(vcpu, MSR_K7_HWCR);
+       TEST_ASSERT(actual == (val & valid),
+                   "Bit %u: unexpected HWCR 0x%lx; expected 0x%lx",
+                   bit, actual, (val & valid));
+
+       vcpu_set_msr(vcpu, MSR_K7_HWCR, 0);
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+       unsigned int bit;
+
+       vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+       for (bit = 0; bit < BITS_PER_LONG; bit++)
+               test_hwcr_bit(vcpu, bit);
+
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_clock.c b/tools/testing/selftests/kvm/x86/hyperv_clock.c
new file mode 100644 (file)
index 0000000..e058bc6
--- /dev/null
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V clocksources
+ */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+struct ms_hyperv_tsc_page {
+       volatile u32 tsc_sequence;
+       u32 reserved1;
+       volatile u64 tsc_scale;
+       volatile s64 tsc_offset;
+} __packed;
+
+/* Simplified mul_u64_u64_shr() */
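+/*
+ * Computes (a * b) >> 64 from 32-bit partial products.  The a_lo * b_lo term
+ * and the carries out of the low 64 bits are dropped, so the result can be
+ * short by a count or two; that is fine for the coarse tolerances used here.
+ */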
+static inline u64 mul_u64_u64_shr64(u64 a, u64 b)
+{
+       union {
+               u64 ll;
+               struct {
+                       u32 low, high;
+               } l;
+       } rm, rn, rh, a0, b0;
+       u64 c;
+
+       a0.ll = a;
+       b0.ll = b;
+
+       rm.ll = (u64)a0.l.low * b0.l.high;
+       rn.ll = (u64)a0.l.high * b0.l.low;
+       rh.ll = (u64)a0.l.high * b0.l.high;
+
+       rh.l.low = c = rm.l.high + rn.l.high + rh.l.low;
+       rh.l.high = (c >> 32) + rh.l.high;
+
+       return rh.ll;
+}
+
+static inline void nop_loop(void)
+{
+       int i;
+
+       for (i = 0; i < 100000000; i++)
+               asm volatile("nop");
+}
+
+static inline void check_tsc_msr_rdtsc(void)
+{
+       u64 tsc_freq, r1, r2, t1, t2;
+       s64 delta_ns;
+
+       tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
+       GUEST_ASSERT(tsc_freq > 0);
+
+       /* For increased accuracy, average rdtsc() before and after the rdmsr() */
+       r1 = rdtsc();
+       t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+       r1 = (r1 + rdtsc()) / 2;
+       nop_loop();
+       r2 = rdtsc();
+       t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+       r2 = (r2 + rdtsc()) / 2;
+
+       GUEST_ASSERT(r2 > r1 && t2 > t1);
+
+       /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
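+       /*
+        * delta_ns is the difference between the elapsed time per the MSR,
+        * (t2 - t1) * 100 ns, and the elapsed time derived from the raw TSC,
+        * (r2 - r1) * 1e9 / tsc_freq ns; the two should agree to within 1%.
+        */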
+       delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+       if (delta_ns < 0)
+               delta_ns = -delta_ns;
+
+       /* 1% tolerance */
+       GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100);
+}
+
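+/*
+ * TSC page reference time in 100ns units, per the Hyper-V TLFS:
+ * ReferenceTime = ((VirtualTsc * TscScale) >> 64) + TscOffset.
+ */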
+static inline u64 get_tscpage_ts(struct ms_hyperv_tsc_page *tsc_page)
+{
+       return mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
+}
+
+static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page)
+{
+       u64 r1, r2, t1, t2;
+
+       /* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */
+       t1 = get_tscpage_ts(tsc_page);
+       r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+
+       /* 10 ms tolerance */
+       GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000);
+       nop_loop();
+
+       t2 = get_tscpage_ts(tsc_page);
+       r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+       GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000);
+}
+
+static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa)
+{
+       u64 tsc_scale, tsc_offset;
+
+       /* Set Guest OS id to enable Hyper-V emulation */
+       GUEST_SYNC(1);
+       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+       GUEST_SYNC(2);
+
+       check_tsc_msr_rdtsc();
+
+       GUEST_SYNC(3);
+
+       /* Set up the TSC page in disabled state and check that it's clean */
+       wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa);
+       GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+       GUEST_ASSERT(tsc_page->tsc_scale == 0);
+       GUEST_ASSERT(tsc_page->tsc_offset == 0);
+
+       GUEST_SYNC(4);
+
+       /* Set up the TSC page in enabled state */
+       wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1);
+       GUEST_ASSERT(tsc_page->tsc_sequence != 0);
+
+       GUEST_SYNC(5);
+
+       check_tsc_msr_tsc_page(tsc_page);
+
+       GUEST_SYNC(6);
+
+       tsc_offset = tsc_page->tsc_offset;
+       /* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */
+
+       GUEST_SYNC(7);
+       /* Sanity check TSC page timestamp, it should be close to 0 */
+       GUEST_ASSERT(get_tscpage_ts(tsc_page) < 100000);
+
+       GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset);
+
+       nop_loop();
+
+       /*
+        * Enable Re-enlightenment and check that TSC page stays constant across
+        * KVM_SET_CLOCK.
+        */
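+       /* Assumed layout: bits 7:0 = reenlightenment vector (0xff), bit 16 = enabled. */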
+       wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff);
+       wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1);
+       tsc_offset = tsc_page->tsc_offset;
+       tsc_scale = tsc_page->tsc_scale;
+       GUEST_SYNC(8);
+       GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset);
+       GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale);
+
+       GUEST_SYNC(9);
+
+       check_tsc_msr_tsc_page(tsc_page);
+
+       /*
+        * Disable re-enlightenment and TSC page, check that KVM doesn't update
+        * it anymore.
+        */
+       wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0);
+       wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0);
+       wrmsr(HV_X64_MSR_REFERENCE_TSC, 0);
+       memset(tsc_page, 0, sizeof(*tsc_page));
+
+       GUEST_SYNC(10);
+       GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+       GUEST_ASSERT(tsc_page->tsc_offset == 0);
+       GUEST_ASSERT(tsc_page->tsc_scale == 0);
+
+       GUEST_DONE();
+}
+
+static void host_check_tsc_msr_rdtsc(struct kvm_vcpu *vcpu)
+{
+       u64 tsc_freq, r1, r2, t1, t2;
+       s64 delta_ns;
+
+       tsc_freq = vcpu_get_msr(vcpu, HV_X64_MSR_TSC_FREQUENCY);
+       TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
+
+       /* For increased accuracy, average rdtsc() before and after the ioctl */
+       r1 = rdtsc();
+       t1 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
+       r1 = (r1 + rdtsc()) / 2;
+       nop_loop();
+       r2 = rdtsc();
+       t2 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
+       r2 = (r2 + rdtsc()) / 2;
+
+       TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
+
+       /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
+       delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+       if (delta_ns < 0)
+               delta_ns = -delta_ns;
+
+       /* 1% tolerance */
+       TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100,
+                   "Elapsed time does not match (MSR=%ld, TSC=%ld)",
+                   (t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq);
+}
+
+int main(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       vm_vaddr_t tsc_page_gva;
+       int stage;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME));
+       TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+       vcpu_set_hv_cpuid(vcpu);
+
+       tsc_page_gva = vm_vaddr_alloc_page(vm);
+       memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
+       TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
+               "TSC page has to be page aligned");
+       vcpu_args_set(vcpu, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
+
+       host_check_tsc_msr_rdtsc(vcpu);
+
+       for (stage = 1;; stage++) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       break;
+               case UCALL_DONE:
+                       /* Keep in sync with guest_main() */
+                       TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d",
+                                   stage);
+                       goto out;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+
+               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+                           uc.args[1] == stage,
+                           "Stage %d: Unexpected register values vmexit, got %lx",
+                           stage, (ulong)uc.args[1]);
+
+               /* Reset kvmclock triggering TSC page update */
+               if (stage == 7 || stage == 8 || stage == 10) {
+                       struct kvm_clock_data clock = {0};
+
+                       vm_ioctl(vm, KVM_SET_CLOCK, &clock);
+               }
+       }
+
+out:
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
new file mode 100644 (file)
index 0000000..4f5881d
--- /dev/null
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_CAP_HYPERV_CPUID
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+static void guest_code(void)
+{
+}
+
+static bool smt_possible(void)
+{
+       char buf[16];
+       FILE *f;
+       bool res = true;
+
+       f = fopen("/sys/devices/system/cpu/smt/control", "r");
+       if (f) {
+               if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
+                       if (!strncmp(buf, "forceoff", 8) ||
+                           !strncmp(buf, "notsupported", 12))
+                               res = false;
+               }
+               fclose(f);
+       }
+
+       return res;
+}
+
+static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries,
+                         bool evmcs_expected)
+{
+       int i;
+       int nent_expected = 10;
+       u32 test_val;
+
+       TEST_ASSERT(hv_cpuid_entries->nent == nent_expected,
+                   "KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
+                   " (returned %d)",
+                   nent_expected, hv_cpuid_entries->nent);
+
+       for (i = 0; i < hv_cpuid_entries->nent; i++) {
+               const struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
+
+               TEST_ASSERT((entry->function >= 0x40000000) &&
+                           (entry->function <= 0x40000082),
+                           "function %x is out of the supported range",
+                           entry->function);
+
+               TEST_ASSERT(entry->index == 0,
+                           ".index field should be zero");
+
+               TEST_ASSERT(entry->flags == 0,
+                           ".flags field should be zero");
+
+               TEST_ASSERT(!entry->padding[0] && !entry->padding[1] &&
+                           !entry->padding[2], "padding should be zero");
+
+               switch (entry->function) {
+               case 0x40000000:
+                       test_val = 0x40000082;
+
+                       TEST_ASSERT(entry->eax == test_val,
+                                   "Wrong max leaf report in 0x40000000.EAX: %x"
+                                   " (evmcs=%d)",
+                                   entry->eax, evmcs_expected
+                               );
+                       break;
+               case 0x40000004:
+                       test_val = entry->eax & (1UL << 18);
+
+                       TEST_ASSERT(!!test_val == !smt_possible(),
+                                   "NoNonArchitecturalCoreSharing bit"
+                                   " doesn't reflect SMT setting");
+                       break;
+               case 0x4000000A:
+                       TEST_ASSERT(entry->eax & (1UL << 19),
+                                   "Enlightened MSR-Bitmap should always be supported"
+                                   " 0x4000000A.EAX: %x", entry->eax);
+                       if (evmcs_expected)
+                               TEST_ASSERT((entry->eax & 0xffff) == 0x101,
+                                   "Supported Enlightened VMCS version range is supposed to be 1:1"
+                                   " 0x4000000A.EAX: %x", entry->eax);
+
+                       break;
+               default:
+                       break;
+
+               }
+               /*
+                * If needed for debug:
+                * fprintf(stdout,
+                *      "CPUID%lx EAX=0x%lx EBX=0x%lx ECX=0x%lx EDX=0x%lx\n",
+                *      entry->function, entry->eax, entry->ebx, entry->ecx,
+                *      entry->edx);
+                */
+       }
+}
+
+void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+       static struct kvm_cpuid2 cpuid = {.nent = 0};
+       int ret;
+
+       if (vcpu)
+               ret = __vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+       else
+               ret = __kvm_ioctl(vm->kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+
+       TEST_ASSERT(ret == -1 && errno == E2BIG,
+                   "%s KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
+                   " it should have: %d %d", !vcpu ? "KVM" : "vCPU", ret, errno);
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vm *vm;
+       const struct kvm_cpuid2 *hv_cpuid_entries;
+       struct kvm_vcpu *vcpu;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       /* Test vCPU ioctl version */
+       test_hv_cpuid_e2big(vm, vcpu);
+
+       hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
+       test_hv_cpuid(hv_cpuid_entries, false);
+       free((void *)hv_cpuid_entries);
+
+       if (!kvm_cpu_has(X86_FEATURE_VMX) ||
+           !kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+               print_skip("Enlightened VMCS is unsupported");
+               goto do_sys;
+       }
+       vcpu_enable_evmcs(vcpu);
+       hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
+       test_hv_cpuid(hv_cpuid_entries, true);
+       free((void *)hv_cpuid_entries);
+
+do_sys:
+       /* Test system ioctl version */
+       if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) {
+               print_skip("KVM_CAP_SYS_HYPERV_CPUID not supported");
+               goto out;
+       }
+
+       test_hv_cpuid_e2big(vm, NULL);
+
+       hv_cpuid_entries = kvm_get_supported_hv_cpuid();
+       test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX));
+
+out:
+       kvm_vm_free(vm);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c
new file mode 100644 (file)
index 0000000..74cf196
--- /dev/null
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for Enlightened VMCS, including nested guest state.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/bitmap.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+
+#include "hyperv.h"
+#include "vmx.h"
+
+static int ud_count;
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+       ud_count++;
+       regs->rip += 3; /* VMLAUNCH */
+}
+
+static void guest_nmi_handler(struct ex_regs *regs)
+{
+}
+
+static inline void rdmsr_from_l2(uint32_t msr)
+{
+       /* Currently, L1 doesn't preserve GPRs during vmexits. */
+       __asm__ __volatile__ ("rdmsr" : : "c"(msr) :
+                             "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+                             "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
+/* Exit to L1 from L2 with RDMSR instruction */
+void l2_guest_code(void)
+{
+       u64 unused;
+
+       GUEST_SYNC(7);
+
+       GUEST_SYNC(8);
+
+       /* Forced exit to L1 upon restore */
+       GUEST_SYNC(9);
+
+       vmcall();
+
+       /* MSR-Bitmap tests */
+       rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+       rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+       rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
+       vmcall();
+       rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
+
+       /* L2 TLB flush tests */
+       hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
+                        HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS);
+       rdmsr_from_l2(MSR_FS_BASE);
+       /*
+        * Note: hypercall status (RAX) is not preserved correctly by L1 after
+        * a synthetic vmexit, so use the unchecked version.
+        */
+       __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
+                          HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS,
+                          &unused);
+
+       /* Done, exit to L1 and never come back.  */
+       vmcall();
+}
+
+void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages,
+               vm_vaddr_t hv_hcall_page_gpa)
+{
+#define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+       wrmsr(HV_X64_MSR_HYPERCALL, hv_hcall_page_gpa);
+
+       x2apic_enable();
+
+       GUEST_SYNC(1);
+       GUEST_SYNC(2);
+
+       enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
+       evmcs_enable();
+
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+       GUEST_SYNC(3);
+       GUEST_ASSERT(load_evmcs(hv_pages));
+       GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+
+       GUEST_SYNC(4);
+       GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+
+       prepare_vmcs(vmx_pages, l2_guest_code,
+                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       GUEST_SYNC(5);
+       GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+       current_evmcs->revision_id = -1u;
+       GUEST_ASSERT(vmlaunch());
+       current_evmcs->revision_id = EVMCS_VERSION;
+       GUEST_SYNC(6);
+
+       vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
+               PIN_BASED_NMI_EXITING);
+
+       /* L2 TLB flush setup */
+       current_evmcs->partition_assist_page = hv_pages->partition_assist_gpa;
+       current_evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
+       current_evmcs->hv_vm_id = 1;
+       current_evmcs->hv_vp_id = 1;
+       current_vp_assist->nested_control.features.directhypercall = 1;
+       *(u32 *)(hv_pages->partition_assist) = 0;
+
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
+       GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), NMI_VECTOR);
+       GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+
+       /*
+        * The NMI forces an L2->L1 exit; resume L2 and check that the eVMCS
+        * is up-to-date (RIP points where it should, not at the beginning of
+        * l2_guest_code()).  GUEST_SYNC(9) checks that.
+        */
+       GUEST_ASSERT(!vmresume());
+
+       GUEST_SYNC(10);
+
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+       current_evmcs->guest_rip += 3; /* vmcall */
+
+       /* Intercept RDMSR 0xc0000100 */
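+       /*
+        * Offset 0x400 into the VMX MSR bitmap is the read bitmap for the
+        * high MSR range (0xc0000000 - 0xc0001fff); setting bit
+        * (MSR & 0x1fff) intercepts reads of MSR_FS_BASE (0xc0000100).
+        */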
+       vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
+               CPU_BASED_USE_MSR_BITMAPS);
+       __set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400);
+       GUEST_ASSERT(!vmresume());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+       current_evmcs->guest_rip += 2; /* rdmsr */
+
+       /* Enable enlightened MSR bitmap */
+       current_evmcs->hv_enlightenments_control.msr_bitmap = 1;
+       GUEST_ASSERT(!vmresume());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+       current_evmcs->guest_rip += 2; /* rdmsr */
+
+       /* Intercept RDMSR 0xc0000101 without telling KVM about it */
+       __set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400);
+       /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
+       current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+       GUEST_ASSERT(!vmresume());
+       /* Make sure we don't see EXIT_REASON_MSR_READ here so eMSR bitmap works */
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+       current_evmcs->guest_rip += 3; /* vmcall */
+
+       /* Now tell KVM we've changed MSR-Bitmap */
+       current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+       GUEST_ASSERT(!vmresume());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+       current_evmcs->guest_rip += 2; /* rdmsr */
+
+       /*
+        * L2 TLB flush test. First VMCALL should be handled directly by L0,
+        * no VMCALL exit expected.
+        */
+       GUEST_ASSERT(!vmresume());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+       current_evmcs->guest_rip += 2; /* rdmsr */
+       /* Enable synthetic vmexit */
+       *(u32 *)(hv_pages->partition_assist) = 1;
+       GUEST_ASSERT(!vmresume());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH);
+
+       GUEST_ASSERT(!vmresume());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+       GUEST_SYNC(11);
+
+       /* Try enlightened vmptrld with an incorrect GPA */
+       evmcs_vmptrld(0xdeadbeef, hv_pages->enlightened_vmcs);
+       GUEST_ASSERT(vmlaunch());
+       GUEST_ASSERT(ud_count == 1);
+       GUEST_DONE();
+}
+
+void inject_nmi(struct kvm_vcpu *vcpu)
+{
+       struct kvm_vcpu_events events;
+
+       vcpu_events_get(vcpu, &events);
+
+       events.nmi.pending = 1;
+       events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
+
+       vcpu_events_set(vcpu, &events);
+}
+
+static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm,
+                                       struct kvm_vcpu *vcpu)
+{
+       struct kvm_regs regs1, regs2;
+       struct kvm_x86_state *state;
+
+       state = vcpu_save_state(vcpu);
+       memset(&regs1, 0, sizeof(regs1));
+       vcpu_regs_get(vcpu, &regs1);
+
+       kvm_vm_release(vm);
+
+       /* Restore state in a new VM.  */
+       vcpu = vm_recreate_with_one_vcpu(vm);
+       vcpu_set_hv_cpuid(vcpu);
+       vcpu_enable_evmcs(vcpu);
+       vcpu_load_state(vcpu, state);
+       kvm_x86_state_cleanup(state);
+
+       memset(&regs2, 0, sizeof(regs2));
+       vcpu_regs_get(vcpu, &regs2);
+       TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+                   "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+                   (ulong) regs2.rdi, (ulong) regs2.rsi);
+       return vcpu;
+}
+
+int main(int argc, char *argv[])
+{
+       vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0;
+       vm_vaddr_t hcall_page;
+
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       int stage;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS));
+       TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       hcall_page = vm_vaddr_alloc_pages(vm, 1);
+       memset(addr_gva2hva(vm, hcall_page), 0x0,  getpagesize());
+
+       vcpu_set_hv_cpuid(vcpu);
+       vcpu_enable_evmcs(vcpu);
+
+       vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
+       vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
+       vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
+
+       vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+       vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
+
+       pr_info("Running L1 which uses EVMCS to run L2\n");
+
+       for (stage = 1;; stage++) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+
+               /* UCALL_SYNC is handled here.  */
+               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+                           uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+                           stage, (ulong)uc.args[1]);
+
+               vcpu = save_restore_vm(vm, vcpu);
+
+               /* Force immediate L2->L1 exit before resuming */
+               if (stage == 8) {
+                       pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
+                       inject_nmi(vcpu);
+               }
+
+               /*
+                * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly
+                * restored VM (before the first KVM_RUN) to check that
+                * KVM_STATE_NESTED_EVMCS is not lost.
+                */
+               if (stage == 9) {
+                       pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n");
+                       vcpu = save_restore_vm(vm, vcpu);
+               }
+       }
+
+done:
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c
new file mode 100644 (file)
index 0000000..949e08e
--- /dev/null
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test Hyper-V extended hypercall, HV_EXT_CALL_QUERY_CAPABILITIES (0x8001),
+ * exit to userspace and receive result in guest.
+ *
+ * Negative tests are present in hyperv_features.c
+ *
+ * Copyright 2022 Google LLC
+ * Author: Vipin Sharma <vipinsh@google.com>
+ */
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+/* Any value is fine */
+#define EXT_CAPABILITIES 0xbull
+
+static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa,
+                      vm_vaddr_t out_pg_gva)
+{
+       uint64_t *output_gva;
+
+       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+       wrmsr(HV_X64_MSR_HYPERCALL, in_pg_gpa);
+
+       output_gva = (uint64_t *)out_pg_gva;
+
+       hyperv_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, in_pg_gpa, out_pg_gpa);
+
+       /* TLFS states output will be a uint64_t value */
+       GUEST_ASSERT_EQ(*output_gva, EXT_CAPABILITIES);
+
+       GUEST_DONE();
+}
+
+int main(void)
+{
+       vm_vaddr_t hcall_out_page;
+       vm_vaddr_t hcall_in_page;
+       struct kvm_vcpu *vcpu;
+       struct kvm_run *run;
+       struct kvm_vm *vm;
+       uint64_t *outval;
+       struct ucall uc;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
+
+       /* Verify if extended hypercalls are supported */
+       if (!kvm_cpuid_has(kvm_get_supported_hv_cpuid(),
+                          HV_ENABLE_EXTENDED_HYPERCALLS)) {
+               print_skip("Extended calls not supported by the kernel");
+               exit(KSFT_SKIP);
+       }
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       run = vcpu->run;
+       vcpu_set_hv_cpuid(vcpu);
+
+       /* Hypercall input */
+       hcall_in_page = vm_vaddr_alloc_pages(vm, 1);
+       memset(addr_gva2hva(vm, hcall_in_page), 0x0, vm->page_size);
+
+       /* Hypercall output */
+       hcall_out_page = vm_vaddr_alloc_pages(vm, 1);
+       memset(addr_gva2hva(vm, hcall_out_page), 0x0, vm->page_size);
+
+       vcpu_args_set(vcpu, 3, addr_gva2gpa(vm, hcall_in_page),
+                     addr_gva2gpa(vm, hcall_out_page), hcall_out_page);
+
+       vcpu_run(vcpu);
+
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERV,
+                   "Unexpected exit reason: %u (%s)",
+                   run->exit_reason, exit_reason_str(run->exit_reason));
+
+       outval = addr_gpa2hva(vm, run->hyperv.u.hcall.params[1]);
+       *outval = EXT_CAPABILITIES;
+       run->hyperv.u.hcall.result = HV_STATUS_SUCCESS;
+
+       vcpu_run(vcpu);
+
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+                   "Unexpected exit reason: %u (%s)",
+                   run->exit_reason, exit_reason_str(run->exit_reason));
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       case UCALL_DONE:
+               break;
+       default:
+               TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+       }
+
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c
new file mode 100644 (file)
index 0000000..068e9c6
--- /dev/null
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V features enablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+/*
+ * HYPERV_CPUID_ENLIGHTMENT_INFO.EBX is not a 'feature' CPUID leaf
+ * but to activate the feature it is sufficient to set it to a non-zero
+ * value. Use BIT(0) for that.
+ */
+#define HV_PV_SPINLOCKS_TEST            \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EBX, 0)
+
+struct msr_data {
+       uint32_t idx;
+       bool fault_expected;
+       bool write;
+       u64 write_val;
+};
+
+struct hcall_data {
+       uint64_t control;
+       uint64_t expect;
+       bool ud_expected;
+};
+
+static bool is_write_only_msr(uint32_t msr)
+{
+       return msr == HV_X64_MSR_EOI;
+}
+
+static void guest_msr(struct msr_data *msr)
+{
+       uint8_t vector = 0;
+       uint64_t msr_val = 0;
+
+       GUEST_ASSERT(msr->idx);
+
+       if (msr->write)
+               vector = wrmsr_safe(msr->idx, msr->write_val);
+
+       if (!vector && (!msr->write || !is_write_only_msr(msr->idx)))
+               vector = rdmsr_safe(msr->idx, &msr_val);
+
+       if (msr->fault_expected)
+               __GUEST_ASSERT(vector == GP_VECTOR,
+                              "Expected #GP on %sMSR(0x%x), got vector '0x%x'",
+                              msr->write ? "WR" : "RD", msr->idx, vector);
+       else
+               __GUEST_ASSERT(!vector,
+                              "Expected success on %sMSR(0x%x), got vector '0x%x'",
+                              msr->write ? "WR" : "RD", msr->idx, vector);
+
+       if (vector || is_write_only_msr(msr->idx))
+               goto done;
+
+       if (msr->write)
+               __GUEST_ASSERT(!vector,
+                              "WRMSR(0x%x) to '0x%lx', RDMSR read '0x%lx'",
+                              msr->idx, msr->write_val, msr_val);
+
+       /* Invariant TSC bit appears when TSC invariant control MSR is written to */
+       if (msr->idx == HV_X64_MSR_TSC_INVARIANT_CONTROL) {
+               if (!this_cpu_has(HV_ACCESS_TSC_INVARIANT))
+                       GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC));
+               else
+                       GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC) ==
+                                    !!(msr_val & HV_INVARIANT_TSC_EXPOSED));
+       }
+
+done:
+       GUEST_DONE();
+}
+
+static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
+{
+       u64 res, input, output;
+       uint8_t vector;
+
+       GUEST_ASSERT_NE(hcall->control, 0);
+
+       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+       wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+
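+       /*
+        * Non-fast hypercalls pass input/output by GPA; use the hypercall
+        * page for input and the page immediately after it for output.  Fast
+        * hypercalls pass their data in registers, so no pages are needed.
+        */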
+       if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
+               input = pgs_gpa;
+               output = pgs_gpa + 4096;
+       } else {
+               input = output = 0;
+       }
+
+       vector = __hyperv_hypercall(hcall->control, input, output, &res);
+       if (hcall->ud_expected) {
+               __GUEST_ASSERT(vector == UD_VECTOR,
+                              "Expected #UD for control '%lu', got vector '0x%x'",
+                              hcall->control, vector);
+       } else {
+               __GUEST_ASSERT(!vector,
+                              "Expected no exception for control '%lu', got vector '0x%x'",
+                              hcall->control, vector);
+               GUEST_ASSERT_EQ(res, hcall->expect);
+       }
+
+       GUEST_DONE();
+}
+
+static void vcpu_reset_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+       /*
+        * Enable all supported Hyper-V features, then clear the leafs holding
+        * the features that will be tested one by one.
+        */
+       vcpu_set_hv_cpuid(vcpu);
+
+       vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
+       vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
+       vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
+}
+
+static void guest_test_msrs_access(void)
+{
+       struct kvm_cpuid2 *prev_cpuid = NULL;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       int stage = 0;
+       vm_vaddr_t msr_gva;
+       struct msr_data *msr;
+       bool has_invtsc = kvm_cpu_has(X86_FEATURE_INVTSC);
+
+       while (true) {
+               vm = vm_create_with_one_vcpu(&vcpu, guest_msr);
+
+               msr_gva = vm_vaddr_alloc_page(vm);
+               memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
+               msr = addr_gva2hva(vm, msr_gva);
+
+               vcpu_args_set(vcpu, 1, msr_gva);
+               vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
+
+               if (!prev_cpuid) {
+                       vcpu_reset_hv_cpuid(vcpu);
+
+                       prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
+               } else {
+                       vcpu_init_cpuid(vcpu, prev_cpuid);
+               }
+
+               /* TODO: Make this entire test easier to maintain. */
+               if (stage >= 21)
+                       vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_SYNIC2, 0);
+
+               switch (stage) {
+               case 0:
+                       /*
+                        * Only available when Hyper-V identification is set
+                        */
+                       msr->idx = HV_X64_MSR_GUEST_OS_ID;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 1:
+                       msr->idx = HV_X64_MSR_HYPERCALL;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 2:
+                       vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
+                       /*
+                        * HV_X64_MSR_GUEST_OS_ID has to be written first to make
+                        * HV_X64_MSR_HYPERCALL available.
+                        */
+                       msr->idx = HV_X64_MSR_GUEST_OS_ID;
+                       msr->write = true;
+                       msr->write_val = HYPERV_LINUX_OS_ID;
+                       msr->fault_expected = false;
+                       break;
+               case 3:
+                       msr->idx = HV_X64_MSR_GUEST_OS_ID;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 4:
+                       msr->idx = HV_X64_MSR_HYPERCALL;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+
+               case 5:
+                       msr->idx = HV_X64_MSR_VP_RUNTIME;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 6:
+                       vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_RUNTIME_AVAILABLE);
+                       msr->idx = HV_X64_MSR_VP_RUNTIME;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 7:
+                       /* Read only */
+                       msr->idx = HV_X64_MSR_VP_RUNTIME;
+                       msr->write = true;
+                       msr->write_val = 1;
+                       msr->fault_expected = true;
+                       break;
+
+               case 8:
+                       msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 9:
+                       vcpu_set_cpuid_feature(vcpu, HV_MSR_TIME_REF_COUNT_AVAILABLE);
+                       msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 10:
+                       /* Read only */
+                       msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+                       msr->write = true;
+                       msr->write_val = 1;
+                       msr->fault_expected = true;
+                       break;
+
+               case 11:
+                       msr->idx = HV_X64_MSR_VP_INDEX;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 12:
+                       vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_INDEX_AVAILABLE);
+                       msr->idx = HV_X64_MSR_VP_INDEX;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 13:
+                       /* Read only */
+                       msr->idx = HV_X64_MSR_VP_INDEX;
+                       msr->write = true;
+                       msr->write_val = 1;
+                       msr->fault_expected = true;
+                       break;
+
+               case 14:
+                       msr->idx = HV_X64_MSR_RESET;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 15:
+                       vcpu_set_cpuid_feature(vcpu, HV_MSR_RESET_AVAILABLE);
+                       msr->idx = HV_X64_MSR_RESET;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 16:
+                       msr->idx = HV_X64_MSR_RESET;
+                       msr->write = true;
+                       /*
+                        * TODO: the test only writes '0' to HV_X64_MSR_RESET
+                        * at the moment, writing some other value there will
+                        * trigger real vCPU reset and the code is not prepared
+                        * to handle it yet.
+                        */
+                       msr->write_val = 0;
+                       msr->fault_expected = false;
+                       break;
+
+               case 17:
+                       msr->idx = HV_X64_MSR_REFERENCE_TSC;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 18:
+                       vcpu_set_cpuid_feature(vcpu, HV_MSR_REFERENCE_TSC_AVAILABLE);
+                       msr->idx = HV_X64_MSR_REFERENCE_TSC;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 19:
+                       msr->idx = HV_X64_MSR_REFERENCE_TSC;
+                       msr->write = true;
+                       msr->write_val = 0;
+                       msr->fault_expected = false;
+                       break;
+
+               case 20:
+                       msr->idx = HV_X64_MSR_EOM;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 21:
+                       /*
+                        * Remains unavailable even with KVM_CAP_HYPERV_SYNIC2
+                        * capability enabled and guest visible CPUID bit unset.
+                        */
+                       msr->idx = HV_X64_MSR_EOM;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 22:
+                       vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNIC_AVAILABLE);
+                       msr->idx = HV_X64_MSR_EOM;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 23:
+                       msr->idx = HV_X64_MSR_EOM;
+                       msr->write = true;
+                       msr->write_val = 0;
+                       msr->fault_expected = false;
+                       break;
+
+               case 24:
+                       msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 25:
+                       vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNTIMER_AVAILABLE);
+                       msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 26:
+                       msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+                       msr->write = true;
+                       msr->write_val = 0;
+                       msr->fault_expected = false;
+                       break;
+               case 27:
+                       /* Direct mode test */
+                       msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+                       msr->write = true;
+                       msr->write_val = 1 << 12;
+                       msr->fault_expected = true;
+                       break;
+               case 28:
+                       vcpu_set_cpuid_feature(vcpu, HV_STIMER_DIRECT_MODE_AVAILABLE);
+                       msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+                       msr->write = true;
+                       msr->write_val = 1 << 12;
+                       msr->fault_expected = false;
+                       break;
+
+               case 29:
+                       msr->idx = HV_X64_MSR_EOI;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 30:
+                       vcpu_set_cpuid_feature(vcpu, HV_MSR_APIC_ACCESS_AVAILABLE);
+                       msr->idx = HV_X64_MSR_EOI;
+                       msr->write = true;
+                       msr->write_val = 1;
+                       msr->fault_expected = false;
+                       break;
+
+               case 31:
+                       msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 32:
+                       vcpu_set_cpuid_feature(vcpu, HV_ACCESS_FREQUENCY_MSRS);
+                       msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 33:
+                       /* Read only */
+                       msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+                       msr->write = true;
+                       msr->write_val = 1;
+                       msr->fault_expected = true;
+                       break;
+
+               case 34:
+                       msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 35:
+                       vcpu_set_cpuid_feature(vcpu, HV_ACCESS_REENLIGHTENMENT);
+                       msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 36:
+                       msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+                       msr->write = true;
+                       msr->write_val = 1;
+                       msr->fault_expected = false;
+                       break;
+               case 37:
+                       /* Can only write '0' */
+                       msr->idx = HV_X64_MSR_TSC_EMULATION_STATUS;
+                       msr->write = true;
+                       msr->write_val = 1;
+                       msr->fault_expected = true;
+                       break;
+
+               case 38:
+                       msr->idx = HV_X64_MSR_CRASH_P0;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 39:
+                       vcpu_set_cpuid_feature(vcpu, HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE);
+                       msr->idx = HV_X64_MSR_CRASH_P0;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 40:
+                       msr->idx = HV_X64_MSR_CRASH_P0;
+                       msr->write = true;
+                       msr->write_val = 1;
+                       msr->fault_expected = false;
+                       break;
+
+               case 41:
+                       msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 42:
+                       vcpu_set_cpuid_feature(vcpu, HV_FEATURE_DEBUG_MSRS_AVAILABLE);
+                       vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
+                       msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 43:
+                       msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+                       msr->write = true;
+                       msr->write_val = 0;
+                       msr->fault_expected = false;
+                       break;
+
+               case 44:
+                       /* MSR is not available when CPUID feature bit is unset */
+                       if (!has_invtsc)
+                               goto next_stage;
+                       msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 45:
+                       /* MSR is available when CPUID feature bit is set */
+                       if (!has_invtsc)
+                               goto next_stage;
+                       vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT);
+                       msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 46:
+                       /* Writing bits other than 0 is forbidden */
+                       if (!has_invtsc)
+                               goto next_stage;
+                       msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+                       msr->write = true;
+                       msr->write_val = 0xdeadbeef;
+                       msr->fault_expected = true;
+                       break;
+               case 47:
+                       /* Setting bit 0 enables the feature */
+                       if (!has_invtsc)
+                               goto next_stage;
+                       msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+                       msr->write = true;
+                       msr->write_val = 1;
+                       msr->fault_expected = false;
+                       break;
+
+               default:
+                       kvm_vm_free(vm);
+                       return;
+               }
+
+               vcpu_set_cpuid(vcpu);
+
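+               /* Snapshot the CPUID so the next iteration's fresh vCPU starts from it */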
+               memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
+
+               pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage,
+                        msr->idx, msr->write ? "write" : "read");
+
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       return;
+               case UCALL_DONE:
+                       break;
+               default:
+                       TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+                       return;
+               }
+
+next_stage:
+               stage++;
+               kvm_vm_free(vm);
+       }
+}
+
+static void guest_test_hcalls_access(void)
+{
+       struct kvm_cpuid2 *prev_cpuid = NULL;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       int stage = 0;
+       vm_vaddr_t hcall_page, hcall_params;
+       struct hcall_data *hcall;
+
+       while (true) {
+               vm = vm_create_with_one_vcpu(&vcpu, guest_hcall);
+
+               /* Hypercall input/output */
+               hcall_page = vm_vaddr_alloc_pages(vm, 2);
+               memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+               hcall_params = vm_vaddr_alloc_page(vm);
+               memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
+               hcall = addr_gva2hva(vm, hcall_params);
+
+               vcpu_args_set(vcpu, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
+               vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
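+               /* Have KVM enforce the guest-visible Hyper-V CPUID when handling hypercalls */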
+
+               if (!prev_cpuid) {
+                       vcpu_reset_hv_cpuid(vcpu);
+
+                       prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
+               } else {
+                       vcpu_init_cpuid(vcpu, prev_cpuid);
+               }
+
+               switch (stage) {
+               case 0:
+                       vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
+                       hcall->control = 0xbeef;
+                       hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+                       break;
+
+               case 1:
+                       hcall->control = HVCALL_POST_MESSAGE;
+                       hcall->expect = HV_STATUS_ACCESS_DENIED;
+                       break;
+               case 2:
+                       vcpu_set_cpuid_feature(vcpu, HV_POST_MESSAGES);
+                       hcall->control = HVCALL_POST_MESSAGE;
+                       hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+                       break;
+
+               case 3:
+                       hcall->control = HVCALL_SIGNAL_EVENT;
+                       hcall->expect = HV_STATUS_ACCESS_DENIED;
+                       break;
+               case 4:
+                       vcpu_set_cpuid_feature(vcpu, HV_SIGNAL_EVENTS);
+                       hcall->control = HVCALL_SIGNAL_EVENT;
+                       hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+                       break;
+
+               case 5:
+                       hcall->control = HVCALL_RESET_DEBUG_SESSION;
+                       hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+                       break;
+               case 6:
+                       vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
+                       hcall->control = HVCALL_RESET_DEBUG_SESSION;
+                       hcall->expect = HV_STATUS_ACCESS_DENIED;
+                       break;
+               case 7:
+                       vcpu_set_cpuid_feature(vcpu, HV_DEBUGGING);
+                       hcall->control = HVCALL_RESET_DEBUG_SESSION;
+                       hcall->expect = HV_STATUS_OPERATION_DENIED;
+                       break;
+
+               case 8:
+                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
+                       hcall->expect = HV_STATUS_ACCESS_DENIED;
+                       break;
+               case 9:
+                       vcpu_set_cpuid_feature(vcpu, HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED);
+                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
+                       hcall->expect = HV_STATUS_SUCCESS;
+                       break;
+               case 10:
+                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
+                       hcall->expect = HV_STATUS_ACCESS_DENIED;
+                       break;
+               case 11:
+                       vcpu_set_cpuid_feature(vcpu, HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED);
+                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
+                       hcall->expect = HV_STATUS_SUCCESS;
+                       break;
+
+               case 12:
+                       hcall->control = HVCALL_SEND_IPI;
+                       hcall->expect = HV_STATUS_ACCESS_DENIED;
+                       break;
+               case 13:
+                       vcpu_set_cpuid_feature(vcpu, HV_X64_CLUSTER_IPI_RECOMMENDED);
+                       hcall->control = HVCALL_SEND_IPI;
+                       hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+                       break;
+               case 14:
+                       /* Nothing in 'sparse banks' -> success */
+                       hcall->control = HVCALL_SEND_IPI_EX;
+                       hcall->expect = HV_STATUS_SUCCESS;
+                       break;
+
+               case 15:
+                       hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
+                       hcall->expect = HV_STATUS_ACCESS_DENIED;
+                       break;
+               case 16:
+                       vcpu_set_cpuid_feature(vcpu, HV_PV_SPINLOCKS_TEST);
+                       hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
+                       hcall->expect = HV_STATUS_SUCCESS;
+                       break;
+               case 17:
+                       /* XMM fast hypercall without HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE -> #UD */
+                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
+                       hcall->ud_expected = true;
+                       break;
+               case 18:
+                       vcpu_set_cpuid_feature(vcpu, HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE);
+                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
+                       hcall->ud_expected = false;
+                       hcall->expect = HV_STATUS_SUCCESS;
+                       break;
+               case 19:
+                       hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES;
+                       hcall->expect = HV_STATUS_ACCESS_DENIED;
+                       break;
+               case 20:
+                       vcpu_set_cpuid_feature(vcpu, HV_ENABLE_EXTENDED_HYPERCALLS);
+                       hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES | HV_HYPERCALL_FAST_BIT;
+                       hcall->expect = HV_STATUS_INVALID_PARAMETER;
+                       break;
+               case 21:
+                       kvm_vm_free(vm);
+                       return;
+               }
+
+               vcpu_set_cpuid(vcpu);
+
+               memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
+
+               pr_debug("Stage %d: testing hcall: 0x%lx\n", stage, hcall->control);
+
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       return;
+               case UCALL_DONE:
+                       break;
+               default:
+                       TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+                       return;
+               }
+
+               stage++;
+               kvm_vm_free(vm);
+       }
+}
+
+int main(void)
+{
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENFORCE_CPUID));
+
+       pr_info("Testing access to Hyper-V specific MSRs\n");
+       guest_test_msrs_access();
+
+       pr_info("Testing access to Hyper-V hypercalls\n");
+       guest_test_hcalls_access();
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
new file mode 100644 (file)
index 0000000..22c0c12
--- /dev/null
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hyper-V HvCallSendSyntheticClusterIpi{,Ex} tests
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ */
+#include <pthread.h>
+#include <inttypes.h>
+
+#include "kvm_util.h"
+#include "hyperv.h"
+#include "test_util.h"
+#include "vmx.h"
+
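+/*
+ * The second receiver's VP index is deliberately above 63 so that the 'Ex'
+ * hypercalls have to use more than one sparse bank.
+ */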
+#define RECEIVER_VCPU_ID_1 2
+#define RECEIVER_VCPU_ID_2 65
+
+#define IPI_VECTOR      0xfe
+
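+/* IPIs received per VP index; receivers store (u64)-1 to signal they are ready */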
+static volatile uint64_t ipis_rcvd[RECEIVER_VCPU_ID_2 + 1];
+
+struct hv_vpset {
+       u64 format;
+       u64 valid_bank_mask;
+       u64 bank_contents[2];
+};
+
+enum HV_GENERIC_SET_FORMAT {
+       HV_GENERIC_SET_SPARSE_4K,
+       HV_GENERIC_SET_ALL,
+};
+
+/* HvCallSendSyntheticClusterIpi hypercall */
+struct hv_send_ipi {
+       u32 vector;
+       u32 reserved;
+       u64 cpu_mask;
+};
+
+/* HvCallSendSyntheticClusterIpiEx hypercall */
+struct hv_send_ipi_ex {
+       u32 vector;
+       u32 reserved;
+       struct hv_vpset vp_set;
+};
+
+static inline void hv_init(vm_vaddr_t pgs_gpa)
+{
+       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+       wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+}
+
+static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa)
+{
+       u32 vcpu_id;
+
+       x2apic_enable();
+       hv_init(pgs_gpa);
+
+       vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
+
+       /* Signal sender vCPU we're ready */
+       ipis_rcvd[vcpu_id] = (u64)-1;
+
+       for (;;)
+               asm volatile("sti; hlt; cli");
+}
+
+static void guest_ipi_handler(struct ex_regs *regs)
+{
+       u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
+
+       ipis_rcvd[vcpu_id]++;
+       wrmsr(HV_X64_MSR_EOI, 1);
+}
+
+static inline void nop_loop(void)
+{
+       int i;
+
+       for (i = 0; i < 100000000; i++)
+               asm volatile("nop");
+}
+
+static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
+{
+       struct hv_send_ipi *ipi = (struct hv_send_ipi *)hcall_page;
+       struct hv_send_ipi_ex *ipi_ex = (struct hv_send_ipi_ex *)hcall_page;
+       int stage = 1, ipis_expected[2] = {0};
+
+       hv_init(pgs_gpa);
+       GUEST_SYNC(stage++);
+
+       /* Wait for receiver vCPUs to come up */
+       while (!ipis_rcvd[RECEIVER_VCPU_ID_1] || !ipis_rcvd[RECEIVER_VCPU_ID_2])
+               nop_loop();
+       ipis_rcvd[RECEIVER_VCPU_ID_1] = ipis_rcvd[RECEIVER_VCPU_ID_2] = 0;
+
+       /* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
+       ipi->vector = IPI_VECTOR;
+       ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1;
+       hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096);
+       nop_loop();
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+       GUEST_SYNC(stage++);
+       /* 'Fast' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
+       hyperv_hypercall(HVCALL_SEND_IPI | HV_HYPERCALL_FAST_BIT,
+                        IPI_VECTOR, 1 << RECEIVER_VCPU_ID_1);
+       nop_loop();
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+       GUEST_SYNC(stage++);
+
+       /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
+       memset(hcall_page, 0, 4096);
+       ipi_ex->vector = IPI_VECTOR;
+       ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+       ipi_ex->vp_set.valid_bank_mask = 1 << 0;
+       ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
+       hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+                        pgs_gpa, pgs_gpa + 4096);
+       nop_loop();
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+       GUEST_SYNC(stage++);
+       /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
+       hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1);
+       hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+                        (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+                        IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+       nop_loop();
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+       GUEST_SYNC(stage++);
+
+       /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
+       memset(hcall_page, 0, 4096);
+       ipi_ex->vector = IPI_VECTOR;
+       ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
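+       /* Only bank 1 (VPs 64..127) is valid, so its bits are packed into bank_contents[0] */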
+       ipi_ex->vp_set.valid_bank_mask = 1 << 1;
+       ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64);
+       hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+                        pgs_gpa, pgs_gpa + 4096);
+       nop_loop();
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+       GUEST_SYNC(stage++);
+       /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
+       hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1);
+       hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+                        (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+                        IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+       nop_loop();
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+       GUEST_SYNC(stage++);
+
+       /* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */
+       memset(hcall_page, 0, 4096);
+       ipi_ex->vector = IPI_VECTOR;
+       ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+       ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1;
+       ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
+       ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64);
+       hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+                        pgs_gpa, pgs_gpa + 4096);
+       nop_loop();
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+       GUEST_SYNC(stage++);
+       /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1, 2} */
+       hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
+       hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+                        (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+                        IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+       nop_loop();
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+       GUEST_SYNC(stage++);
+
+       /* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */
+       memset(hcall_page, 0, 4096);
+       ipi_ex->vector = IPI_VECTOR;
+       ipi_ex->vp_set.format = HV_GENERIC_SET_ALL;
+       hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096);
+       nop_loop();
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+       GUEST_SYNC(stage++);
+       /*
+        * 'XMM Fast' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL.
+        */
+       ipi_ex->vp_set.valid_bank_mask = 0;
+       hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
+       hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT,
+                        IPI_VECTOR, HV_GENERIC_SET_ALL);
+       nop_loop();
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+       GUEST_SYNC(stage++);
+
+       GUEST_DONE();
+}
+
+static void *vcpu_thread(void *arg)
+{
+       struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
+       int old, r;
+
+       r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+       TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+                   vcpu->id, r);
+
+       vcpu_run(vcpu);
+
+       TEST_FAIL("vCPU %u exited unexpectedly", vcpu->id);
+
+       return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
+{
+       void *retval;
+       int r;
+
+       r = pthread_cancel(thread);
+       TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d",
+                   vcpu->id, r);
+
+       r = pthread_join(thread, &retval);
+       TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d",
+                   vcpu->id, r);
+       TEST_ASSERT(retval == PTHREAD_CANCELED,
+                   "expected retval=%p, got %p", PTHREAD_CANCELED,
+                   retval);
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu[3];
+       vm_vaddr_t hcall_page;
+       pthread_t threads[2];
+       int stage = 1, r;
+       struct ucall uc;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_SEND_IPI));
+
+       vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
+
+       /* Hypercall input/output */
+       hcall_page = vm_vaddr_alloc_pages(vm, 2);
+       memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+
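+       /* Each receiver's Hyper-V VP index matches its vCPU id so ipis_rcvd[] indexing lines up */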
+       vcpu[1] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_1, receiver_code);
+       vcpu_args_set(vcpu[1], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+       vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_1);
+       vcpu_set_hv_cpuid(vcpu[1]);
+
+       vcpu[2] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_2, receiver_code);
+       vcpu_args_set(vcpu[2], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+       vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_2);
+       vcpu_set_hv_cpuid(vcpu[2]);
+
+       vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
+
+       vcpu_args_set(vcpu[0], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+       vcpu_set_hv_cpuid(vcpu[0]);
+
+       r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]);
+       TEST_ASSERT(!r, "pthread_create failed errno=%d", r);
+
+       r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]);
+       TEST_ASSERT(!r, "pthread_create failed errno=%d", r);
+
+       while (true) {
+               vcpu_run(vcpu[0]);
+
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu[0], &uc)) {
+               case UCALL_SYNC:
+                       TEST_ASSERT(uc.args[1] == stage,
+                                   "Unexpected stage: %ld (%d expected)",
+                                   uc.args[1], stage);
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+
+               stage++;
+       }
+
+done:
+       cancel_join_vcpu_thread(threads[0], vcpu[1]);
+       cancel_join_vcpu_thread(threads[1], vcpu[2]);
+       kvm_vm_free(vm);
+
+       return r;
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c
new file mode 100644 (file)
index 0000000..0ddb632
--- /dev/null
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ * Tests for Hyper-V extensions to SVM.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/bitmap.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "hyperv.h"
+
+#define L2_GUEST_STACK_SIZE 256
+
+/* Exit to L1 from L2 with RDMSR instruction */
+static inline void rdmsr_from_l2(uint32_t msr)
+{
+       /* Currently, L1 doesn't preserve GPRs during vmexits. */
+       __asm__ __volatile__ ("rdmsr" : : "c"(msr) :
+                             "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+                             "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
+void l2_guest_code(void)
+{
+       u64 unused;
+
+       GUEST_SYNC(3);
+       /* Exit to L1 */
+       vmmcall();
+
+       /* MSR-Bitmap tests */
+       rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+       rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+       rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
+       vmmcall();
+       rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
+
+       GUEST_SYNC(5);
+
+       /* L2 TLB flush tests */
+       hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+                        HV_HYPERCALL_FAST_BIT, 0x0,
+                        HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+                        HV_FLUSH_ALL_PROCESSORS);
+       rdmsr_from_l2(MSR_FS_BASE);
+       /*
+        * Note: hypercall status (RAX) is not preserved correctly by L1 after
+        * synthetic vmexit, use unchecked version.
+        */
+       __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+                          HV_HYPERCALL_FAST_BIT, 0x0,
+                          HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+                          HV_FLUSH_ALL_PROCESSORS, &unused);
+
+       /* Done, exit to L1 and never come back.  */
+       vmmcall();
+}
+
+static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm,
+                                                   struct hyperv_test_pages *hv_pages,
+                                                   vm_vaddr_t pgs_gpa)
+{
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       struct vmcb *vmcb = svm->vmcb;
+       struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
+
+       GUEST_SYNC(1);
+
+       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+       wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+       enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
+
+       GUEST_ASSERT(svm->vmcb_gpa);
+       /* Prepare for L2 execution. */
+       generic_svm_setup(svm, l2_guest_code,
+                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       /* L2 TLB flush setup */
+       hve->partition_assist_page = hv_pages->partition_assist_gpa;
+       hve->hv_enlightenments_control.nested_flush_hypercall = 1;
+       hve->hv_vm_id = 1;
+       hve->hv_vp_id = 1;
+       current_vp_assist->nested_control.features.directhypercall = 1;
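+       /* '0' in the partition assist page: don't request a synthetic vmexit after L2 TLB flush */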
+       *(u32 *)(hv_pages->partition_assist) = 0;
+
+       GUEST_SYNC(2);
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+       GUEST_SYNC(4);
+       vmcb->save.rip += 3;
+
+       /* Intercept RDMSR 0xc0000100 */
+       vmcb->control.intercept |= 1ULL << INTERCEPT_MSR_PROT;
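+       /*
+        * The second 2K chunk of the MSR permission map (offset 0x800) covers
+        * MSRs 0xc0000000..0xc0001fff, two bits per MSR; bit 2*N intercepts
+        * reads of MSR N.
+        */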
+       __set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800);
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+       vmcb->save.rip += 2; /* rdmsr */
+
+       /* Enable enlightened MSR bitmap */
+       hve->hv_enlightenments_control.msr_bitmap = 1;
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+       vmcb->save.rip += 2; /* rdmsr */
+
+       /* Intercept RDMSR 0xc0000101 without telling KVM about it */
+       __set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800);
+       /* Make sure the HV_VMCB_NESTED_ENLIGHTENMENTS clean bit is set */
+       vmcb->control.clean |= HV_VMCB_NESTED_ENLIGHTENMENTS;
+       run_guest(vmcb, svm->vmcb_gpa);
+       /* Make sure we don't see SVM_EXIT_MSR here so eMSR bitmap works */
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+       vmcb->save.rip += 3; /* vmcall */
+
+       /* Now tell KVM we've changed MSR-Bitmap */
+       vmcb->control.clean &= ~HV_VMCB_NESTED_ENLIGHTENMENTS;
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+       vmcb->save.rip += 2; /* rdmsr */
+
+
+       /*
+        * L2 TLB flush test. First VMCALL should be handled directly by L0,
+        * no VMCALL exit expected.
+        */
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+       vmcb->save.rip += 2; /* rdmsr */
+       /* Enable synthetic vmexit */
+       *(u32 *)(hv_pages->partition_assist) = 1;
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == HV_SVM_EXITCODE_ENL);
+       GUEST_ASSERT(vmcb->control.exit_info_1 == HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH);
+
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+       GUEST_SYNC(6);
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       vm_vaddr_t nested_gva = 0, hv_pages_gva = 0;
+       vm_vaddr_t hcall_page;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       int stage;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+       TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH));
+
+       /* Create VM */
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       vcpu_set_hv_cpuid(vcpu);
+       vcpu_alloc_svm(vm, &nested_gva);
+       vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
+
+       hcall_page = vm_vaddr_alloc_pages(vm, 1);
+       memset(addr_gva2hva(vm, hcall_page), 0x0,  getpagesize());
+
+       vcpu_args_set(vcpu, 3, nested_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
+       vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
+
+       for (stage = 1;; stage++) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+
+               /* UCALL_SYNC is handled here.  */
+               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+                           uc.args[1] == stage,
+                           "Stage %d: Unexpected GUEST_SYNC args, got %lx",
+                           stage, (ulong)uc.args[1]);
+
+       }
+
+done:
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
new file mode 100644 (file)
index 0000000..077cd0e
--- /dev/null
@@ -0,0 +1,680 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hyper-V HvFlushVirtualAddress{List,Space}{,Ex} tests
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ */
+#include <asm/barrier.h>
+#include <pthread.h>
+#include <inttypes.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+#include "test_util.h"
+#include "vmx.h"
+
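+/*
+ * WORKER_VCPU_ID_2 is deliberately above 63 so that the 'Ex' hypercalls need
+ * a second sparse bank.
+ */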
+#define WORKER_VCPU_ID_1 2
+#define WORKER_VCPU_ID_2 65
+
+#define NTRY 100
+#define NTEST_PAGES 2
+
+struct hv_vpset {
+       u64 format;
+       u64 valid_bank_mask;
+       u64 bank_contents[];
+};
+
+enum HV_GENERIC_SET_FORMAT {
+       HV_GENERIC_SET_SPARSE_4K,
+       HV_GENERIC_SET_ALL,
+};
+
+#define HV_FLUSH_ALL_PROCESSORS                        BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES    BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY      BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT     BIT(3)
+
+/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
+struct hv_tlb_flush {
+       u64 address_space;
+       u64 flags;
+       u64 processor_mask;
+       u64 gva_list[];
+} __packed;
+
+/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
+struct hv_tlb_flush_ex {
+       u64 address_space;
+       u64 flags;
+       struct hv_vpset hv_vp_set;
+       u64 gva_list[];
+} __packed;
+
+/*
+ * Pass the following info to 'workers' and 'sender'
+ * - Hypercall page's GVA
+ * - Hypercall page's GPA
+ * - Test pages GVA
+ * - GVAs of the test pages' PTEs
+ */
+struct test_data {
+       vm_vaddr_t hcall_gva;
+       vm_paddr_t hcall_gpa;
+       vm_vaddr_t test_pages;
+       vm_vaddr_t test_pages_pte[NTEST_PAGES];
+};
+
+/* 'Worker' vCPU code checking the contents of the test page */
+static void worker_guest_code(vm_vaddr_t test_data)
+{
+       struct test_data *data = (struct test_data *)test_data;
+       u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
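+       /* Per-vCPU expected values live in the extra page after the two test pages */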
+       void *exp_page = (void *)data->test_pages + PAGE_SIZE * NTEST_PAGES;
+       u64 *this_cpu = (u64 *)(exp_page + vcpu_id * sizeof(u64));
+       u64 expected, val;
+
+       x2apic_enable();
+       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+
+       for (;;) {
+               cpu_relax();
+
+               expected = READ_ONCE(*this_cpu);
+
+               /*
+                * Make sure the value in the test page is read after reading
+                * the expectation for the first time. Pairs with wmb() in
+                * prepare_to_test().
+                */
+               rmb();
+
+               val = READ_ONCE(*(u64 *)data->test_pages);
+
+               /*
+                * Make sure the value in the test page is read before reading
+                * the expectation for the second time. Pairs with wmb() in
+                * post_test().
+                */
+               rmb();
+
+               /*
+                * '0' indicates the sender is between iterations, wait until
+                * the sender is ready for this vCPU to start checking again.
+                */
+               if (!expected)
+                       continue;
+
+               /*
+                * Re-read the per-vCPU byte to ensure the sender didn't move
+                * onto a new iteration.
+                */
+               if (expected != READ_ONCE(*this_cpu))
+                       continue;
+
+               GUEST_ASSERT(val == expected);
+       }
+}
+
+/*
+ * Write per-CPU info indicating what each 'worker' CPU is supposed to see in
+ * test page. '0' means don't check.
+ */
+static void set_expected_val(void *addr, u64 val, int vcpu_id)
+{
+       void *exp_page = addr + PAGE_SIZE * NTEST_PAGES;
+
+       *(u64 *)(exp_page + vcpu_id * sizeof(u64)) = val;
+}
+
+/*
+ * Update PTEs swapping two test pages.
+ * TODO: use swap()/xchg() when these are provided.
+ */
+static void swap_two_test_pages(vm_paddr_t pte_gva1, vm_paddr_t pte_gva2)
+{
+       uint64_t tmp = *(uint64_t *)pte_gva1;
+
+       *(uint64_t *)pte_gva1 = *(uint64_t *)pte_gva2;
+       *(uint64_t *)pte_gva2 = tmp;
+}
+
+/*
+ * TODO: replace the silly NOP loop with a proper udelay() implementation.
+ */
+static inline void do_delay(void)
+{
+       int i;
+
+       for (i = 0; i < 1000000; i++)
+               asm volatile("nop");
+}
+
+/*
+ * Prepare to test: 'disable' workers by setting the expectation to '0',
+ * clear hypercall input page and then swap two test pages.
+ */
+static inline void prepare_to_test(struct test_data *data)
+{
+       /* Clear hypercall input page */
+       memset((void *)data->hcall_gva, 0, PAGE_SIZE);
+
+       /* 'Disable' workers */
+       set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_1);
+       set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_2);
+
+       /* Make sure workers are 'disabled' before we swap PTEs. */
+       wmb();
+
+       /* Make sure workers have enough time to notice */
+       do_delay();
+
+       /* Swap test page mappings */
+       swap_two_test_pages(data->test_pages_pte[0], data->test_pages_pte[1]);
+}
+
+/*
+ * Finalize the test: check the hypercall result, set the expected values for
+ * 'worker' CPUs and give them some time to test.
+ */
+static inline void post_test(struct test_data *data, u64 exp1, u64 exp2)
+{
+       /* Make sure we change the expectation after swapping PTEs */
+       wmb();
+
+       /* Set the expectation for workers, '0' means don't test */
+       set_expected_val((void *)data->test_pages, exp1, WORKER_VCPU_ID_1);
+       set_expected_val((void *)data->test_pages, exp2, WORKER_VCPU_ID_2);
+
+       /* Make sure workers have enough time to test */
+       do_delay();
+}
+
+#define TESTVAL1 0x0101010101010101
+#define TESTVAL2 0x0202020202020202
+
+/* Main vCPU doing the test */
+static void sender_guest_code(vm_vaddr_t test_data)
+{
+       struct test_data *data = (struct test_data *)test_data;
+       struct hv_tlb_flush *flush = (struct hv_tlb_flush *)data->hcall_gva;
+       struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)data->hcall_gva;
+       vm_paddr_t hcall_gpa = data->hcall_gpa;
+       int i, stage = 1;
+
+       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+       wrmsr(HV_X64_MSR_HYPERCALL, data->hcall_gpa);
+
+       /* "Slow" hypercalls */
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+               flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa,
+                                hcall_gpa + PAGE_SIZE);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+               flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+               flush->gva_list[0] = (u64)data->test_pages;
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+                                hcall_gpa, hcall_gpa + PAGE_SIZE);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+                       HV_FLUSH_ALL_PROCESSORS;
+               flush->processor_mask = 0;
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa,
+                                hcall_gpa + PAGE_SIZE);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+                       HV_FLUSH_ALL_PROCESSORS;
+               flush->gva_list[0] = (u64)data->test_pages;
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+                                hcall_gpa, hcall_gpa + PAGE_SIZE);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+                         i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+                                (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+                                hcall_gpa, hcall_gpa + PAGE_SIZE);
+               post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+               /* bank_contents and gva_list occupy the same space, thus [1] */
+               flush_ex->gva_list[1] = (u64)data->test_pages;
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+                                (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
+                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+                                hcall_gpa, hcall_gpa + PAGE_SIZE);
+               post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
+                       BIT_ULL(WORKER_VCPU_ID_1 / 64);
+               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+               flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+                                (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+                                hcall_gpa, hcall_gpa + PAGE_SIZE);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+                         i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
+                       BIT_ULL(WORKER_VCPU_ID_2 / 64);
+               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+               flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+               /* bank_contents and gva_list occupy the same space, thus [2] */
+               flush_ex->gva_list[2] = (u64)data->test_pages;
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+                                (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
+                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+                                hcall_gpa, hcall_gpa + PAGE_SIZE);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+                         i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+                                hcall_gpa, hcall_gpa + PAGE_SIZE);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+                         i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+               flush_ex->gva_list[0] = (u64)data->test_pages;
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+                                hcall_gpa, hcall_gpa + PAGE_SIZE);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+                         i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       /* "Fast" hypercalls */
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+               hyperv_write_xmm_input(&flush->processor_mask, 1);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+                                HV_HYPERCALL_FAST_BIT, 0x0,
+                                HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+               flush->gva_list[0] = (u64)data->test_pages;
+               hyperv_write_xmm_input(&flush->processor_mask, 1);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+                                HV_HYPERCALL_FAST_BIT |
+                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               hyperv_write_xmm_input(&flush->processor_mask, 1);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+                                HV_HYPERCALL_FAST_BIT, 0x0,
+                                HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+                                HV_FLUSH_ALL_PROCESSORS);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+                         i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush->gva_list[0] = (u64)data->test_pages;
+               hyperv_write_xmm_input(&flush->processor_mask, 1);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+                                HV_HYPERCALL_FAST_BIT |
+                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET), 0x0,
+                                HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+                                HV_FLUSH_ALL_PROCESSORS);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+                         i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+                                HV_HYPERCALL_FAST_BIT |
+                                (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+               post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+               /* bank_contents and gva_list occupy the same space, thus [1] */
+               flush_ex->gva_list[1] = (u64)data->test_pages;
+               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+                                HV_HYPERCALL_FAST_BIT |
+                                (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
+                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+               post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
+                       BIT_ULL(WORKER_VCPU_ID_1 / 64);
+               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+               flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+                                HV_HYPERCALL_FAST_BIT |
+                                (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+               post_test(data, i % 2 ? TESTVAL1 :
+                         TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
+                       BIT_ULL(WORKER_VCPU_ID_2 / 64);
+               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+               flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+               /* bank_contents and gva_list occupy the same space, thus [2] */
+               flush_ex->gva_list[2] = (u64)data->test_pages;
+               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 3);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+                                HV_HYPERCALL_FAST_BIT |
+                                (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
+                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+                         i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+                                HV_HYPERCALL_FAST_BIT,
+                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+                         i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+               flush_ex->gva_list[0] = (u64)data->test_pages;
+               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+                                HV_HYPERCALL_FAST_BIT |
+                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+                         i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_DONE();
+}
+
+static void *vcpu_thread(void *arg)
+{
+       struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
+       struct ucall uc;
+       int old;
+       int r;
+
+       r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+       TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+                   vcpu->id, r);
+
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               /* NOT REACHED */
+       default:
+               TEST_FAIL("Unexpected ucall %lu, vCPU %d", uc.cmd, vcpu->id);
+       }
+
+       return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
+{
+       void *retval;
+       int r;
+
+       r = pthread_cancel(thread);
+       TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d",
+                   vcpu->id, r);
+
+       r = pthread_join(thread, &retval);
+       TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d",
+                   vcpu->id, r);
+       TEST_ASSERT(retval == PTHREAD_CANCELED,
+                   "expected retval=%p, got %p", PTHREAD_CANCELED,
+                   retval);
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu[3];
+       pthread_t threads[2];
+       vm_vaddr_t test_data_page, gva;
+       vm_paddr_t gpa;
+       uint64_t *pte;
+       struct test_data *data;
+       struct ucall uc;
+       int stage = 1, r, i;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TLBFLUSH));
+
+       vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
+
+       /* Test data page */
+       test_data_page = vm_vaddr_alloc_page(vm);
+       data = (struct test_data *)addr_gva2hva(vm, test_data_page);
+
+       /* Hypercall input/output */
+       data->hcall_gva = vm_vaddr_alloc_pages(vm, 2);
+       data->hcall_gpa = addr_gva2gpa(vm, data->hcall_gva);
+       memset(addr_gva2hva(vm, data->hcall_gva), 0x0, 2 * PAGE_SIZE);
+
+       /*
+        * Test pages: the first one is filled with '0x01's, the second with '0x02's
+        * and the test will swap their mappings. The third page holds the
+        * per-vCPU expected values ('0' means don't check).
+        */
+       data->test_pages = vm_vaddr_alloc_pages(vm, NTEST_PAGES + 1);
+       for (i = 0; i < NTEST_PAGES; i++)
+               memset(addr_gva2hva(vm, data->test_pages + PAGE_SIZE * i),
+                      (u8)(i + 1), PAGE_SIZE);
+       set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_1);
+       set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_2);
+
+       /*
+        * Get PTE pointers for test pages and map them inside the guest.
+        * Use a separate page for each PTE for simplicity.
+        */
+       gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR);
+       for (i = 0; i < NTEST_PAGES; i++) {
+               pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE);
+               gpa = addr_hva2gpa(vm, pte);
+               __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K);
+               data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK);
+       }
+
+       /*
+        * Sender vCPU which performs the test: swaps test pages, sets expectation
+        * for 'workers' and issues TLB flush hypercalls.
+        */
+       vcpu_args_set(vcpu[0], 1, test_data_page);
+       vcpu_set_hv_cpuid(vcpu[0]);
+
+       /* Create worker vCPUs which check the contents of the test pages */
+       vcpu[1] = vm_vcpu_add(vm, WORKER_VCPU_ID_1, worker_guest_code);
+       vcpu_args_set(vcpu[1], 1, test_data_page);
+       vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_1);
+       vcpu_set_hv_cpuid(vcpu[1]);
+
+       vcpu[2] = vm_vcpu_add(vm, WORKER_VCPU_ID_2, worker_guest_code);
+       vcpu_args_set(vcpu[2], 1, test_data_page);
+       vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_2);
+       vcpu_set_hv_cpuid(vcpu[2]);
+
+       r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]);
+       TEST_ASSERT(!r, "pthread_create() failed");
+
+       r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]);
+       TEST_ASSERT(!r, "pthread_create() failed");
+
+       while (true) {
+               vcpu_run(vcpu[0]);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu[0], &uc)) {
+               case UCALL_SYNC:
+                       TEST_ASSERT(uc.args[1] == stage,
+                                   "Unexpected stage: %ld (%d expected)",
+                                   uc.args[1], stage);
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+
+               stage++;
+       }
+
+done:
+       cancel_join_vcpu_thread(threads[0], vcpu[1]);
+       cancel_join_vcpu_thread(threads[1], vcpu[2]);
+       kvm_vm_free(vm);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/kvm_clock_test.c b/tools/testing/selftests/kvm/x86/kvm_clock_test.c
new file mode 100644 (file)
index 0000000..5bc1222
--- /dev/null
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Google LLC.
+ *
+ * Tests for adjusting the KVM clock from userspace
+ */
+#include <asm/kvm_para.h>
+#include <asm/pvclock.h>
+#include <asm/pvclock-abi.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <time.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct test_case {
+       uint64_t kvmclock_base;
+       int64_t realtime_offset;
+};
+
+static struct test_case test_cases[] = {
+       { .kvmclock_base = 0 },
+       { .kvmclock_base = 180 * NSEC_PER_SEC },
+       { .kvmclock_base = 0, .realtime_offset = -180 * NSEC_PER_SEC },
+       { .kvmclock_base = 0, .realtime_offset = 180 * NSEC_PER_SEC },
+};
+
+#define GUEST_SYNC_CLOCK(__stage, __val)                       \
+               GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0)
+
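+/*
+ * Enable kvm-clock via MSR_KVM_SYSTEM_TIME_NEW and report the guest's pvclock
+ * reading to the host once per test case.
+ */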
+static void guest_main(vm_paddr_t pvti_pa, struct pvclock_vcpu_time_info *pvti)
+{
+       int i;
+
+       wrmsr(MSR_KVM_SYSTEM_TIME_NEW, pvti_pa | KVM_MSR_ENABLED);
+       for (i = 0; i < ARRAY_SIZE(test_cases); i++)
+               GUEST_SYNC_CLOCK(i, __pvclock_read_cycles(pvti, rdtsc()));
+}
+
+#define EXPECTED_FLAGS (KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)
+
+static inline void assert_flags(struct kvm_clock_data *data)
+{
+       TEST_ASSERT((data->flags & EXPECTED_FLAGS) == EXPECTED_FLAGS,
+                   "unexpected clock data flags: %x (want set: %x)",
+                   data->flags, EXPECTED_FLAGS);
+}
+
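+/*
+ * Verify the guest's kvm-clock reading falls within the host's KVM_GET_CLOCK
+ * readings taken immediately before and after running the vCPU.
+ */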
+static void handle_sync(struct ucall *uc, struct kvm_clock_data *start,
+                       struct kvm_clock_data *end)
+{
+       uint64_t obs, exp_lo, exp_hi;
+
+       obs = uc->args[2];
+       exp_lo = start->clock;
+       exp_hi = end->clock;
+
+       assert_flags(start);
+       assert_flags(end);
+
+       TEST_ASSERT(exp_lo <= obs && obs <= exp_hi,
+                   "unexpected kvm-clock value: %"PRIu64" expected range: [%"PRIu64", %"PRIu64"]",
+                   obs, exp_lo, exp_hi);
+
+       pr_info("kvm-clock value: %"PRIu64" expected range [%"PRIu64", %"PRIu64"]\n",
+               obs, exp_lo, exp_hi);
+}
+
+static void handle_abort(struct ucall *uc)
+{
+       REPORT_GUEST_ASSERT(*uc);
+}
+
+static void setup_clock(struct kvm_vm *vm, struct test_case *test_case)
+{
+       struct kvm_clock_data data;
+
+       memset(&data, 0, sizeof(data));
+
+       data.clock = test_case->kvmclock_base;
+       if (test_case->realtime_offset) {
+               struct timespec ts;
+               int r;
+
+               data.flags |= KVM_CLOCK_REALTIME;
+               do {
+                       r = clock_gettime(CLOCK_REALTIME, &ts);
+                       if (!r)
+                               break;
+               } while (errno == EINTR);
+
+               TEST_ASSERT(!r, "clock_gettime() failed: %d", r);
+
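+               /* Express host wall-clock time in ns and apply the test's offset. */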
+               data.realtime = ts.tv_sec * NSEC_PER_SEC;
+               data.realtime += ts.tv_nsec;
+               data.realtime += test_case->realtime_offset;
+       }
+
+       vm_ioctl(vm, KVM_SET_CLOCK, &data);
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+       struct kvm_clock_data start, end;
+       struct kvm_vm *vm = vcpu->vm;
+       struct ucall uc;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+               setup_clock(vm, &test_cases[i]);
+
+               vm_ioctl(vm, KVM_GET_CLOCK, &start);
+
+               vcpu_run(vcpu);
+               vm_ioctl(vm, KVM_GET_CLOCK, &end);
+
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       handle_sync(&uc, &start, &end);
+                       break;
+               case UCALL_ABORT:
+                       handle_abort(&uc);
+                       return;
+               default:
+                       TEST_ASSERT(0, "unhandled ucall: %ld", uc.cmd);
+               }
+       }
+}
+
+int main(void)
+{
+       struct kvm_vcpu *vcpu;
+       vm_vaddr_t pvti_gva;
+       vm_paddr_t pvti_gpa;
+       struct kvm_vm *vm;
+       int flags;
+
+       flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK);
+       TEST_REQUIRE(flags & KVM_CLOCK_REALTIME);
+
+       TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+       pvti_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000);
+       pvti_gpa = addr_gva2gpa(vm, pvti_gva);
+       vcpu_args_set(vcpu, 2, pvti_gpa, pvti_gva);
+
+       enter_guest(vcpu);
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c
new file mode 100644 (file)
index 0000000..78878b3
--- /dev/null
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM paravirtual feature disablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct msr_data {
+       uint32_t idx;
+       const char *name;
+};
+
+#define TEST_MSR(msr) { .idx = msr, .name = #msr }
+#define UCALL_PR_MSR 0xdeadbeef
+#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr)
+
+/*
+ * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or
+ * written, as the KVM_CPUID_FEATURES leaf is cleared.
+ */
+static struct msr_data msrs_to_test[] = {
+       TEST_MSR(MSR_KVM_SYSTEM_TIME),
+       TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW),
+       TEST_MSR(MSR_KVM_WALL_CLOCK),
+       TEST_MSR(MSR_KVM_WALL_CLOCK_NEW),
+       TEST_MSR(MSR_KVM_ASYNC_PF_EN),
+       TEST_MSR(MSR_KVM_STEAL_TIME),
+       TEST_MSR(MSR_KVM_PV_EOI_EN),
+       TEST_MSR(MSR_KVM_POLL_CONTROL),
+       TEST_MSR(MSR_KVM_ASYNC_PF_INT),
+       TEST_MSR(MSR_KVM_ASYNC_PF_ACK),
+};
+
+static void test_msr(struct msr_data *msr)
+{
+       uint64_t ignored;
+       uint8_t vector;
+
+       PR_MSR(msr);
+
+       vector = rdmsr_safe(msr->idx, &ignored);
+       GUEST_ASSERT_EQ(vector, GP_VECTOR);
+
+       vector = wrmsr_safe(msr->idx, 0);
+       GUEST_ASSERT_EQ(vector, GP_VECTOR);
+}
+
+struct hcall_data {
+       uint64_t nr;
+       const char *name;
+};
+
+#define TEST_HCALL(hc) { .nr = hc, .name = #hc }
+#define UCALL_PR_HCALL 0xdeadc0de
+#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc)
+
+/*
+ * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding
+ * features have been cleared in KVM_CPUID_FEATURES.
+ */
+static struct hcall_data hcalls_to_test[] = {
+       TEST_HCALL(KVM_HC_KICK_CPU),
+       TEST_HCALL(KVM_HC_SEND_IPI),
+       TEST_HCALL(KVM_HC_SCHED_YIELD),
+};
+
+static void test_hcall(struct hcall_data *hc)
+{
+       uint64_t r;
+
+       PR_HCALL(hc);
+       r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
+       GUEST_ASSERT_EQ(r, -KVM_ENOSYS);
+}
+
+static void guest_main(void)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) {
+               test_msr(&msrs_to_test[i]);
+       }
+
+       for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) {
+               test_hcall(&hcalls_to_test[i]);
+       }
+
+       GUEST_DONE();
+}
+
+static void pr_msr(struct ucall *uc)
+{
+       struct msr_data *msr = (struct msr_data *)uc->args[0];
+
+       pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx);
+}
+
+static void pr_hcall(struct ucall *uc)
+{
+       struct hcall_data *hc = (struct hcall_data *)uc->args[0];
+
+       pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr);
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       while (true) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_PR_MSR:
+                       pr_msr(&uc);
+                       break;
+               case UCALL_PR_HCALL:
+                       pr_hcall(&uc);
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       return;
+               case UCALL_DONE:
+                       return;
+               }
+       }
+}
+
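+/*
+ * Verify that enabling KVM_CAP_X86_DISABLE_EXITS (to disable HLT exits) clears
+ * KVM_FEATURE_PV_UNHALT from the guest's CPUID.
+ */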
+static void test_pv_unhalt(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct kvm_cpuid_entry2 *ent;
+       u32 kvm_sig_old;
+
+       pr_info("testing KVM_FEATURE_PV_UNHALT\n");
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_DISABLE_EXITS));
+
+       /* KVM_PV_UNHALT test */
+       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+       vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
+
+       TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+                   "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect");
+
+       /* Make sure KVM clears vcpu->arch.kvm_cpuid */
+       ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
+       kvm_sig_old = ent->ebx;
+       ent->ebx = 0xdeadbeef;
+       vcpu_set_cpuid(vcpu);
+
+       vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+       ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
+       ent->ebx = kvm_sig_old;
+       vcpu_set_cpuid(vcpu);
+
+       TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+                   "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS");
+
+       /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */
+
+       kvm_vm_free(vm);
+}
+
+int main(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+       vcpu_enable_cap(vcpu, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 1);
+
+       vcpu_clear_cpuid_entry(vcpu, KVM_CPUID_FEATURES);
+
+       enter_guest(vcpu);
+       kvm_vm_free(vm);
+
+       test_pv_unhalt();
+}
diff --git a/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c
new file mode 100644 (file)
index 0000000..7e2bfb3
--- /dev/null
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * maximum APIC ID capability tests
+ *
+ * Copyright (C) 2022, Intel, Inc.
+ *
+ * Tests for getting/setting maximum APIC ID capability
+ */
+
+#include "kvm_util.h"
+
+#define MAX_VCPU_ID    2
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vm *vm;
+       int ret;
+
+       vm = vm_create_barebones();
+
+       /* Get KVM_CAP_MAX_VCPU_ID cap supported in KVM */
+       ret = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
+
+       /* Try to set KVM_CAP_MAX_VCPU_ID beyond KVM cap */
+       ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, ret + 1);
+       TEST_ASSERT(ret < 0,
+                   "Setting KVM_CAP_MAX_VCPU_ID beyond KVM cap should fail");
+
+       /* Test BOOT_CPU_ID interaction (MAX_VCPU_ID cannot be lower) */
+       if (kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID)) {
+               vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)MAX_VCPU_ID);
+
+               /* Try setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID */
+               ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID - 1);
+               TEST_ASSERT(ret < 0,
+                           "Setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID should fail");
+       }
+
+       /* Set KVM_CAP_MAX_VCPU_ID */
+       vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID);
+
+       /* Try to set KVM_CAP_MAX_VCPU_ID again */
+       ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID + 1);
+       TEST_ASSERT(ret < 0,
+                   "Setting KVM_CAP_MAX_VCPU_ID multiple times should fail");
+
+       /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap */
+       ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)MAX_VCPU_ID);
+       TEST_ASSERT(ret < 0, "Creating vCPU with ID > MAX_VCPU_ID should fail");
+
+       /* Create vCPU with bits 63:32 != 0, but an otherwise valid id */
+       ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(1L << 32));
+       TEST_ASSERT(ret < 0, "Creating vCPU with ID[63:32] != 0 should fail");
+
+       /* Create vCPU with id within bounds */
+       ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)0);
+       TEST_ASSERT(ret >= 0, "Creating vCPU with ID 0 should succeed");
+
+       close(ret);
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
new file mode 100644 (file)
index 0000000..2b550ef
--- /dev/null
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define CPUID_MWAIT (1u << 3)
+
+enum monitor_mwait_testcases {
+       MWAIT_QUIRK_DISABLED = BIT(0),
+       MISC_ENABLES_QUIRK_DISABLED = BIT(1),
+       MWAIT_DISABLED = BIT(2),
+};
+
+/*
+ * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD, in all
+ * other scenarios KVM should emulate them as nops.
+ */
+#define GUEST_ASSERT_MONITOR_MWAIT(insn, testcase, vector)             \
+do {                                                                   \
+       bool fault_wanted = ((testcase) & MWAIT_QUIRK_DISABLED) &&      \
+                           ((testcase) & MWAIT_DISABLED);              \
+                                                                       \
+       if (fault_wanted)                                               \
+               __GUEST_ASSERT((vector) == UD_VECTOR,                   \
+                              "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \
+                              testcase, vector);                       \
+       else                                                            \
+               __GUEST_ASSERT(!(vector),                               \
+                              "Expected success on " insn " for testcase '0x%x', got '0x%x'", \
+                              testcase, vector);                       \
+} while (0)
+
+static void guest_monitor_wait(int testcase)
+{
+       u8 vector;
+
+       GUEST_SYNC(testcase);
+
+       /*
+        * Arbitrarily MONITOR this function; SVM performs fault checks before
+        * intercept checks, so the inputs for MONITOR and MWAIT must be valid.
+        */
+       vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0));
+       GUEST_ASSERT_MONITOR_MWAIT("MONITOR", testcase, vector);
+
+       vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
+       GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
+}
+
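+/*
+ * Walk through the quirk/MWAIT combinations, syncing with the host before each
+ * attempt so that it can update the quirks and MISC_ENABLES accordingly.
+ */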
+static void guest_code(void)
+{
+       guest_monitor_wait(MWAIT_DISABLED);
+
+       guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
+
+       guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED);
+       guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED);
+
+       guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
+       guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED);
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       uint64_t disabled_quirks;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       int testcase;
+
+       TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT));
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+
+       while (1) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       testcase = uc.args[1];
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       goto done;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+                       goto done;
+               }
+
+               disabled_quirks = 0;
+               if (testcase & MWAIT_QUIRK_DISABLED)
+                       disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
+               if (testcase & MISC_ENABLES_QUIRK_DISABLED)
+                       disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
+               vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
+
+               /*
+                * If the MISC_ENABLES quirk (KVM neglects to update CPUID to
+                * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT
+                * bit in MISC_ENABLES accordingly.  If the quirk is enabled,
+                * the only valid configuration is MWAIT disabled, as CPUID
+                * can't be manually changed after running the vCPU.
+                */
+               if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) {
+                       TEST_ASSERT(testcase & MWAIT_DISABLED,
+                                   "Can't toggle CPUID features after running vCPU");
+                       continue;
+               }
+
+               vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE,
+                            (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT);
+       }
+
+done:
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c
new file mode 100644 (file)
index 0000000..3eb0313
--- /dev/null
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+#define L2_GUEST_STACK_SIZE 256
+
+/*
+ * Arbitrary, never shoved into KVM/hardware, just need to avoid conflict with
+ * the "real" exceptions used, #SS/#GP/#DF (12/13/8).
+ */
+#define FAKE_TRIPLE_FAULT_VECTOR       0xaa
+
+/* Arbitrary 32-bit error code injected by this test. */
+#define SS_ERROR_CODE 0xdeadbeef
+
+/*
+ * Bit '0' is set on Intel if the exception occurs while delivering a previous
+ * event/exception.  AMD's wording is ambiguous, but presumably the bit is set
+ * if the exception occurs while delivering an external event, e.g. NMI or INTR,
+ * but not for exceptions that occur when delivering other exceptions or
+ * software interrupts.
+ *
+ * Note, Intel's name for it, "External event", is misleading and much more
+ * aligned with AMD's behavior, but the SDM is quite clear on its behavior.
+ */
+#define ERROR_CODE_EXT_FLAG    BIT(0)
+
+/*
+ * Bit '1' is set if the fault occurred when looking up a descriptor in the
+ * IDT, which is the case here as the IDT is empty/NULL.
+ */
+#define ERROR_CODE_IDT_FLAG    BIT(1)
+
+/*
+ * The #GP that occurs when vectoring #SS should show the index into the IDT
+ * for #SS, plus have the "IDT flag" set.
+ */
+#define GP_ERROR_CODE_AMD ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG)
+#define GP_ERROR_CODE_INTEL ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG | ERROR_CODE_EXT_FLAG)
+
+/*
+ * Intel and AMD both shove '0' into the error code on #DF, regardless of what
+ * led to the double fault.
+ */
+#define DF_ERROR_CODE 0
+
+#define INTERCEPT_SS           (BIT_ULL(SS_VECTOR))
+#define INTERCEPT_SS_DF                (INTERCEPT_SS | BIT_ULL(DF_VECTOR))
+#define INTERCEPT_SS_GP_DF     (INTERCEPT_SS_DF | BIT_ULL(GP_VECTOR))
+
+static void l2_ss_pending_test(void)
+{
+       GUEST_SYNC(SS_VECTOR);
+}
+
+static void l2_ss_injected_gp_test(void)
+{
+       GUEST_SYNC(GP_VECTOR);
+}
+
+static void l2_ss_injected_df_test(void)
+{
+       GUEST_SYNC(DF_VECTOR);
+}
+
+static void l2_ss_injected_tf_test(void)
+{
+       GUEST_SYNC(FAKE_TRIPLE_FAULT_VECTOR);
+}
+
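+/*
+ * Run L2 at the given entry point and verify the resulting #VMEXIT is the
+ * expected exception intercept with the expected error code.  Triple fault
+ * (SHUTDOWN) is checked by the caller instead.
+ */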
+static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector,
+                      uint32_t error_code)
+{
+       struct vmcb *vmcb = svm->vmcb;
+       struct vmcb_control_area *ctrl = &vmcb->control;
+
+       vmcb->save.rip = (u64)l2_code;
+       run_guest(vmcb, svm->vmcb_gpa);
+
+       if (vector == FAKE_TRIPLE_FAULT_VECTOR)
+               return;
+
+       GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector));
+       GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code);
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+       struct vmcb_control_area *ctrl = &svm->vmcb->control;
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+       generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+       svm->vmcb->save.idtr.limit = 0;
+       ctrl->intercept |= BIT_ULL(INTERCEPT_SHUTDOWN);
+
+       ctrl->intercept_exceptions = INTERCEPT_SS_GP_DF;
+       svm_run_l2(svm, l2_ss_pending_test, SS_VECTOR, SS_ERROR_CODE);
+       svm_run_l2(svm, l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_AMD);
+
+       ctrl->intercept_exceptions = INTERCEPT_SS_DF;
+       svm_run_l2(svm, l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
+
+       ctrl->intercept_exceptions = INTERCEPT_SS;
+       svm_run_l2(svm, l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
+       GUEST_ASSERT_EQ(ctrl->exit_code, SVM_EXIT_SHUTDOWN);
+
+       GUEST_DONE();
+}
+
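+/*
+ * Run L2 at the given entry point, using vmlaunch() for the initial #SS run
+ * and vmresume() thereafter, and verify the exception VM-Exit's vector and
+ * error code.  Triple fault is checked by the caller instead.
+ */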
+static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code)
+{
+       GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_code));
+
+       GUEST_ASSERT_EQ(vector == SS_VECTOR ? vmlaunch() : vmresume(), 0);
+
+       if (vector == FAKE_TRIPLE_FAULT_VECTOR)
+               return;
+
+       GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
+       GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector);
+       GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+       GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+
+       GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+
+       prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+       GUEST_ASSERT_EQ(vmwrite(GUEST_IDTR_LIMIT, 0), 0);
+
+       /*
+        * VMX disallows injecting an exception with error_code[31:16] != 0,
+        * and hardware will never generate a VM-Exit with bits 31:16 set.
+        * KVM should likewise truncate the "bad" userspace value.
+        */
+       GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_GP_DF), 0);
+       vmx_run_l2(l2_ss_pending_test, SS_VECTOR, (u16)SS_ERROR_CODE);
+       vmx_run_l2(l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_INTEL);
+
+       GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_DF), 0);
+       vmx_run_l2(l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
+
+       GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS), 0);
+       vmx_run_l2(l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
+       GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_TRIPLE_FAULT);
+
+       GUEST_DONE();
+}
+
+static void __attribute__((__flatten__)) l1_guest_code(void *test_data)
+{
+       if (this_cpu_has(X86_FEATURE_SVM))
+               l1_svm_code(test_data);
+       else
+               l1_vmx_code(test_data);
+}
+
+static void assert_ucall_vector(struct kvm_vcpu *vcpu, int vector)
+{
+       struct ucall uc;
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_SYNC:
+               TEST_ASSERT(vector == uc.args[1],
+                           "Expected L2 to ask for %d, got %ld", vector, uc.args[1]);
+               break;
+       case UCALL_DONE:
+               TEST_ASSERT(vector == -1,
+                           "Expected L2 to ask for %d, L2 says it's done", vector);
+               break;
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       default:
+               TEST_FAIL("Expected L2 to ask for %d, got unexpected ucall %lu", vector, uc.cmd);
+       }
+}
+
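+/*
+ * Use KVM_SET_VCPU_EVENTS to queue a #SS with an error code, either as a
+ * pending exception or as an already-injected exception, after verifying no
+ * other exception is in flight.
+ */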
+static void queue_ss_exception(struct kvm_vcpu *vcpu, bool inject)
+{
+       struct kvm_vcpu_events events;
+
+       vcpu_events_get(vcpu, &events);
+
+       TEST_ASSERT(!events.exception.pending,
+                   "Vector %d unexpectedly pending", events.exception.nr);
+       TEST_ASSERT(!events.exception.injected,
+                   "Vector %d unexpectedly injected", events.exception.nr);
+
+       events.flags = KVM_VCPUEVENT_VALID_PAYLOAD;
+       events.exception.pending = !inject;
+       events.exception.injected = inject;
+       events.exception.nr = SS_VECTOR;
+       events.exception.has_error_code = true;
+       events.exception.error_code = SS_ERROR_CODE;
+       vcpu_events_set(vcpu, &events);
+}
+
+/*
+ * Verify KVM_{G,S}ET_EVENTS play nice with pending vs. injected exceptions
+ * when an exception is being queued for L2.  Specifically, verify that KVM
+ * honors L1 exception intercept controls when a #SS is pending/injected,
+ * triggers a #GP on vectoring the #SS, morphs to #DF if #GP isn't intercepted
+ * by L1, and finally causes (nested) SHUTDOWN if #DF isn't intercepted by L1.
+ */
+int main(int argc, char *argv[])
+{
+       vm_vaddr_t nested_test_data_gva;
+       struct kvm_vcpu_events events;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXCEPTION_PAYLOAD));
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX));
+
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+       vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul);
+
+       if (kvm_cpu_has(X86_FEATURE_SVM))
+               vcpu_alloc_svm(vm, &nested_test_data_gva);
+       else
+               vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+       vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+       /* Run L1 => L2.  L2 should sync and request #SS. */
+       vcpu_run(vcpu);
+       assert_ucall_vector(vcpu, SS_VECTOR);
+
+       /* Pend #SS and request immediate exit.  #SS should still be pending. */
+       queue_ss_exception(vcpu, false);
+       vcpu->run->immediate_exit = true;
+       vcpu_run_complete_io(vcpu);
+
+       /* Verify the pending event comes back out the same as it went in. */
+       vcpu_events_get(vcpu, &events);
+       TEST_ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD,
+                       KVM_VCPUEVENT_VALID_PAYLOAD);
+       TEST_ASSERT_EQ(events.exception.pending, true);
+       TEST_ASSERT_EQ(events.exception.nr, SS_VECTOR);
+       TEST_ASSERT_EQ(events.exception.has_error_code, true);
+       TEST_ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE);
+
+       /*
+        * Run for real with the pending #SS, L1 should get a VM-Exit due to
+        * #SS interception and re-enter L2 to request #GP (via injected #SS).
+        */
+       vcpu->run->immediate_exit = false;
+       vcpu_run(vcpu);
+       assert_ucall_vector(vcpu, GP_VECTOR);
+
+       /*
+        * Inject #SS, the #SS should bypass interception and cause #GP, which
+        * L1 should intercept before KVM morphs it to #DF.  L1 should then
+        * disable #GP interception and run L2 to request #DF (via #SS => #GP).
+        */
+       queue_ss_exception(vcpu, true);
+       vcpu_run(vcpu);
+       assert_ucall_vector(vcpu, DF_VECTOR);
+
+       /*
+        * Inject #SS, the #SS should bypass interception and cause #GP, which
+        * L1 is no longer intercepting, and so L1 should see a #DF VM-Exit.  L1
+        * should then signal that it is done.
+        */
+       queue_ss_exception(vcpu, true);
+       vcpu_run(vcpu);
+       assert_ucall_vector(vcpu, FAKE_TRIPLE_FAULT_VECTOR);
+
+       /*
+        * Inject #SS yet again.  L1 is not intercepting #GP or #DF, and so
+        * should see nested TRIPLE_FAULT / SHUTDOWN.
+        */
+       queue_ss_exception(vcpu, true);
+       vcpu_run(vcpu);
+       assert_ucall_vector(vcpu, -1);
+
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
new file mode 100644 (file)
index 0000000..e7efb2b
--- /dev/null
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Usage: to be run via nx_huge_pages_test.sh, which does the necessary
+ * environment setup and teardown
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+#include <fcntl.h>
+#include <stdint.h>
+#include <time.h>
+
+#include <test_util.h>
+#include "kvm_util.h"
+#include "processor.h"
+
+#define HPAGE_SLOT             10
+#define HPAGE_GPA              (4UL << 30) /* 4G prevents collision w/ slot 0 */
+#define HPAGE_GVA              HPAGE_GPA /* GVA is arbitrary, so use GPA. */
+#define PAGES_PER_2MB_HUGE_PAGE 512
+#define HPAGE_SLOT_NPAGES      (3 * PAGES_PER_2MB_HUGE_PAGE)
+
+/*
+ * Passed by nx_huge_pages_test.sh to provide an easy warning if this test is
+ * being run without it.
+ */
+#define MAGIC_TOKEN 887563923
+
+/*
+ * x86 opcode for the return instruction. Used to call into, and then
+ * immediately return from, memory backed with hugepages.
+ */
+#define RETURN_OPCODE 0xC3
+
+/* Call the specified memory address. */
+static void guest_do_CALL(uint64_t target)
+{
+       ((void (*)(void)) target)();
+}
+
+/*
+ * Exit the VM after each memory access so that the userspace component of the
+ * test can make assertions about the pages backing the VM.
+ *
+ * See the below for an explanation of how each access should affect the
+ * backing mappings.
+ */
+void guest_code(void)
+{
+       uint64_t hpage_1 = HPAGE_GVA;
+       uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512);
+       uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512);
+
+       READ_ONCE(*(uint64_t *)hpage_1);
+       GUEST_SYNC(1);
+
+       READ_ONCE(*(uint64_t *)hpage_2);
+       GUEST_SYNC(2);
+
+       guest_do_CALL(hpage_1);
+       GUEST_SYNC(3);
+
+       guest_do_CALL(hpage_3);
+       GUEST_SYNC(4);
+
+       READ_ONCE(*(uint64_t *)hpage_1);
+       GUEST_SYNC(5);
+
+       READ_ONCE(*(uint64_t *)hpage_3);
+       GUEST_SYNC(6);
+}
+
+static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
+{
+       int actual_pages_2m;
+
+       actual_pages_2m = vm_get_stat(vm, "pages_2m");
+
+       TEST_ASSERT(actual_pages_2m == expected_pages_2m,
+                   "Unexpected 2m page count. Expected %d, got %d",
+                   expected_pages_2m, actual_pages_2m);
+}
+
+static void check_split_count(struct kvm_vm *vm, int expected_splits)
+{
+       int actual_splits;
+
+       actual_splits = vm_get_stat(vm, "nx_lpage_splits");
+
+       TEST_ASSERT(actual_splits == expected_splits,
+                   "Unexpected NX huge page split count. Expected %d, got %d",
+                   expected_splits, actual_splits);
+}
+
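+/*
+ * Sleep for 5x the reclaim period to give the NX recovery worker ample time
+ * to run.
+ */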
+static void wait_for_reclaim(int reclaim_period_ms)
+{
+       long reclaim_wait_ms;
+       struct timespec ts;
+
+       reclaim_wait_ms = reclaim_period_ms * 5;
+       ts.tv_sec = reclaim_wait_ms / 1000;
+       ts.tv_nsec = (reclaim_wait_ms - (ts.tv_sec * 1000)) * 1000000;
+       nanosleep(&ts, NULL);
+}
+
+void run_test(int reclaim_period_ms, bool disable_nx_huge_pages,
+             bool reboot_permissions)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       uint64_t nr_bytes;
+       void *hva;
+       int r;
+
+       vm = vm_create(1);
+
+       if (disable_nx_huge_pages) {
+               r = __vm_disable_nx_huge_pages(vm);
+               if (reboot_permissions) {
+                       TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions");
+               } else {
+                       TEST_ASSERT(r == -1 && errno == EPERM,
+                                   "This process should not have permission to disable NX huge pages");
+                       return;
+               }
+       }
+
+       vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB,
+                                   HPAGE_GPA, HPAGE_SLOT,
+                                   HPAGE_SLOT_NPAGES, 0);
+
+       nr_bytes = HPAGE_SLOT_NPAGES * vm->page_size;
+
+       /*
+        * Ensure that KVM can map HPAGE_SLOT with huge pages by mapping the
+        * region into the guest with 2MiB pages whenever TDP is disabled (i.e.
+        * whenever KVM is shadowing the guest page tables).
+        *
+        * When TDP is enabled, KVM should be able to map HPAGE_SLOT with huge
+        * pages irrespective of the guest page size, so map with 4KiB pages
+        * to test that that is the case.
+        */
+       if (kvm_is_tdp_enabled())
+               virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_4K);
+       else
+               virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_2M);
+
+       hva = addr_gpa2hva(vm, HPAGE_GPA);
+       memset(hva, RETURN_OPCODE, nr_bytes);
+
+       check_2m_page_count(vm, 0);
+       check_split_count(vm, 0);
+
+       /*
+        * The guest code will first read from the first hugepage, resulting
+        * in a huge page mapping being created.
+        */
+       vcpu_run(vcpu);
+       check_2m_page_count(vm, 1);
+       check_split_count(vm, 0);
+
+       /*
+        * Then the guest code will read from the second hugepage, resulting
+        * in another huge page mapping being created.
+        */
+       vcpu_run(vcpu);
+       check_2m_page_count(vm, 2);
+       check_split_count(vm, 0);
+
+       /*
+        * Next, the guest will execute from the first huge page, causing it
+        * to be remapped at 4k.
+        *
+        * If NX huge pages are disabled, this should have no effect.
+        */
+       vcpu_run(vcpu);
+       check_2m_page_count(vm, disable_nx_huge_pages ? 2 : 1);
+       check_split_count(vm, disable_nx_huge_pages ? 0 : 1);
+
+       /*
+        * Executing from the third huge page (previously unaccessed) will
+        * cause part of it to be mapped at 4k.
+        *
+        * If NX huge pages are disabled, it should be mapped at 2M.
+        */
+       vcpu_run(vcpu);
+       check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+       check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
+
+       /* Reading from the first huge page again should have no effect. */
+       vcpu_run(vcpu);
+       check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+       check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
+
+       /* Give recovery thread time to run. */
+       wait_for_reclaim(reclaim_period_ms);
+
+       /*
+        * Now that the reclaimer has run, all the split pages should be gone.
+        *
+        * If NX huge pages are disabled, the reclaimer will not run, so
+        * nothing should change from here on.
+        */
+       check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+       check_split_count(vm, 0);
+
+       /*
+        * The 4k mapping on hpage 3 should have been removed, so check that
+        * reading from it causes a huge page mapping to be installed.
+        */
+       vcpu_run(vcpu);
+       check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 2);
+       check_split_count(vm, 0);
+
+       kvm_vm_free(vm);
+}
+
+static void help(char *name)
+{
+       puts("");
+       printf("usage: %s [-h] [-p period_ms] [-t token] [-r]\n", name);
+       puts("");
+       printf(" -p: The NX reclaim period in milliseconds.\n");
+       printf(" -t: The magic token to indicate environment setup is done.\n");
+       printf(" -r: The test has reboot permissions and can disable NX huge pages.\n");
+       puts("");
+       exit(0);
+}
+
+int main(int argc, char **argv)
+{
+       int reclaim_period_ms = 0, token = 0, opt;
+       bool reboot_permissions = false;
+
+       while ((opt = getopt(argc, argv, "hp:t:r")) != -1) {
+               switch (opt) {
+               case 'p':
+                       reclaim_period_ms = atoi_positive("Reclaim period", optarg);
+                       break;
+               case 't':
+                       token = atoi_paranoid(optarg);
+                       break;
+               case 'r':
+                       reboot_permissions = true;
+                       break;
+               case 'h':
+               default:
+                       help(argv[0]);
+                       break;
+               }
+       }
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES));
+
+       __TEST_REQUIRE(token == MAGIC_TOKEN,
+                      "This test must be run with the magic token via '-t %d'.\n"
+                      "Running via nx_huge_pages_test.sh, which also handles "
+                      "environment setup, is strongly recommended.", MAGIC_TOKEN);
+
+       run_test(reclaim_period_ms, false, reboot_permissions);
+       run_test(reclaim_period_ms, true, reboot_permissions);
+
+       return 0;
+}
+
diff --git a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh
new file mode 100755 (executable)
index 0000000..caad084
--- /dev/null
@@ -0,0 +1,69 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Wrapper script which performs setup and cleanup for nx_huge_pages_test.
+# Makes use of root privileges to set up huge pages and KVM module parameters.
+#
+# Copyright (C) 2022, Google LLC.
+
+set -e
+
+NX_HUGE_PAGES=$(cat /sys/module/kvm/parameters/nx_huge_pages)
+NX_HUGE_PAGES_RECOVERY_RATIO=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio)
+NX_HUGE_PAGES_RECOVERY_PERIOD=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms)
+HUGE_PAGES=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages)
+
+# If we're already root, the host might not have sudo.
+if [ $(whoami) == "root" ]; then
+       function do_sudo () {
+               "$@"
+       }
+else
+       function do_sudo () {
+               sudo "$@"
+       }
+fi
+
+set +e
+
+function sudo_echo () {
+       echo "$1" | do_sudo tee -a "$2" > /dev/null
+}
+
+NXECUTABLE="$(dirname $0)/nx_huge_pages_test"
+
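+# Verify that privileged writes work at all; if not, skip the test (KSFT_SKIP).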
+sudo_echo test /dev/null || exit 4 # KSFT_SKIP=4
+
+(
+       set -e
+
+       sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages
+       sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
+       sudo_echo 100 /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
+       sudo_echo "$(( $HUGE_PAGES + 3 ))" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+       # Test with reboot permissions
+       if [ $(whoami) == "root" ] || sudo setcap cap_sys_boot+ep $NXECUTABLE 2> /dev/null; then
+               echo Running test with CAP_SYS_BOOT enabled
+               $NXECUTABLE -t 887563923 -p 100 -r
+               test $(whoami) == "root" || sudo setcap cap_sys_boot-ep $NXECUTABLE
+       else
+               echo setcap failed, skipping nx_huge_pages_test with CAP_SYS_BOOT enabled
+       fi
+
+       # Test without reboot permissions
+       if [ $(whoami) != "root" ] ; then
+               echo Running test with CAP_SYS_BOOT disabled
+               $NXECUTABLE -t 887563923 -p 100
+       else
+               echo Running as root, skipping nx_huge_pages_test with CAP_SYS_BOOT disabled
+       fi
+)
+RET=$?
+
+sudo_echo "$NX_HUGE_PAGES" /sys/module/kvm/parameters/nx_huge_pages
+sudo_echo "$NX_HUGE_PAGES_RECOVERY_RATIO" /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
+sudo_echo "$NX_HUGE_PAGES_RECOVERY_PERIOD" /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
+sudo_echo "$HUGE_PAGES" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+exit $RET
diff --git a/tools/testing/selftests/kvm/x86/platform_info_test.c b/tools/testing/selftests/kvm/x86/platform_info_test.c
new file mode 100644 (file)
index 0000000..9cbf283
--- /dev/null
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_CAP_MSR_PLATFORM_INFO
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies expected behavior of controlling guest access to
+ * MSR_PLATFORM_INFO.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
+
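+/*
+ * Read MSR_PLATFORM_INFO with guest access enabled and verify the turbo ratio
+ * bits, then expect a #GP once the host disables access via the capability.
+ */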
+static void guest_code(void)
+{
+       uint64_t msr_platform_info;
+       uint8_t vector;
+
+       GUEST_SYNC(true);
+       msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
+       GUEST_ASSERT_EQ(msr_platform_info & MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
+                       MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+
+       GUEST_SYNC(false);
+       vector = rdmsr_safe(MSR_PLATFORM_INFO, &msr_platform_info);
+       GUEST_ASSERT_EQ(vector, GP_VECTOR);
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       uint64_t msr_platform_info;
+       struct ucall uc;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       msr_platform_info = vcpu_get_msr(vcpu, MSR_PLATFORM_INFO);
+       vcpu_set_msr(vcpu, MSR_PLATFORM_INFO,
+                    msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+
+       for (;;) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       vm_enable_cap(vm, KVM_CAP_MSR_PLATFORM_INFO, uc.args[1]);
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+               default:
+                       TEST_FAIL("Unexpected ucall %lu", uc.cmd);
+                       break;
+               }
+       }
+
+done:
+       kvm_vm_free(vm);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
new file mode 100644 (file)
index 0000000..698cb36
--- /dev/null
@@ -0,0 +1,644 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+#include <x86intrin.h>
+
+#include "pmu.h"
+#include "processor.h"
+
+/* Number of iterations of the loop for the guest measurement payload. */
+#define NUM_LOOPS                      10
+
+/* Each iteration of the loop retires one branch instruction. */
+#define NUM_BRANCH_INSNS_RETIRED       (NUM_LOOPS)
+
+/*
+ * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
+ * 1 LOOP.
+ */
+#define NUM_INSNS_PER_LOOP             3
+
+/*
+ * Number of "extra" instructions that will be counted, i.e. the number of
+ * instructions that are needed to set up the loop and then disable the
+ * counter.  2 MOV, 2 XOR, 1 WRMSR.
+ */
+#define NUM_EXTRA_INSNS                        5
+
+/* Total number of instructions retired within the measured section. */
+#define NUM_INSNS_RETIRED              (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
+
+
+static uint8_t kvm_pmu_version;
+static bool kvm_has_perf_caps;
+
+static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
+                                                 void *guest_code,
+                                                 uint8_t pmu_version,
+                                                 uint64_t perf_capabilities)
+{
+       struct kvm_vm *vm;
+
+       vm = vm_create_with_one_vcpu(vcpu, guest_code);
+       sync_global_to_guest(vm, kvm_pmu_version);
+
+       /*
+        * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
+        * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
+        */
+       if (kvm_has_perf_caps)
+               vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
+
+       vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
+       return vm;
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       do {
+               vcpu_run(vcpu);
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               case UCALL_PRINTF:
+                       pr_info("%s", uc.buffer);
+                       break;
+               case UCALL_DONE:
+                       break;
+               default:
+                       TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+               }
+       } while (uc.cmd != UCALL_DONE);
+}
+
+static uint8_t guest_get_pmu_version(void)
+{
+       /*
+        * Return the effective PMU version, i.e. the minimum between what KVM
+        * supports and what is enumerated to the guest.  The host deliberately
+        * advertises a PMU version to the guest beyond what is actually
+        * supported by KVM to verify KVM doesn't freak out and do something
+        * bizarre with an architecturally valid, but unsupported, version.
+        */
+       return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
+}
+
+/*
+ * If an architectural event is supported and guaranteed to generate at least
+ * one "hit", assert that its count is non-zero.  If an event isn't supported or
+ * the test can't guarantee the associated action will occur, then all bets are
+ * off regarding the count, i.e. no checks can be done.
+ *
+ * Sanity check that in all cases, the event doesn't count when it's disabled,
+ * and that KVM correctly emulates the write of an arbitrary value.
+ */
+static void guest_assert_event_count(uint8_t idx,
+                                    struct kvm_x86_pmu_feature event,
+                                    uint32_t pmc, uint32_t pmc_msr)
+{
+       uint64_t count;
+
+       count = _rdpmc(pmc);
+       if (!this_pmu_has(event))
+               goto sanity_checks;
+
+       switch (idx) {
+       case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
+               GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
+               break;
+       case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
+               GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
+               break;
+       case INTEL_ARCH_LLC_REFERENCES_INDEX:
+       case INTEL_ARCH_LLC_MISSES_INDEX:
+               if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
+                   !this_cpu_has(X86_FEATURE_CLFLUSH))
+                       break;
+               fallthrough;
+       case INTEL_ARCH_CPU_CYCLES_INDEX:
+       case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
+               GUEST_ASSERT_NE(count, 0);
+               break;
+       case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
+               GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
+               break;
+       default:
+               break;
+       }
+
+sanity_checks:
+       __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
+       GUEST_ASSERT_EQ(_rdpmc(pmc), count);
+
+       wrmsr(pmc_msr, 0xdead);
+       GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
+}
+
+/*
+ * Enable and disable the PMC in a monolithic asm blob to ensure that the
+ * compiler can't insert _any_ code into the measured sequence.  Note, ECX
+ * doesn't need to be clobbered as the input value, @pmc_msr, is restored
+ * before the end of the sequence.
+ *
+ * If CLFLUSH{,OPT} is supported, flush the cacheline containing (at least) the
+ * CLFLUSH{,OPT} instruction on each loop iteration to force LLC references and
+ * misses, i.e. to allow testing that those events actually count.
+ *
+ * If forced emulation is enabled (and specified), force emulation on a subset
+ * of the measured code to verify that KVM correctly emulates instructions and
+ * branches retired events in conjunction with hardware also counting said
+ * events.
+ */
+#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP)                                \
+do {                                                                           \
+       __asm__ __volatile__("wrmsr\n\t"                                        \
+                            " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t"      \
+                            "1:\n\t"                                           \
+                            clflush "\n\t"                                     \
+                            "mfence\n\t"                                       \
+                            FEP "loop 1b\n\t"                                  \
+                            FEP "mov %%edi, %%ecx\n\t"                         \
+                            FEP "xor %%eax, %%eax\n\t"                         \
+                            FEP "xor %%edx, %%edx\n\t"                         \
+                            "wrmsr\n\t"                                        \
+                            :: "a"((uint32_t)_value), "d"(_value >> 32),       \
+                               "c"(_msr), "D"(_msr)                            \
+       );                                                                      \
+} while (0)
+
+#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
+do {                                                                           \
+       wrmsr(pmc_msr, 0);                                                      \
+                                                                               \
+       if (this_cpu_has(X86_FEATURE_CLFLUSHOPT))                               \
+               GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP);    \
+       else if (this_cpu_has(X86_FEATURE_CLFLUSH))                             \
+               GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP);       \
+       else                                                                    \
+               GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP);             \
+                                                                               \
+       guest_assert_event_count(_idx, _event, _pmc, _pmc_msr);                 \
+} while (0)
+
+static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
+                                   uint32_t pmc, uint32_t pmc_msr,
+                                   uint32_t ctrl_msr, uint64_t ctrl_msr_value)
+{
+       GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
+
+       if (is_forced_emulation_enabled)
+               GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
+}
+
+#define X86_PMU_FEATURE_NULL                                           \
+({                                                                     \
+       struct kvm_x86_pmu_feature feature = {};                        \
+                                                                       \
+       feature;                                                        \
+})
+
+static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
+{
+       return !(*(u64 *)&event);
+}
+
+static void guest_test_arch_event(uint8_t idx)
+{
+       const struct {
+               struct kvm_x86_pmu_feature gp_event;
+               struct kvm_x86_pmu_feature fixed_event;
+       } intel_event_to_feature[] = {
+               [INTEL_ARCH_CPU_CYCLES_INDEX]            = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
+               [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX]  = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
+               /*
+                * Note, the fixed counter for reference cycles is NOT the same
+                * as the general purpose architectural event.  The fixed counter
+                * explicitly counts at the same frequency as the TSC, whereas
+                * the GP event counts at a fixed, but uarch specific, frequency.
+                * Bundle them here for simplicity.
+                */
+               [INTEL_ARCH_REFERENCE_CYCLES_INDEX]      = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
+               [INTEL_ARCH_LLC_REFERENCES_INDEX]        = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
+               [INTEL_ARCH_LLC_MISSES_INDEX]            = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
+               [INTEL_ARCH_BRANCHES_RETIRED_INDEX]      = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
+               [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
+               [INTEL_ARCH_TOPDOWN_SLOTS_INDEX]         = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
+       };
+
+       uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+       uint32_t pmu_version = guest_get_pmu_version();
+       /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
+       bool guest_has_perf_global_ctrl = pmu_version >= 2;
+       struct kvm_x86_pmu_feature gp_event, fixed_event;
+       uint32_t base_pmc_msr;
+       unsigned int i;
+
+       /* The host side shouldn't invoke this without a guest PMU. */
+       GUEST_ASSERT(pmu_version);
+
+       if (this_cpu_has(X86_FEATURE_PDCM) &&
+           rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
+               base_pmc_msr = MSR_IA32_PMC0;
+       else
+               base_pmc_msr = MSR_IA32_PERFCTR0;
+
+       gp_event = intel_event_to_feature[idx].gp_event;
+       GUEST_ASSERT_EQ(idx, gp_event.f.bit);
+
+       GUEST_ASSERT(nr_gp_counters);
+
+       for (i = 0; i < nr_gp_counters; i++) {
+               uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
+                                   ARCH_PERFMON_EVENTSEL_ENABLE |
+                                   intel_pmu_arch_events[idx];
+
+               wrmsr(MSR_P6_EVNTSEL0 + i, 0);
+               if (guest_has_perf_global_ctrl)
+                       wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
+
+               __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
+                                       MSR_P6_EVNTSEL0 + i, eventsel);
+       }
+
+       if (!guest_has_perf_global_ctrl)
+               return;
+
+       fixed_event = intel_event_to_feature[idx].fixed_event;
+       if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
+               return;
+
+       i = fixed_event.f.bit;
+
+       wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+
+       __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED,
+                               MSR_CORE_PERF_FIXED_CTR0 + i,
+                               MSR_CORE_PERF_GLOBAL_CTRL,
+                               FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+}
+
+static void guest_test_arch_events(void)
+{
+       uint8_t i;
+
+       for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
+               guest_test_arch_event(i);
+
+       GUEST_DONE();
+}
+
+static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
+                            uint8_t length, uint8_t unavailable_mask)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       /* Testing arch events requires a vPMU (there are no negative tests). */
+       if (!pmu_version)
+               return;
+
+       vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
+                                        pmu_version, perf_capabilities);
+
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
+                               length);
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
+                               unavailable_mask);
+
+       run_vcpu(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+/*
+ * Limit testing to MSRs that are actually defined by Intel (in the SDM).  MSRs
+ * beyond the defined counter MSRs *probably* don't exist, but there's no
+ * guarantee that currently undefined MSR indices won't be used for something
+ * other than PMCs in the future.
+ */
+#define MAX_NR_GP_COUNTERS     8
+#define MAX_NR_FIXED_COUNTERS  3
+
+#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector)              \
+__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector,                      \
+              "Expected %s on " #insn "(0x%x), got vector %u",                 \
+              expect_gp ? "#GP" : "no fault", msr, vector)
+
+#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected_val)                   \
+       __GUEST_ASSERT(val == expected_val,                                     \
+                      "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx",    \
+                      msr, expected_val, val);
+
+static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
+                            uint64_t expected_val)
+{
+       uint8_t vector;
+       uint64_t val;
+
+       vector = rdpmc_safe(rdpmc_idx, &val);
+       GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
+       if (expect_success)
+               GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
+
+       if (!is_forced_emulation_enabled)
+               return;
+
+       vector = rdpmc_safe_fep(rdpmc_idx, &val);
+       GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
+       if (expect_success)
+               GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
+}
+
+static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
+                                uint8_t nr_counters, uint32_t or_mask)
+{
+       const bool pmu_has_fast_mode = !guest_get_pmu_version();
+       uint8_t i;
+
+       for (i = 0; i < nr_possible_counters; i++) {
+               /*
+                * TODO: Test a value that validates full-width writes and the
+                * width of the counters.
+                */
+               const uint64_t test_val = 0xffff;
+               const uint32_t msr = base_msr + i;
+
+               /*
+                * Fixed counters are supported if the counter is less than the
+                * number of enumerated contiguous counters *or* the counter is
+                * explicitly enumerated in the supported counters mask.
+                */
+               const bool expect_success = i < nr_counters || (or_mask & BIT(i));
+
+               /*
+                * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
+                * unsupported, i.e. doesn't #GP and reads back '0'.
+                */
+               const uint64_t expected_val = expect_success ? test_val : 0;
+               const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
+                                      msr != MSR_P6_PERFCTR1;
+               uint32_t rdpmc_idx;
+               uint8_t vector;
+               uint64_t val;
+
+               vector = wrmsr_safe(msr, test_val);
+               GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
+
+               vector = rdmsr_safe(msr, &val);
+               GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);
+
+               /* On #GP, the result of RDMSR is undefined. */
+               if (!expect_gp)
+                       GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);
+
+               /*
+                * Redo the read tests with RDPMC, which has different indexing
+                * semantics and additional capabilities.
+                */
+               rdpmc_idx = i;
+               if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
+                       rdpmc_idx |= INTEL_RDPMC_FIXED;
+
+               guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);
+
+               /*
+                * KVM doesn't support non-architectural PMUs, i.e. it should be
+                * impossible to have fast mode RDPMC.  Verify that attempting
+                * to use fast RDPMC always #GPs.
+                */
+               GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
+               rdpmc_idx |= INTEL_RDPMC_FAST;
+               guest_test_rdpmc(rdpmc_idx, false, -1ull);
+
+               vector = wrmsr_safe(msr, 0);
+               GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
+       }
+}
+
+static void guest_test_gp_counters(void)
+{
+       uint8_t pmu_version = guest_get_pmu_version();
+       uint8_t nr_gp_counters = 0;
+       uint32_t base_msr;
+
+       if (pmu_version)
+               nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+
+       /*
+        * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is
+        * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number
+        * of GP counters.  If there are no GP counters, require KVM to leave
+        * PERF_GLOBAL_CTRL '0'.  This edge case isn't covered by the SDM, but
+        * follow the spirit of the architecture and only globally enable GP
+        * counters, of which there are none.
+        */
+       if (pmu_version > 1) {
+               uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);
+
+               if (nr_gp_counters)
+                       GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
+               else
+                       GUEST_ASSERT_EQ(global_ctrl, 0);
+       }
+
+       if (this_cpu_has(X86_FEATURE_PDCM) &&
+           rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
+               base_msr = MSR_IA32_PMC0;
+       else
+               base_msr = MSR_IA32_PERFCTR0;
+
+       guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
+       GUEST_DONE();
+}
+
+static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
+                            uint8_t nr_gp_counters)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
+                                        pmu_version, perf_capabilities);
+
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
+                               nr_gp_counters);
+
+       run_vcpu(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+static void guest_test_fixed_counters(void)
+{
+       uint64_t supported_bitmask = 0;
+       uint8_t nr_fixed_counters = 0;
+       uint8_t i;
+
+       /* Fixed counters require Architectural vPMU Version 2+. */
+       if (guest_get_pmu_version() >= 2)
+               nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+
+       /*
+        * The supported bitmask for fixed counters was introduced in PMU
+        * version 5.
+        */
+       if (guest_get_pmu_version() >= 5)
+               supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);
+
+       guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
+                            nr_fixed_counters, supported_bitmask);
+
+       for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
+               uint8_t vector;
+               uint64_t val;
+
+               if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
+                       vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
+                                           FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+                       __GUEST_ASSERT(vector == GP_VECTOR,
+                                      "Expected #GP for counter %u in FIXED_CTR_CTRL", i);
+
+                       vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
+                                           FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+                       __GUEST_ASSERT(vector == GP_VECTOR,
+                                      "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
+                       continue;
+               }
+
+               wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
+               wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+               __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+               val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);
+
+               GUEST_ASSERT_NE(val, 0);
+       }
+       GUEST_DONE();
+}
+
+static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
+                               uint8_t nr_fixed_counters,
+                               uint32_t supported_bitmask)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
+                                        pmu_version, perf_capabilities);
+
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
+                               supported_bitmask);
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
+                               nr_fixed_counters);
+
+       run_vcpu(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+static void test_intel_counters(void)
+{
+       uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+       uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+       uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
+       unsigned int i;
+       uint8_t v, j;
+       uint32_t k;
+
+       const uint64_t perf_caps[] = {
+               0,
+               PMU_CAP_FW_WRITES,
+       };
+
+       /*
+        * Test up to PMU v5, which is the current maximum version defined by
+        * Intel, i.e. the last version that is guaranteed to be backwards
+        * compatible with KVM's existing behavior.
+        */
+       uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
+
+       /*
+        * Detect the existence of events that aren't supported by selftests.
+        * This will (obviously) fail any time the kernel adds support for a
+        * new event, but it's worth paying that price to keep the test fresh.
+        */
+       TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
+                   "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
+                   nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
+
+       /*
+        * Force iterating over known arch events regardless of whether or not
+        * KVM/hardware supports a given event.
+        */
+       nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
+
+       for (v = 0; v <= max_pmu_version; v++) {
+               for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
+                       if (!kvm_has_perf_caps && perf_caps[i])
+                               continue;
+
+                       pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
+                               v, perf_caps[i]);
+                       /*
+                        * To keep the total runtime reasonable, test every
+                        * possible non-zero, non-reserved bitmap combination
+                        * only with the native PMU version and the full bit
+                        * vector length.
+                        */
+                       if (v == pmu_version) {
+                               for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
+                                       test_arch_events(v, perf_caps[i], nr_arch_events, k);
+                       }
+                       /*
+                        * Test single bits for all PMU versions and lengths up
+                        * to the number of events + 1 (to verify KVM doesn't do
+                        * weird things if the guest length is greater than the
+                        * host length).  Explicitly test a mask of '0' and all
+                        * ones, i.e. all events being available and unavailable.
+                        */
+                       for (j = 0; j <= nr_arch_events + 1; j++) {
+                               test_arch_events(v, perf_caps[i], j, 0);
+                               test_arch_events(v, perf_caps[i], j, 0xff);
+
+                               for (k = 0; k < nr_arch_events; k++)
+                                       test_arch_events(v, perf_caps[i], j, BIT(k));
+                       }
+
+                       pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
+                               v, perf_caps[i]);
+                       for (j = 0; j <= nr_gp_counters; j++)
+                               test_gp_counters(v, perf_caps[i], j);
+
+                       pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
+                               v, perf_caps[i]);
+                       for (j = 0; j <= nr_fixed_counters; j++) {
+                               for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
+                                       test_fixed_counters(v, perf_caps[i], j, k);
+                       }
+               }
+       }
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_is_pmu_enabled());
+
+       TEST_REQUIRE(host_cpu_is_intel);
+       TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
+       TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
+
+       kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
+       kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);
+
+       test_intel_counters();
+
+       return 0;
+}
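
For readers following the PERF_GLOBAL_CTRL reset check in guest_test_gp_counters() above, the expected value is simply the low nr_gp_counters bits set. Below is a minimal, illustrative sketch of that arithmetic; it is not part of the patch, and expected_global_ctrl() is a hypothetical helper mirroring GENMASK_ULL(nr_gp_counters - 1, 0).

/* Standalone sketch: expected post-RESET PERF_GLOBAL_CTRL for a v2+ vPMU. */
#include <stdint.h>
#include <stdio.h>

static uint64_t expected_global_ctrl(unsigned int nr_gp_counters)
{
	/* Bits [nr_gp_counters-1:0] set; zero GP counters means no bits set. */
	return nr_gp_counters ? ((1ull << nr_gp_counters) - 1) : 0;
}

int main(void)
{
	/* 4 counters -> 0xf, 8 counters -> 0xff, 0 counters -> 0x0. */
	printf("0x%llx 0x%llx 0x%llx\n",
	       (unsigned long long)expected_global_ctrl(4),
	       (unsigned long long)expected_global_ctrl(8),
	       (unsigned long long)expected_global_ctrl(0));
	return 0;
}

Compiled standalone, this prints "0xf 0xff 0x0", matching the values the GUEST_ASSERT_EQ() checks in the test expect for those counter counts.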
diff --git a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
new file mode 100644 (file)
index 0000000..c15513c
--- /dev/null
@@ -0,0 +1,876 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_SET_PMU_EVENT_FILTER.
+ *
+ * Copyright (C) 2022, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies the expected behavior of allow lists and deny lists for
+ * virtual PMU events.
+ */
+#include "kvm_util.h"
+#include "pmu.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define NUM_BRANCHES 42
+#define MAX_TEST_EVENTS                10
+
+#define PMU_EVENT_FILTER_INVALID_ACTION                (KVM_PMU_EVENT_DENY + 1)
+#define PMU_EVENT_FILTER_INVALID_FLAGS                 (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1)
+#define PMU_EVENT_FILTER_INVALID_NEVENTS               (KVM_PMU_EVENT_FILTER_MAX_EVENTS + 1)
+
+struct __kvm_pmu_event_filter {
+       __u32 action;
+       __u32 nevents;
+       __u32 fixed_counter_bitmap;
+       __u32 flags;
+       __u32 pad[4];
+       __u64 events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
+};
+
+/*
+ * This event list comprises Intel's known architectural events, plus AMD's
+ * Branch Instructions Retired for Zen CPUs.  Note, AMD and Intel use the
+ * same encoding for Instructions Retired.
+ */
+kvm_static_assert(INTEL_ARCH_INSTRUCTIONS_RETIRED == AMD_ZEN_INSTRUCTIONS_RETIRED);
+
+static const struct __kvm_pmu_event_filter base_event_filter = {
+       .nevents = ARRAY_SIZE(base_event_filter.events),
+       .events = {
+               INTEL_ARCH_CPU_CYCLES,
+               INTEL_ARCH_INSTRUCTIONS_RETIRED,
+               INTEL_ARCH_REFERENCE_CYCLES,
+               INTEL_ARCH_LLC_REFERENCES,
+               INTEL_ARCH_LLC_MISSES,
+               INTEL_ARCH_BRANCHES_RETIRED,
+               INTEL_ARCH_BRANCHES_MISPREDICTED,
+               INTEL_ARCH_TOPDOWN_SLOTS,
+               AMD_ZEN_BRANCHES_RETIRED,
+       },
+};
+
+struct {
+       uint64_t loads;
+       uint64_t stores;
+       uint64_t loads_stores;
+       uint64_t branches_retired;
+       uint64_t instructions_retired;
+} pmc_results;
+
+/*
+ * If we encounter a #GP during the guest PMU sanity check, then the guest
+ * PMU is not functional. Inform the hypervisor via GUEST_SYNC(-EFAULT).
+ */
+static void guest_gp_handler(struct ex_regs *regs)
+{
+       GUEST_SYNC(-EFAULT);
+}
+
+/*
+ * Check that we can write a new value to the given MSR and read it back.
+ * The caller should provide a non-empty set of bits that are safe to flip.
+ *
+ * Return on success. GUEST_SYNC(-EIO) on error.
+ */
+static void check_msr(uint32_t msr, uint64_t bits_to_flip)
+{
+       uint64_t v = rdmsr(msr) ^ bits_to_flip;
+
+       wrmsr(msr, v);
+       if (rdmsr(msr) != v)
+               GUEST_SYNC(-EIO);
+
+       v ^= bits_to_flip;
+       wrmsr(msr, v);
+       if (rdmsr(msr) != v)
+               GUEST_SYNC(-EIO);
+}
+
+static void run_and_measure_loop(uint32_t msr_base)
+{
+       const uint64_t branches_retired = rdmsr(msr_base + 0);
+       const uint64_t insn_retired = rdmsr(msr_base + 1);
+
+       __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+
+       pmc_results.branches_retired = rdmsr(msr_base + 0) - branches_retired;
+       pmc_results.instructions_retired = rdmsr(msr_base + 1) - insn_retired;
+}
+
+static void intel_guest_code(void)
+{
+       check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
+       check_msr(MSR_P6_EVNTSEL0, 0xffff);
+       check_msr(MSR_IA32_PMC0, 0xffff);
+       GUEST_SYNC(0);
+
+       for (;;) {
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+               wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+                     ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_BRANCHES_RETIRED);
+               wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+                     ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_INSTRUCTIONS_RETIRED);
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
+
+               run_and_measure_loop(MSR_IA32_PMC0);
+               GUEST_SYNC(0);
+       }
+}
+
+/*
+ * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
+ * this code uses the always-available, legacy K7 PMU MSRs, which alias to
+ * the first four of the six extended core PMU MSRs.
+ */
+static void amd_guest_code(void)
+{
+       check_msr(MSR_K7_EVNTSEL0, 0xffff);
+       check_msr(MSR_K7_PERFCTR0, 0xffff);
+       GUEST_SYNC(0);
+
+       for (;;) {
+               wrmsr(MSR_K7_EVNTSEL0, 0);
+               wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+                     ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BRANCHES_RETIRED);
+               wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+                     ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_INSTRUCTIONS_RETIRED);
+
+               run_and_measure_loop(MSR_K7_PERFCTR0);
+               GUEST_SYNC(0);
+       }
+}
+
+/*
+ * Run the VM to the next GUEST_SYNC(value), and return the value passed
+ * to the sync. Any other exit from the guest is fatal.
+ */
+static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+       get_ucall(vcpu, &uc);
+       TEST_ASSERT(uc.cmd == UCALL_SYNC,
+                   "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
+       return uc.args[1];
+}
+
+static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu)
+{
+       uint64_t r;
+
+       memset(&pmc_results, 0, sizeof(pmc_results));
+       sync_global_to_guest(vcpu->vm, pmc_results);
+
+       r = run_vcpu_to_sync(vcpu);
+       TEST_ASSERT(!r, "Unexpected sync value: 0x%lx", r);
+
+       sync_global_from_guest(vcpu->vm, pmc_results);
+}
+
+/*
+ * In a nested environment or if the vPMU is disabled, the guest PMU
+ * might not work as architected (accessing the PMU MSRs may raise
+ * #GP, or writes could simply be discarded). In those situations,
+ * there is no point in running these tests. The guest code will perform
+ * a sanity check and then GUEST_SYNC(success). In the case of failure,
+ * the behavior of the guest on resumption is undefined.
+ */
+static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
+{
+       uint64_t r;
+
+       vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
+       r = run_vcpu_to_sync(vcpu);
+       vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL);
+
+       return !r;
+}
+
+/*
+ * Remove the first occurrence of 'event' (if any) from the filter's
+ * event list.
+ */
+static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
+{
+       bool found = false;
+       int i;
+
+       for (i = 0; i < f->nevents; i++) {
+               if (found)
+                       f->events[i - 1] = f->events[i];
+               else
+                       found = f->events[i] == event;
+       }
+       if (found)
+               f->nevents--;
+}
+
+#define ASSERT_PMC_COUNTING_INSTRUCTIONS()                                             \
+do {                                                                                   \
+       uint64_t br = pmc_results.branches_retired;                                     \
+       uint64_t ir = pmc_results.instructions_retired;                                 \
+                                                                                       \
+       if (br && br != NUM_BRANCHES)                                                   \
+               pr_info("%s: Branch instructions retired = %lu (expected %u)\n",        \
+                       __func__, br, NUM_BRANCHES);                                    \
+       TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)",         \
+                   __func__, br);                                                      \
+       TEST_ASSERT(ir, "%s: Instructions retired = %lu (expected > 0)",                \
+                   __func__, ir);                                                      \
+} while (0)
+
+#define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS()                                         \
+do {                                                                                   \
+       uint64_t br = pmc_results.branches_retired;                                     \
+       uint64_t ir = pmc_results.instructions_retired;                                 \
+                                                                                       \
+       TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)",          \
+                   __func__, br);                                                      \
+       TEST_ASSERT(!ir, "%s: Instructions retired = %lu (expected 0)",                 \
+                   __func__, ir);                                                      \
+} while (0)
+
+static void test_without_filter(struct kvm_vcpu *vcpu)
+{
+       run_vcpu_and_sync_pmc_results(vcpu);
+
+       ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_with_filter(struct kvm_vcpu *vcpu,
+                            struct __kvm_pmu_event_filter *__f)
+{
+       struct kvm_pmu_event_filter *f = (void *)__f;
+
+       vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
+       run_vcpu_and_sync_pmc_results(vcpu);
+}
+
+static void test_amd_deny_list(struct kvm_vcpu *vcpu)
+{
+       struct __kvm_pmu_event_filter f = {
+               .action = KVM_PMU_EVENT_DENY,
+               .nevents = 1,
+               .events = {
+                       RAW_EVENT(0x1C2, 0),
+               },
+       };
+
+       test_with_filter(vcpu, &f);
+
+       ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_member_deny_list(struct kvm_vcpu *vcpu)
+{
+       struct __kvm_pmu_event_filter f = base_event_filter;
+
+       f.action = KVM_PMU_EVENT_DENY;
+       test_with_filter(vcpu, &f);
+
+       ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
+}
+
+static void test_member_allow_list(struct kvm_vcpu *vcpu)
+{
+       struct __kvm_pmu_event_filter f = base_event_filter;
+
+       f.action = KVM_PMU_EVENT_ALLOW;
+       test_with_filter(vcpu, &f);
+
+       ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
+{
+       struct __kvm_pmu_event_filter f = base_event_filter;
+
+       f.action = KVM_PMU_EVENT_DENY;
+
+       remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
+       remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
+       remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
+       test_with_filter(vcpu, &f);
+
+       ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
+{
+       struct __kvm_pmu_event_filter f = base_event_filter;
+
+       f.action = KVM_PMU_EVENT_ALLOW;
+
+       remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
+       remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
+       remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
+       test_with_filter(vcpu, &f);
+
+       ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
+}
+
+/*
+ * Verify that setting KVM_PMU_CAP_DISABLE prevents the use of the PMU.
+ *
+ * Note that KVM_CAP_PMU_CAPABILITY must be invoked prior to creating VCPUs.
+ */
+static void test_pmu_config_disable(void (*guest_code)(void))
+{
+       struct kvm_vcpu *vcpu;
+       int r;
+       struct kvm_vm *vm;
+
+       r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY);
+       if (!(r & KVM_PMU_CAP_DISABLE))
+               return;
+
+       vm = vm_create(1);
+
+       vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE);
+
+       vcpu = vm_vcpu_add(vm, 0, guest_code);
+       TEST_ASSERT(!sanity_check_pmu(vcpu),
+                   "Guest should not be able to use disabled PMU.");
+
+       kvm_vm_free(vm);
+}
+
+/*
+ * On Intel, check for a non-zero PMU version, at least one general-purpose
+ * counter per logical processor, and support for counting the number of branch
+ * instructions retired.
+ */
+static bool use_intel_pmu(void)
+{
+       return host_cpu_is_intel &&
+              kvm_cpu_property(X86_PROPERTY_PMU_VERSION) &&
+              kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) &&
+              kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED);
+}
+
+/*
+ * On AMD, all Family 17h+ CPUs (Zen and its successors) use event encoding
+ * 0xc2,0 for Branch Instructions Retired.
+ */
+static bool use_amd_pmu(void)
+{
+       return host_cpu_is_amd && kvm_cpu_family() >= 0x17;
+}
+
+/*
+ * "MEM_INST_RETIRED.ALL_LOADS", "MEM_INST_RETIRED.ALL_STORES", and
+ * "MEM_INST_RETIRED.ANY" from https://perfmon-events.intel.com/
+ * are supported on Intel Xeon processors:
+ *  - Sapphire Rapids, Ice Lake, Cascade Lake, Skylake.
+ */
+#define MEM_INST_RETIRED               0xD0
+#define MEM_INST_RETIRED_LOAD          RAW_EVENT(MEM_INST_RETIRED, 0x81)
+#define MEM_INST_RETIRED_STORE         RAW_EVENT(MEM_INST_RETIRED, 0x82)
+#define MEM_INST_RETIRED_LOAD_STORE    RAW_EVENT(MEM_INST_RETIRED, 0x83)
+
+static bool supports_event_mem_inst_retired(void)
+{
+       uint32_t eax, ebx, ecx, edx;
+
+       cpuid(1, &eax, &ebx, &ecx, &edx);
+       if (x86_family(eax) == 0x6) {
+               switch (x86_model(eax)) {
+               /* Sapphire Rapids */
+               case 0x8F:
+               /* Ice Lake */
+               case 0x6A:
+               /* Skylake */
+               /* Cascade Lake */
+               case 0x55:
+                       return true;
+               }
+       }
+
+       return false;
+}
+
+/*
+ * "LS Dispatch", from Processor Programming Reference
+ * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
+ * Preliminary Processor Programming Reference (PPR) for AMD Family
+ * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
+ * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
+ * B1 Processors Volume 1 of 2.
+ */
+#define LS_DISPATCH            0x29
+#define LS_DISPATCH_LOAD       RAW_EVENT(LS_DISPATCH, BIT(0))
+#define LS_DISPATCH_STORE      RAW_EVENT(LS_DISPATCH, BIT(1))
+#define LS_DISPATCH_LOAD_STORE RAW_EVENT(LS_DISPATCH, BIT(2))
+
+#define INCLUDE_MASKED_ENTRY(event_select, mask, match) \
+       KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, false)
+#define EXCLUDE_MASKED_ENTRY(event_select, mask, match) \
+       KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, true)
+
+static void masked_events_guest_test(uint32_t msr_base)
+{
+       /*
+        * The actual values of the counters don't determine the outcome of
+        * the test, only whether they are zero or non-zero.
+        */
+       const uint64_t loads = rdmsr(msr_base + 0);
+       const uint64_t stores = rdmsr(msr_base + 1);
+       const uint64_t loads_stores = rdmsr(msr_base + 2);
+       int val;
+
+       __asm__ __volatile__("movl $0, %[v];"
+                            "movl %[v], %%eax;"
+                            "incl %[v];"
+                            : [v]"+m"(val) :: "eax");
+
+       pmc_results.loads = rdmsr(msr_base + 0) - loads;
+       pmc_results.stores = rdmsr(msr_base + 1) - stores;
+       pmc_results.loads_stores = rdmsr(msr_base + 2) - loads_stores;
+}
+
+static void intel_masked_events_guest_code(void)
+{
+       for (;;) {
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+               wrmsr(MSR_P6_EVNTSEL0 + 0, ARCH_PERFMON_EVENTSEL_ENABLE |
+                     ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD);
+               wrmsr(MSR_P6_EVNTSEL0 + 1, ARCH_PERFMON_EVENTSEL_ENABLE |
+                     ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_STORE);
+               wrmsr(MSR_P6_EVNTSEL0 + 2, ARCH_PERFMON_EVENTSEL_ENABLE |
+                     ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD_STORE);
+
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x7);
+
+               masked_events_guest_test(MSR_IA32_PMC0);
+               GUEST_SYNC(0);
+       }
+}
+
+static void amd_masked_events_guest_code(void)
+{
+       for (;;) {
+               wrmsr(MSR_K7_EVNTSEL0, 0);
+               wrmsr(MSR_K7_EVNTSEL1, 0);
+               wrmsr(MSR_K7_EVNTSEL2, 0);
+
+               wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+                     ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD);
+               wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+                     ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_STORE);
+               wrmsr(MSR_K7_EVNTSEL2, ARCH_PERFMON_EVENTSEL_ENABLE |
+                     ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD_STORE);
+
+               masked_events_guest_test(MSR_K7_PERFCTR0);
+               GUEST_SYNC(0);
+       }
+}
+
+static void run_masked_events_test(struct kvm_vcpu *vcpu,
+                                  const uint64_t masked_events[],
+                                  const int nmasked_events)
+{
+       struct __kvm_pmu_event_filter f = {
+               .nevents = nmasked_events,
+               .action = KVM_PMU_EVENT_ALLOW,
+               .flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+       };
+
+       memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events);
+       test_with_filter(vcpu, &f);
+}
+
+#define ALLOW_LOADS            BIT(0)
+#define ALLOW_STORES           BIT(1)
+#define ALLOW_LOADS_STORES     BIT(2)
+
+struct masked_events_test {
+       uint64_t intel_events[MAX_TEST_EVENTS];
+       uint64_t intel_event_end;
+       uint64_t amd_events[MAX_TEST_EVENTS];
+       uint64_t amd_event_end;
+       const char *msg;
+       uint32_t flags;
+};
+
+/*
+ * These are the test cases for the masked events tests.
+ *
+ * For each test, the guest enables 3 PMU counters (loads, stores,
+ * loads + stores).  The filter is then set in KVM with the masked events
+ * provided.  The test then verifies that the counters agree with which
+ * ones should be counting and which ones should be filtered.
+ */
+const struct masked_events_test test_cases[] = {
+       {
+               .intel_events = {
+                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x81),
+               },
+               .amd_events = {
+                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)),
+               },
+               .msg = "Only allow loads.",
+               .flags = ALLOW_LOADS,
+       }, {
+               .intel_events = {
+                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82),
+               },
+               .amd_events = {
+                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)),
+               },
+               .msg = "Only allow stores.",
+               .flags = ALLOW_STORES,
+       }, {
+               .intel_events = {
+                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83),
+               },
+               .amd_events = {
+                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(2)),
+               },
+               .msg = "Only allow loads + stores.",
+               .flags = ALLOW_LOADS_STORES,
+       }, {
+               .intel_events = {
+                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
+                       EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83),
+               },
+               .amd_events = {
+                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, ~(BIT(0) | BIT(1)), 0),
+               },
+               .msg = "Only allow loads and stores.",
+               .flags = ALLOW_LOADS | ALLOW_STORES,
+       }, {
+               .intel_events = {
+                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
+                       EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82),
+               },
+               .amd_events = {
+                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
+                       EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)),
+               },
+               .msg = "Only allow loads and loads + stores.",
+               .flags = ALLOW_LOADS | ALLOW_LOADS_STORES
+       }, {
+               .intel_events = {
+                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFE, 0x82),
+               },
+               .amd_events = {
+                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
+                       EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)),
+               },
+               .msg = "Only allow stores and loads + stores.",
+               .flags = ALLOW_STORES | ALLOW_LOADS_STORES
+       }, {
+               .intel_events = {
+                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
+               },
+               .amd_events = {
+                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
+               },
+               .msg = "Only allow loads, stores, and loads + stores.",
+               .flags = ALLOW_LOADS | ALLOW_STORES | ALLOW_LOADS_STORES
+       },
+};
+
+static int append_test_events(const struct masked_events_test *test,
+                             uint64_t *events, int nevents)
+{
+       const uint64_t *evts;
+       int i;
+
+       evts = use_intel_pmu() ? test->intel_events : test->amd_events;
+       for (i = 0; i < MAX_TEST_EVENTS; i++) {
+               if (evts[i] == 0)
+                       break;
+
+               events[nevents + i] = evts[i];
+       }
+
+       return nevents + i;
+}
+
+static bool bool_eq(bool a, bool b)
+{
+       return a == b;
+}
+
+static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events,
+                                   int nevents)
+{
+       int ntests = ARRAY_SIZE(test_cases);
+       int i, n;
+
+       for (i = 0; i < ntests; i++) {
+               const struct masked_events_test *test = &test_cases[i];
+
+               /* Do any test case events overflow MAX_TEST_EVENTS? */
+               assert(test->intel_event_end == 0);
+               assert(test->amd_event_end == 0);
+
+               n = append_test_events(test, events, nevents);
+
+               run_masked_events_test(vcpu, events, n);
+
+               TEST_ASSERT(bool_eq(pmc_results.loads, test->flags & ALLOW_LOADS) &&
+                           bool_eq(pmc_results.stores, test->flags & ALLOW_STORES) &&
+                           bool_eq(pmc_results.loads_stores,
+                                   test->flags & ALLOW_LOADS_STORES),
+                           "%s  loads: %lu, stores: %lu, loads + stores: %lu",
+                           test->msg, pmc_results.loads, pmc_results.stores,
+                           pmc_results.loads_stores);
+       }
+}
+
+static void add_dummy_events(uint64_t *events, int nevents)
+{
+       int i;
+
+       for (i = 0; i < nevents; i++) {
+               int event_select = i % 0xFF;
+               bool exclude = ((i % 4) == 0);
+
+               if (event_select == MEM_INST_RETIRED ||
+                   event_select == LS_DISPATCH)
+                       event_select++;
+
+               events[i] = KVM_PMU_ENCODE_MASKED_ENTRY(event_select, 0,
+                                                       0, exclude);
+       }
+}
+
+static void test_masked_events(struct kvm_vcpu *vcpu)
+{
+       int nevents = KVM_PMU_EVENT_FILTER_MAX_EVENTS - MAX_TEST_EVENTS;
+       uint64_t events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
+
+       /* Run the test cases against a sparse PMU event filter. */
+       run_masked_events_tests(vcpu, events, 0);
+
+       /* Run the test cases against a dense PMU event filter. */
+       add_dummy_events(events, KVM_PMU_EVENT_FILTER_MAX_EVENTS);
+       run_masked_events_tests(vcpu, events, nevents);
+}
+
+static int set_pmu_event_filter(struct kvm_vcpu *vcpu,
+                               struct __kvm_pmu_event_filter *__f)
+{
+       struct kvm_pmu_event_filter *f = (void *)__f;
+
+       return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
+}
+
+static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event,
+                                      uint32_t flags, uint32_t action)
+{
+       struct __kvm_pmu_event_filter f = {
+               .nevents = 1,
+               .flags = flags,
+               .action = action,
+               .events = {
+                       event,
+               },
+       };
+
+       return set_pmu_event_filter(vcpu, &f);
+}
+
+static void test_filter_ioctl(struct kvm_vcpu *vcpu)
+{
+       uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       struct __kvm_pmu_event_filter f;
+       uint64_t e = ~0ul;
+       int r;
+
+       /*
+        * Unfortunately, setting invalid bits (bits other than eventsel+umask)
+        * in the event data is expected to succeed when flags == 0.
+        */
+       r = set_pmu_single_event_filter(vcpu, e, 0, KVM_PMU_EVENT_ALLOW);
+       TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
+
+       r = set_pmu_single_event_filter(vcpu, e,
+                                       KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+                                       KVM_PMU_EVENT_ALLOW);
+       TEST_ASSERT(r != 0, "Invalid PMU Event Filter is expected to fail");
+
+       e = KVM_PMU_ENCODE_MASKED_ENTRY(0xff, 0xff, 0xff, 0xf);
+       r = set_pmu_single_event_filter(vcpu, e,
+                                       KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+                                       KVM_PMU_EVENT_ALLOW);
+       TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
+
+       f = base_event_filter;
+       f.action = PMU_EVENT_FILTER_INVALID_ACTION;
+       r = set_pmu_event_filter(vcpu, &f);
+       TEST_ASSERT(r, "Set invalid action is expected to fail");
+
+       f = base_event_filter;
+       f.flags = PMU_EVENT_FILTER_INVALID_FLAGS;
+       r = set_pmu_event_filter(vcpu, &f);
+       TEST_ASSERT(r, "Set invalid flags is expected to fail");
+
+       f = base_event_filter;
+       f.nevents = PMU_EVENT_FILTER_INVALID_NEVENTS;
+       r = set_pmu_event_filter(vcpu, &f);
+       TEST_ASSERT(r, "Exceeding the max number of filter events should fail");
+
+       f = base_event_filter;
+       f.fixed_counter_bitmap = ~GENMASK_ULL(nr_fixed_counters, 0);
+       r = set_pmu_event_filter(vcpu, &f);
+       TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed");
+}
+
+static void intel_run_fixed_counter_guest_code(uint8_t idx)
+{
+       for (;;) {
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+               wrmsr(MSR_CORE_PERF_FIXED_CTR0 + idx, 0);
+
+               /* Only OS_EN bit is enabled for fixed counter[idx]. */
+               wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(idx, FIXED_PMC_KERNEL));
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(idx));
+               __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+               GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + idx));
+       }
+}
+
+static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu,
+                                              uint32_t action, uint32_t bitmap)
+{
+       struct __kvm_pmu_event_filter f = {
+               .action = action,
+               .fixed_counter_bitmap = bitmap,
+       };
+       set_pmu_event_filter(vcpu, &f);
+
+       return run_vcpu_to_sync(vcpu);
+}
+
+static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu,
+                                                  uint32_t action,
+                                                  uint32_t bitmap)
+{
+       struct __kvm_pmu_event_filter f = base_event_filter;
+
+       f.action = action;
+       f.fixed_counter_bitmap = bitmap;
+       set_pmu_event_filter(vcpu, &f);
+
+       return run_vcpu_to_sync(vcpu);
+}
+
+static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx,
+                                       uint8_t nr_fixed_counters)
+{
+       unsigned int i;
+       uint32_t bitmap;
+       uint64_t count;
+
+       TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8,
+                   "Invalid nr_fixed_counters");
+
+       /*
+        * Check that the fixed performance counter can count normally when
+        * KVM userspace doesn't set any PMU filter.
+        */
+       count = run_vcpu_to_sync(vcpu);
+       TEST_ASSERT(count, "Unexpected count value: %ld", count);
+
+       for (i = 0; i < BIT(nr_fixed_counters); i++) {
+               bitmap = BIT(i);
+               count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_ALLOW,
+                                                      bitmap);
+               TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
+
+               count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_DENY,
+                                                      bitmap);
+               TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
+
+               /*
+                * Check that fixed_counter_bitmap has higher priority than
+                * events[] when both are set.
+                */
+               count = test_set_gp_and_fixed_event_filter(vcpu,
+                                                          KVM_PMU_EVENT_ALLOW,
+                                                          bitmap);
+               TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
+
+               count = test_set_gp_and_fixed_event_filter(vcpu,
+                                                          KVM_PMU_EVENT_DENY,
+                                                          bitmap);
+               TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
+       }
+}
+
+static void test_fixed_counter_bitmap(void)
+{
+       uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+       uint8_t idx;
+
+       /*
+        * Check that pmu_event_filter works as expected when it's applied to
+        * fixed performance counters.
+        */
+       for (idx = 0; idx < nr_fixed_counters; idx++) {
+               vm = vm_create_with_one_vcpu(&vcpu,
+                                            intel_run_fixed_counter_guest_code);
+               vcpu_args_set(vcpu, 1, idx);
+               __test_fixed_counter_bitmap(vcpu, idx, nr_fixed_counters);
+               kvm_vm_free(vm);
+       }
+}
+
+int main(int argc, char *argv[])
+{
+       void (*guest_code)(void);
+       struct kvm_vcpu *vcpu, *vcpu2 = NULL;
+       struct kvm_vm *vm;
+
+       TEST_REQUIRE(kvm_is_pmu_enabled());
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER));
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_MASKED_EVENTS));
+
+       TEST_REQUIRE(use_intel_pmu() || use_amd_pmu());
+       guest_code = use_intel_pmu() ? intel_guest_code : amd_guest_code;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       TEST_REQUIRE(sanity_check_pmu(vcpu));
+
+       if (use_amd_pmu())
+               test_amd_deny_list(vcpu);
+
+       test_without_filter(vcpu);
+       test_member_deny_list(vcpu);
+       test_member_allow_list(vcpu);
+       test_not_member_deny_list(vcpu);
+       test_not_member_allow_list(vcpu);
+
+       if (use_intel_pmu() &&
+           supports_event_mem_inst_retired() &&
+           kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) >= 3)
+               vcpu2 = vm_vcpu_add(vm, 2, intel_masked_events_guest_code);
+       else if (use_amd_pmu())
+               vcpu2 = vm_vcpu_add(vm, 2, amd_masked_events_guest_code);
+
+       if (vcpu2)
+               test_masked_events(vcpu2);
+       test_filter_ioctl(vcpu);
+
+       kvm_vm_free(vm);
+
+       test_pmu_config_disable(guest_code);
+       test_fixed_counter_bitmap();
+
+       return 0;
+}
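
The masked-events test cases above rely on the match rule KVM documents for KVM_SET_PMU_EVENT_FILTER masked entries: a unit mask matches an entry when (unit_mask & mask) == match, and an event is allowed only if it matches at least one include entry and no exclude entry. The following standalone sketch of that rule is illustrative only and is based on the documented semantics; is_unit_mask_allowed() is a hypothetical helper, not part of the patch.

/*
 * Illustrative sketch of the masked-events match rule (assumed from KVM's
 * documented semantics, not taken verbatim from the patch above).
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct masked_entry {
	uint8_t mask;
	uint8_t match;
	bool    exclude;
};

static bool is_unit_mask_allowed(const struct masked_entry *entries, int n,
				 uint8_t unit_mask)
{
	bool included = false;
	int i;

	for (i = 0; i < n; i++) {
		if ((unit_mask & entries[i].mask) != entries[i].match)
			continue;
		if (entries[i].exclude)
			return false;	/* a matching exclude entry denies the event */
		included = true;
	}
	return included;
}

int main(void)
{
	/* "Only allow loads and stores": include 0x7C/0x00, exclude 0xFF/0x83. */
	const struct masked_entry f[] = {
		{ .mask = 0x7C, .match = 0x00, .exclude = false },
		{ .mask = 0xFF, .match = 0x83, .exclude = true  },
	};

	/* Loads (0x81) and stores (0x82) pass; loads + stores (0x83) is filtered. */
	printf("%d %d %d\n",
	       is_unit_mask_allowed(f, 2, 0x81),
	       is_unit_mask_allowed(f, 2, 0x82),
	       is_unit_mask_allowed(f, 2, 0x83));
	return 0;
}

Run against the "Only allow loads and stores." filter from the test cases, this prints "1 1 0": loads and stores count while loads + stores is filtered, which is exactly what the ALLOW_LOADS | ALLOW_STORES flags for that case encode.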
diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
new file mode 100644 (file)
index 0000000..82a8d88
--- /dev/null
@@ -0,0 +1,483 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022, Google LLC.
+ */
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/kvm_para.h>
+#include <linux/memfd.h>
+#include <linux/sizes.h>
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define BASE_DATA_SLOT         10
+#define BASE_DATA_GPA          ((uint64_t)(1ull << 32))
+#define PER_CPU_DATA_SIZE      ((uint64_t)(SZ_2M + PAGE_SIZE))
+
+/* Horrific macro so that the line info is captured accurately :-( */
+#define memcmp_g(gpa, pattern,  size)                                                          \
+do {                                                                                           \
+       uint8_t *mem = (uint8_t *)gpa;                                                          \
+       size_t i;                                                                               \
+                                                                                               \
+       for (i = 0; i < size; i++)                                                              \
+               __GUEST_ASSERT(mem[i] == pattern,                                               \
+                              "Guest expected 0x%x at offset %lu (gpa 0x%lx), got 0x%x",       \
+                              pattern, i, gpa + i, mem[i]);                                    \
+} while (0)
+
+static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size)
+{
+       size_t i;
+
+       for (i = 0; i < size; i++)
+               TEST_ASSERT(mem[i] == pattern,
+                           "Host expected 0x%x at gpa 0x%lx, got 0x%x",
+                           pattern, gpa + i, mem[i]);
+}
+
+/*
+ * Run memory conversion tests with explicit conversion:
+ * Execute a KVM hypercall to map/unmap a gpa range, which causes a userspace
+ * exit to back/unback private memory. Subsequent guest accesses to the gpa
+ * range will not cause an exit to userspace.
+ *
+ * Test memory conversion scenarios with the following steps:
+ * 1) Access private memory using private access and verify that memory contents
+ *   are not visible to userspace.
+ * 2) Convert memory to shared using explicit conversions and ensure that
+ *   userspace is able to access the shared regions.
+ * 3) Convert memory back to private using explicit conversions and ensure that
+ *   userspace is again not able to access converted private regions.
+ */
+
+#define GUEST_STAGE(o, s) { .offset = o, .size = s }
+
+enum ucall_syncs {
+       SYNC_SHARED,
+       SYNC_PRIVATE,
+};
+
+static void guest_sync_shared(uint64_t gpa, uint64_t size,
+                             uint8_t current_pattern, uint8_t new_pattern)
+{
+       GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern);
+}
+
+static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern)
+{
+       GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern);
+}
+
+/* Arbitrary values, KVM doesn't care about the attribute flags. */
+#define MAP_GPA_SET_ATTRIBUTES BIT(0)
+#define MAP_GPA_SHARED         BIT(1)
+#define MAP_GPA_DO_FALLOCATE   BIT(2)
+
+static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared,
+                         bool do_fallocate)
+{
+       uint64_t flags = MAP_GPA_SET_ATTRIBUTES;
+
+       if (map_shared)
+               flags |= MAP_GPA_SHARED;
+       if (do_fallocate)
+               flags |= MAP_GPA_DO_FALLOCATE;
+       kvm_hypercall_map_gpa_range(gpa, size, flags);
+}
+
+static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate)
+{
+       guest_map_mem(gpa, size, true, do_fallocate);
+}
+
+static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate)
+{
+       guest_map_mem(gpa, size, false, do_fallocate);
+}
+
+struct {
+       uint64_t offset;
+       uint64_t size;
+} static const test_ranges[] = {
+       GUEST_STAGE(0, PAGE_SIZE),
+       GUEST_STAGE(0, SZ_2M),
+       GUEST_STAGE(PAGE_SIZE, PAGE_SIZE),
+       GUEST_STAGE(PAGE_SIZE, SZ_2M),
+       GUEST_STAGE(SZ_2M, PAGE_SIZE),
+};
+
+static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate)
+{
+       const uint8_t def_p = 0xaa;
+       const uint8_t init_p = 0xcc;
+       uint64_t j;
+       int i;
+
+       /* Memory should be shared by default. */
+       memset((void *)base_gpa, def_p, PER_CPU_DATA_SIZE);
+       memcmp_g(base_gpa, def_p, PER_CPU_DATA_SIZE);
+       guest_sync_shared(base_gpa, PER_CPU_DATA_SIZE, def_p, init_p);
+
+       memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
+
+       for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
+               uint64_t gpa = base_gpa + test_ranges[i].offset;
+               uint64_t size = test_ranges[i].size;
+               uint8_t p1 = 0x11;
+               uint8_t p2 = 0x22;
+               uint8_t p3 = 0x33;
+               uint8_t p4 = 0x44;
+
+               /*
+                * Set the test region to pattern one to differentiate it from
+                * the data range as a whole (contains the initial pattern).
+                */
+               memset((void *)gpa, p1, size);
+
+               /*
+                * Convert to private, set and verify the private data, and
+                * then verify that the rest of the data (mapped shared) still
+                * holds the initial pattern, and that the host always sees the
+                * shared memory (initial pattern).  Unlike shared memory,
+                * punching a hole in private memory is destructive, i.e.
+                * previous values aren't guaranteed to be preserved.
+                */
+               guest_map_private(gpa, size, do_fallocate);
+
+               if (size > PAGE_SIZE) {
+                       memset((void *)gpa, p2, PAGE_SIZE);
+                       goto skip;
+               }
+
+               memset((void *)gpa, p2, size);
+               guest_sync_private(gpa, size, p1);
+
+               /*
+                * Verify that the private memory was set to pattern two, and
+                * that shared memory still holds the initial pattern.
+                */
+               memcmp_g(gpa, p2, size);
+               if (gpa > base_gpa)
+                       memcmp_g(base_gpa, init_p, gpa - base_gpa);
+               if (gpa + size < base_gpa + PER_CPU_DATA_SIZE)
+                       memcmp_g(gpa + size, init_p,
+                                (base_gpa + PER_CPU_DATA_SIZE) - (gpa + size));
+
+               /*
+                * Convert odd-number page frames back to shared to verify KVM
+                * also correctly handles holes in private ranges.
+                */
+               for (j = 0; j < size; j += PAGE_SIZE) {
+                       if ((j >> PAGE_SHIFT) & 1) {
+                               guest_map_shared(gpa + j, PAGE_SIZE, do_fallocate);
+                               guest_sync_shared(gpa + j, PAGE_SIZE, p1, p3);
+
+                               memcmp_g(gpa + j, p3, PAGE_SIZE);
+                       } else {
+                               guest_sync_private(gpa + j, PAGE_SIZE, p1);
+                       }
+               }
+
+skip:
+               /*
+                * Convert the entire region back to shared and explicitly write
+                * pattern three to fill in the even-number frames before
+                * asking the host to verify (and write pattern four).
+                */
+               guest_map_shared(gpa, size, do_fallocate);
+               memset((void *)gpa, p3, size);
+               guest_sync_shared(gpa, size, p3, p4);
+               memcmp_g(gpa, p4, size);
+
+               /* Reset the shared memory back to the initial pattern. */
+               memset((void *)gpa, init_p, size);
+
+               /*
+                * Free (via PUNCH_HOLE) *all* private memory so that the next
+                * iteration starts from a clean slate, e.g. with respect to
+                * whether or not there are pages/folios in guest_mem.
+                */
+               guest_map_shared(base_gpa, PER_CPU_DATA_SIZE, true);
+       }
+}
+
+static void guest_punch_hole(uint64_t gpa, uint64_t size)
+{
+       /* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */
+       uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE;
+
+       kvm_hypercall_map_gpa_range(gpa, size, flags);
+}
+
+/*
+ * Test that PUNCH_HOLE actually frees memory by punching holes without doing a
+ * proper conversion.  Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating
+ * (subsequent fault) should zero memory.
+ */
+static void guest_test_punch_hole(uint64_t base_gpa, bool precise)
+{
+       const uint8_t init_p = 0xcc;
+       int i;
+
+        * Convert the entire range to private; this testcase is all about
+        * Convert the entire range to private, this testcase is all about
+        * punching holes in guest_memfd, i.e. shared mappings aren't needed.
+        */
+       guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false);
+
+       for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
+               uint64_t gpa = base_gpa + test_ranges[i].offset;
+               uint64_t size = test_ranges[i].size;
+
+               /*
+                * Free all memory before each iteration, even for the !precise
+                * case where the memory will be faulted back in.  Freeing and
+                * reallocating should obviously work, and freeing all memory
+                * minimizes the probability of cross-testcase influence.
+                */
+               guest_punch_hole(base_gpa, PER_CPU_DATA_SIZE);
+
+               /* Fault-in and initialize memory, and verify the pattern. */
+               if (precise) {
+                       memset((void *)gpa, init_p, size);
+                       memcmp_g(gpa, init_p, size);
+               } else {
+                       memset((void *)base_gpa, init_p, PER_CPU_DATA_SIZE);
+                       memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
+               }
+
+               /*
+                * Punch a hole at the target range and verify that reads from
+                * the guest succeed and return zeroes.
+                */
+               guest_punch_hole(gpa, size);
+               memcmp_g(gpa, 0, size);
+       }
+}
+
+static void guest_code(uint64_t base_gpa)
+{
+       /*
+        * Run the conversion test twice, with and without doing fallocate() on
+        * the guest_memfd backing when converting between shared and private.
+        */
+       guest_test_explicit_conversion(base_gpa, false);
+       guest_test_explicit_conversion(base_gpa, true);
+
+       /*
+        * Run the PUNCH_HOLE test twice too, once with the entire guest_memfd
+        * faulted in, once with only the target range faulted in.
+        */
+       guest_test_punch_hole(base_gpa, false);
+       guest_test_punch_hole(base_gpa, true);
+       GUEST_DONE();
+}
+
+static void handle_exit_hypercall(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       uint64_t gpa = run->hypercall.args[0];
+       uint64_t size = run->hypercall.args[1] * PAGE_SIZE;
+       bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES;
+       bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED;
+       bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE;
+       struct kvm_vm *vm = vcpu->vm;
+
+       TEST_ASSERT(run->hypercall.nr == KVM_HC_MAP_GPA_RANGE,
+                   "Wanted MAP_GPA_RANGE (%u), got '%llu'",
+                   KVM_HC_MAP_GPA_RANGE, run->hypercall.nr);
+
+       if (do_fallocate)
+               vm_guest_mem_fallocate(vm, gpa, size, map_shared);
+
+       if (set_attributes)
+               vm_set_memory_attributes(vm, gpa, size,
+                                        map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE);
+       run->hypercall.ret = 0;
+}
+
+static bool run_vcpus;
+
+static void *__test_mem_conversions(void *__vcpu)
+{
+       struct kvm_vcpu *vcpu = __vcpu;
+       struct kvm_run *run = vcpu->run;
+       struct kvm_vm *vm = vcpu->vm;
+       struct ucall uc;
+
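+       /* Spin until the main thread signals all vCPUs to start running. */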
+       while (!READ_ONCE(run_vcpus))
+               ;
+
+       for ( ;; ) {
+               vcpu_run(vcpu);
+
+               if (run->exit_reason == KVM_EXIT_HYPERCALL) {
+                       handle_exit_hypercall(vcpu);
+                       continue;
+               }
+
+               TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+                           "Wanted KVM_EXIT_IO, got exit reason: %u (%s)",
+                           run->exit_reason, exit_reason_str(run->exit_reason));
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+               case UCALL_SYNC: {
+                       uint64_t gpa  = uc.args[1];
+                       size_t size = uc.args[2];
+                       size_t i;
+
+                       TEST_ASSERT(uc.args[0] == SYNC_SHARED ||
+                                   uc.args[0] == SYNC_PRIVATE,
+                                   "Unknown sync command '%ld'", uc.args[0]);
+
+                       for (i = 0; i < size; i += vm->page_size) {
+                               size_t nr_bytes = min_t(size_t, vm->page_size, size - i);
+                               uint8_t *hva = addr_gpa2hva(vm, gpa + i);
+
+                               /* In all cases, the host should observe the shared data. */
+                               memcmp_h(hva, gpa + i, uc.args[3], nr_bytes);
+
+                               /* For shared, write the new pattern to guest memory. */
+                               if (uc.args[0] == SYNC_SHARED)
+                                       memset(hva, uc.args[4], nr_bytes);
+                       }
+                       break;
+               }
+               case UCALL_DONE:
+                       return NULL;
+               default:
+                       TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+               }
+       }
+}
+
+static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus,
+                                uint32_t nr_memslots)
+{
+       /*
+        * Allocate enough memory so that each vCPU's chunk of memory can be
+        * naturally aligned with respect to the size of the backing store.
+        */
+       const size_t alignment = max_t(size_t, SZ_2M, get_backing_src_pagesz(src_type));
+       const size_t per_cpu_size = align_up(PER_CPU_DATA_SIZE, alignment);
+       const size_t memfd_size = per_cpu_size * nr_vcpus;
+       const size_t slot_size = memfd_size / nr_memslots;
+       struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+       pthread_t threads[KVM_MAX_VCPUS];
+       struct kvm_vm *vm;
+       int memfd, i, r;
+
+       const struct vm_shape shape = {
+               .mode = VM_MODE_DEFAULT,
+               .type = KVM_X86_SW_PROTECTED_VM,
+       };
+
+       TEST_ASSERT(slot_size * nr_memslots == memfd_size,
+                   "The memfd size (0x%lx) needs to be cleanly divisible by the number of memslots (%u)",
+                   memfd_size, nr_memslots);
+       vm = __vm_create_with_vcpus(shape, nr_vcpus, 0, guest_code, vcpus);
+
+       vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE));
+
+       memfd = vm_create_guest_memfd(vm, memfd_size, 0);
+
+       for (i = 0; i < nr_memslots; i++)
+               vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i,
+                          BASE_DATA_SLOT + i, slot_size / vm->page_size,
+                          KVM_MEM_GUEST_MEMFD, memfd, slot_size * i);
+
+       for (i = 0; i < nr_vcpus; i++) {
+               uint64_t gpa =  BASE_DATA_GPA + i * per_cpu_size;
+
+               vcpu_args_set(vcpus[i], 1, gpa);
+
+               /*
+                * Map only what is needed so that an out-of-bounds access
+                * results in a #PF => SHUTDOWN instead of data corruption.
+                */
+               virt_map(vm, gpa, gpa, PER_CPU_DATA_SIZE / vm->page_size);
+
+               pthread_create(&threads[i], NULL, __test_mem_conversions, vcpus[i]);
+       }
+
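+       /* Release all vCPU worker threads at (roughly) the same time. */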
+       WRITE_ONCE(run_vcpus, true);
+
+       for (i = 0; i < nr_vcpus; i++)
+               pthread_join(threads[i], NULL);
+
+       kvm_vm_free(vm);
+
+       /*
+        * Allocate and free memory from the guest_memfd after closing the VM
+        * fd.  The guest_memfd is gifted a reference to its owning VM, i.e. it
+        * should prevent the VM from being fully destroyed until the last
+        * reference to the guest_memfd is also put.
+        */
+       r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
+       TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+
+       r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
+       TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+
+       close(memfd);
+}
+
+static void usage(const char *cmd)
+{
+       puts("");
+       printf("usage: %s [-h] [-m nr_memslots] [-s mem_type] [-n nr_vcpus]\n", cmd);
+       puts("");
+       backing_src_help("-s");
+       puts("");
+       puts(" -n: specify the number of vcpus (default: 1)");
+       puts("");
+       puts(" -m: specify the number of memslots (default: 1)");
+       puts("");
+}
+
+int main(int argc, char *argv[])
+{
+       enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC;
+       uint32_t nr_memslots = 1;
+       uint32_t nr_vcpus = 1;
+       int opt;
+
+       TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
+
+       while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) {
+               switch (opt) {
+               case 's':
+                       src_type = parse_backing_src_type(optarg);
+                       break;
+               case 'n':
+                       nr_vcpus = atoi_positive("nr_vcpus", optarg);
+                       break;
+               case 'm':
+                       nr_memslots = atoi_positive("nr_memslots", optarg);
+                       break;
+               case 'h':
+               default:
+                       usage(argv[0]);
+                       exit(0);
+               }
+       }
+
+       test_mem_conversions(src_type, nr_vcpus, nr_memslots);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c
new file mode 100644 (file)
index 0000000..13e72fc
--- /dev/null
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023, Google LLC.
+ */
+#include <linux/kvm.h>
+#include <pthread.h>
+#include <stdint.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+/* Arbitrarily selected to avoid overlaps with anything else */
+#define EXITS_TEST_GVA 0xc0000000
+#define EXITS_TEST_GPA EXITS_TEST_GVA
+#define EXITS_TEST_NPAGES 1
+#define EXITS_TEST_SIZE (EXITS_TEST_NPAGES * PAGE_SIZE)
+#define EXITS_TEST_SLOT 10
+
+static uint64_t guest_repeatedly_read(void)
+{
+       volatile uint64_t value;
+
+       while (true)
+               value = *((uint64_t *) EXITS_TEST_GVA);
+
+       return value;
+}
+
+static uint32_t run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu)
+{
+       int r;
+
+       r = _vcpu_run(vcpu);
+       if (r) {
+               TEST_ASSERT(errno == EFAULT, KVM_IOCTL_ERROR(KVM_RUN, r));
+               TEST_ASSERT_EQ(vcpu->run->exit_reason, KVM_EXIT_MEMORY_FAULT);
+       }
+       return vcpu->run->exit_reason;
+}
+
+const struct vm_shape protected_vm_shape = {
+       .mode = VM_MODE_DEFAULT,
+       .type = KVM_X86_SW_PROTECTED_VM,
+};
+
+static void test_private_access_memslot_deleted(void)
+{
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+       pthread_t vm_thread;
+       void *thread_return;
+       uint32_t exit_reason;
+
+       vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
+                                          guest_repeatedly_read);
+
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                   EXITS_TEST_GPA, EXITS_TEST_SLOT,
+                                   EXITS_TEST_NPAGES,
+                                   KVM_MEM_GUEST_MEMFD);
+
+       virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
+
+       /* Request to access page privately */
+       vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
+
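+       /*
+        * Run the vCPU on a separate thread so the memslot can be deleted
+        * while the guest faults on private memory; the exit reason is passed
+        * back via the thread's return value.
+        */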
+       pthread_create(&vm_thread, NULL,
+                      (void *(*)(void *))run_vcpu_get_exit_reason,
+                      (void *)vcpu);
+
+       vm_mem_region_delete(vm, EXITS_TEST_SLOT);
+
+       pthread_join(vm_thread, &thread_return);
+       exit_reason = (uint32_t)(uint64_t)thread_return;
+
+       TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
+       TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
+       TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
+       TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
+
+       kvm_vm_free(vm);
+}
+
+static void test_private_access_memslot_not_private(void)
+{
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+       uint32_t exit_reason;
+
+       vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
+                                          guest_repeatedly_read);
+
+       /* Add a non-private memslot (flags = 0) */
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                   EXITS_TEST_GPA, EXITS_TEST_SLOT,
+                                   EXITS_TEST_NPAGES, 0);
+
+       virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
+
+       /* Request to access page privately */
+       vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
+
+       exit_reason = run_vcpu_get_exit_reason(vcpu);
+
+       TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
+       TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
+       TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
+       TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
+
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
+
+       test_private_access_memslot_deleted();
+       test_private_access_memslot_not_private();
+}
diff --git a/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c b/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c
new file mode 100644 (file)
index 0000000..cbc92a8
--- /dev/null
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test edge cases and race conditions in kvm_recalculate_apic_map().
+ */
+
+#include <sys/ioctl.h>
+#include <pthread.h>
+#include <time.h>
+
+#include "processor.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "apic.h"
+
+#define TIMEOUT                5       /* seconds */
+
+#define LAPIC_DISABLED 0
+#define LAPIC_X2APIC   (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)
+#define MAX_XAPIC_ID   0xff
+
+static void *race(void *arg)
+{
+       struct kvm_lapic_state lapic = {};
+       struct kvm_vcpu *vcpu = arg;
+
+       while (1) {
+               /* Trigger kvm_recalculate_apic_map(). */
+               vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
+               pthread_testcancel();
+       }
+
+       return NULL;
+}
+
+int main(void)
+{
+       struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+       struct kvm_vcpu *vcpuN;
+       struct kvm_vm *vm;
+       pthread_t thread;
+       time_t t;
+       int i;
+
+       kvm_static_assert(KVM_MAX_VCPUS > MAX_XAPIC_ID);
+
+       /*
+        * Create the max number of vCPUs supported by selftests so that KVM
+        * has a decent amount of work to do when recalculating the map, i.e. to
+        * make the problematic window large enough to hit.
+        */
+       vm = vm_create_with_vcpus(KVM_MAX_VCPUS, NULL, vcpus);
+
+       /*
+        * Enable x2APIC on all vCPUs so that KVM doesn't bail from the recalc
+        * due to vCPUs having aliased xAPIC IDs (truncated to 8 bits).
+        */
+       for (i = 0; i < KVM_MAX_VCPUS; i++)
+               vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC);
+
+       TEST_ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0);
+
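+       /*
+        * Repeatedly toggle x2APIC on the last vCPU to trigger APIC map
+        * recalculations that race with the KVM_SET_LAPIC thread.
+        */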
+       vcpuN = vcpus[KVM_MAX_VCPUS - 1];
+       for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
+               vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_X2APIC);
+               vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED);
+       }
+
+       TEST_ASSERT_EQ(pthread_cancel(thread), 0);
+       TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
+
+       kvm_vm_free(vm);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c
new file mode 100644 (file)
index 0000000..4991378
--- /dev/null
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that KVM_SET_BOOT_CPU_ID works as intended
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "apic.h"
+
+static void guest_bsp_vcpu(void *arg)
+{
+       GUEST_SYNC(1);
+
+       GUEST_ASSERT_NE(get_bsp_flag(), 0);
+
+       GUEST_DONE();
+}
+
+static void guest_not_bsp_vcpu(void *arg)
+{
+       GUEST_SYNC(1);
+
+       GUEST_ASSERT_EQ(get_bsp_flag(), 0);
+
+       GUEST_DONE();
+}
+
+static void test_set_invalid_bsp(struct kvm_vm *vm)
+{
+       unsigned long max_vcpu_id = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
+       int r;
+
+       if (max_vcpu_id) {
+               r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(max_vcpu_id + 1));
+               TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID > MAX should fail");
+       }
+
+       r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(1L << 32));
+       TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID[63:32]!=0 should fail");
+}
+
+static void test_set_bsp_busy(struct kvm_vcpu *vcpu, const char *msg)
+{
+       int r = __vm_ioctl(vcpu->vm, KVM_SET_BOOT_CPU_ID,
+                          (void *)(unsigned long)vcpu->id);
+
+       TEST_ASSERT(r == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set %s", msg);
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+       int stage;
+
+       for (stage = 0; stage < 2; stage++) {
+
+               vcpu_run(vcpu);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+                                       uc.args[1] == stage + 1,
+                                       "Stage %d: Unexpected register values vmexit, got %lx",
+                                       stage + 1, (ulong)uc.args[1]);
+                       test_set_bsp_busy(vcpu, "while running vm");
+                       break;
+               case UCALL_DONE:
+                       TEST_ASSERT(stage == 1,
+                                       "Expected GUEST_DONE in stage 2, got stage %d",
+                                       stage);
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+               default:
+                       TEST_ASSERT(false, "Unexpected exit: %s",
+                                   exit_reason_str(vcpu->run->exit_reason));
+               }
+       }
+}
+
+static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id,
+                               struct kvm_vcpu *vcpus[])
+{
+       struct kvm_vm *vm;
+       uint32_t i;
+
+       vm = vm_create(nr_vcpus);
+
+       test_set_invalid_bsp(vm);
+
+       vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(unsigned long)bsp_vcpu_id);
+
+       for (i = 0; i < nr_vcpus; i++)
+               vcpus[i] = vm_vcpu_add(vm, i, i == bsp_vcpu_id ? guest_bsp_vcpu :
+                                                                guest_not_bsp_vcpu);
+       return vm;
+}
+
+static void run_vm_bsp(uint32_t bsp_vcpu_id)
+{
+       struct kvm_vcpu *vcpus[2];
+       struct kvm_vm *vm;
+
+       vm = create_vm(ARRAY_SIZE(vcpus), bsp_vcpu_id, vcpus);
+
+       run_vcpu(vcpus[0]);
+       run_vcpu(vcpus[1]);
+
+       kvm_vm_free(vm);
+}
+
+static void check_set_bsp_busy(void)
+{
+       struct kvm_vcpu *vcpus[2];
+       struct kvm_vm *vm;
+
+       vm = create_vm(ARRAY_SIZE(vcpus), 0, vcpus);
+
+       test_set_bsp_busy(vcpus[1], "after adding vcpu");
+
+       run_vcpu(vcpus[0]);
+       run_vcpu(vcpus[1]);
+
+       test_set_bsp_busy(vcpus[1], "to a terminated vcpu");
+
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID));
+
+       run_vm_bsp(0);
+       run_vm_bsp(1);
+       run_vm_bsp(0);
+
+       check_set_bsp_busy();
+}
diff --git a/tools/testing/selftests/kvm/x86/set_sregs_test.c b/tools/testing/selftests/kvm/x86/set_sregs_test.c
new file mode 100644 (file)
index 0000000..c021c07
--- /dev/null
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM_SET_SREGS tests
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This is a regression test for the bug fixed by the following commit:
+ * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values")
+ *
+ * That bug allowed a user-mode program that called the KVM_SET_SREGS
+ * ioctl to put a VCPU's local APIC into an invalid state.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define TEST_INVALID_CR_BIT(vcpu, cr, orig, bit)                               \
+do {                                                                           \
+       struct kvm_sregs new;                                                   \
+       int rc;                                                                 \
+                                                                               \
+       /* Skip the sub-test if the feature/bit is supported. */               \
+       if (orig.cr & bit)                                                      \
+               break;                                                          \
+                                                                               \
+       memcpy(&new, &orig, sizeof(sregs));                                     \
+       new.cr |= bit;                                                          \
+                                                                               \
+       rc = _vcpu_sregs_set(vcpu, &new);                                       \
+       TEST_ASSERT(rc, "KVM allowed invalid " #cr " bit (0x%lx)", bit);        \
+                                                                               \
+       /* Sanity check that KVM didn't change anything. */                     \
+       vcpu_sregs_get(vcpu, &new);                                             \
+       TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs");   \
+} while (0)
+
+static uint64_t calc_supported_cr4_feature_bits(void)
+{
+       uint64_t cr4;
+
+       cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE |
+             X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE |
+             X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT;
+       if (kvm_cpu_has(X86_FEATURE_UMIP))
+               cr4 |= X86_CR4_UMIP;
+       if (kvm_cpu_has(X86_FEATURE_LA57))
+               cr4 |= X86_CR4_LA57;
+       if (kvm_cpu_has(X86_FEATURE_VMX))
+               cr4 |= X86_CR4_VMXE;
+       if (kvm_cpu_has(X86_FEATURE_SMX))
+               cr4 |= X86_CR4_SMXE;
+       if (kvm_cpu_has(X86_FEATURE_FSGSBASE))
+               cr4 |= X86_CR4_FSGSBASE;
+       if (kvm_cpu_has(X86_FEATURE_PCID))
+               cr4 |= X86_CR4_PCIDE;
+       if (kvm_cpu_has(X86_FEATURE_XSAVE))
+               cr4 |= X86_CR4_OSXSAVE;
+       if (kvm_cpu_has(X86_FEATURE_SMEP))
+               cr4 |= X86_CR4_SMEP;
+       if (kvm_cpu_has(X86_FEATURE_SMAP))
+               cr4 |= X86_CR4_SMAP;
+       if (kvm_cpu_has(X86_FEATURE_PKU))
+               cr4 |= X86_CR4_PKE;
+
+       return cr4;
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_sregs sregs;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       uint64_t cr4;
+       int rc, i;
+
+       /*
+        * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
+        * use it to verify all supported CR4 bits can be set prior to defining
+        * the vCPU model, i.e. without doing KVM_SET_CPUID2.
+        */
+       vm = vm_create_barebones();
+       vcpu = __vm_vcpu_add(vm, 0);
+
+       vcpu_sregs_get(vcpu, &sregs);
+
+       sregs.cr0 = 0;
+       sregs.cr4 |= calc_supported_cr4_feature_bits();
+       cr4 = sregs.cr4;
+
+       rc = _vcpu_sregs_set(vcpu, &sregs);
+       TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4);
+
+       vcpu_sregs_get(vcpu, &sregs);
+       TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)",
+                   sregs.cr4, cr4);
+
+       /* Verify all unsupported features are rejected by KVM. */
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMXE);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_FSGSBASE);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PCIDE);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_OSXSAVE);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMEP);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMAP);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PKE);
+
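+       /* Bits 63:32 of CR0 are reserved and should be rejected by KVM. */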
+       for (i = 32; i < 64; i++)
+               TEST_INVALID_CR_BIT(vcpu, cr0, sregs, BIT(i));
+
+       /* NW without CD is illegal, as is PG without PE. */
+       TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW);
+       TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG);
+
+       kvm_vm_free(vm);
+
+       /* Create a "real" VM and verify APIC_BASE can be set. */
+       vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+       vcpu_sregs_get(vcpu, &sregs);
+       sregs.apic_base = 1 << 10;
+       rc = _vcpu_sregs_set(vcpu, &sregs);
+       TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
+                   sregs.apic_base);
+       sregs.apic_base = 1 << 11;
+       rc = _vcpu_sregs_set(vcpu, &sregs);
+       TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
+                   sregs.apic_base);
+
+       kvm_vm_free(vm);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
new file mode 100644 (file)
index 0000000..3fb967f
--- /dev/null
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kvm.h>
+#include <linux/psp-sev.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+#define SVM_SEV_FEAT_DEBUG_SWAP 32u
+
+/*
+ * Some features may have hidden dependencies, or may only work
+ * for certain VM types.  Err on the side of safety and don't
+ * expect that all supported features can be passed one by one
+ * to KVM_SEV_INIT2.
+ *
+ * (Well, right now there's only one...)
+ */
+#define KNOWN_FEATURES SVM_SEV_FEAT_DEBUG_SWAP
+
+int kvm_fd;
+u64 supported_vmsa_features;
+bool have_sev_es;
+
+static int __sev_ioctl(int vm_fd, int cmd_id, void *data)
+{
+       struct kvm_sev_cmd cmd = {
+               .id = cmd_id,
+               .data = (uint64_t)data,
+               .sev_fd = open_sev_dev_path_or_exit(),
+       };
+       int ret;
+
+       ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
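+       /* A successful ioctl (ret >= 0) must not report a firmware error. */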
+       TEST_ASSERT(ret < 0 || cmd.error == SEV_RET_SUCCESS,
+                   "%d failed: fw error: %d\n",
+                   cmd_id, cmd.error);
+
+       return ret;
+}
+
+static void test_init2(unsigned long vm_type, struct kvm_sev_init *init)
+{
+       struct kvm_vm *vm;
+       int ret;
+
+       vm = vm_create_barebones_type(vm_type);
+       ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init);
+       TEST_ASSERT(ret == 0,
+                   "KVM_SEV_INIT2 return code is %d (expected 0), errno: %d",
+                   ret, errno);
+       kvm_vm_free(vm);
+}
+
+static void test_init2_invalid(unsigned long vm_type, struct kvm_sev_init *init, const char *msg)
+{
+       struct kvm_vm *vm;
+       int ret;
+
+       vm = vm_create_barebones_type(vm_type);
+       ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init);
+       TEST_ASSERT(ret == -1 && errno == EINVAL,
+                   "KVM_SEV_INIT2 should fail, %s.",
+                   msg);
+       kvm_vm_free(vm);
+}
+
+void test_vm_types(void)
+{
+       test_init2(KVM_X86_SEV_VM, &(struct kvm_sev_init){});
+
+       /*
+        * TODO: check that unsupported types cannot be created.  Probably
+        * a separate selftest.
+        */
+       if (have_sev_es)
+               test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){});
+
+       test_init2_invalid(0, &(struct kvm_sev_init){},
+                          "VM type is KVM_X86_DEFAULT_VM");
+       if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))
+               test_init2_invalid(KVM_X86_SW_PROTECTED_VM, &(struct kvm_sev_init){},
+                                  "VM type is KVM_X86_SW_PROTECTED_VM");
+}
+
+void test_flags(uint32_t vm_type)
+{
+       int i;
+
+       for (i = 0; i < 32; i++)
+               test_init2_invalid(vm_type,
+                       &(struct kvm_sev_init){ .flags = BIT(i) },
+                       "invalid flag");
+}
+
+void test_features(uint32_t vm_type, uint64_t supported_features)
+{
+       int i;
+
+       for (i = 0; i < 64; i++) {
+               if (!(supported_features & BIT_ULL(i)))
+                       test_init2_invalid(vm_type,
+                               &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) },
+                               "unknown feature");
+               else if (KNOWN_FEATURES & BIT_ULL(i))
+                       test_init2(vm_type,
+                               &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) });
+       }
+}
+
+int main(int argc, char *argv[])
+{
+       int kvm_fd = open_kvm_dev_path_or_exit();
+       bool have_sev;
+
+       TEST_REQUIRE(__kvm_has_device_attr(kvm_fd, KVM_X86_GRP_SEV,
+                                          KVM_X86_SEV_VMSA_FEATURES) == 0);
+       kvm_device_attr_get(kvm_fd, KVM_X86_GRP_SEV,
+                           KVM_X86_SEV_VMSA_FEATURES,
+                           &supported_vmsa_features);
+
+       have_sev = kvm_cpu_has(X86_FEATURE_SEV);
+       TEST_ASSERT(have_sev == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)),
+                   "sev: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
+                   kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_VM);
+
+       TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM));
+       have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
+
+       TEST_ASSERT(have_sev_es == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_ES_VM)),
+                   "sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
+                   kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM);
+
+       test_vm_types();
+
+       test_flags(KVM_X86_SEV_VM);
+       if (have_sev_es)
+               test_flags(KVM_X86_SEV_ES_VM);
+
+       test_features(KVM_X86_SEV_VM, 0);
+       if (have_sev_es)
+               test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c
new file mode 100644 (file)
index 0000000..0a6dfba
--- /dev/null
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kvm.h>
+#include <linux/psp-sev.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "sev.h"
+#include "kselftest.h"
+
+#define NR_MIGRATE_TEST_VCPUS 4
+#define NR_MIGRATE_TEST_VMS 3
+#define NR_LOCK_TESTING_THREADS 3
+#define NR_LOCK_TESTING_ITERATIONS 10000
+
+bool have_sev_es;
+
+static struct kvm_vm *sev_vm_create(bool es)
+{
+       struct kvm_vm *vm;
+       int i;
+
+       vm = vm_create_barebones();
+       if (!es)
+               sev_vm_init(vm);
+       else
+               sev_es_vm_init(vm);
+
+       for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+               __vm_vcpu_add(vm, i);
+
+       sev_vm_launch(vm, es ? SEV_POLICY_ES : 0);
+
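+       /* SEV-ES also requires encrypting the VMSA via LAUNCH_UPDATE_VMSA. */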
+       if (es)
+               vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+       return vm;
+}
+
+static struct kvm_vm *aux_vm_create(bool with_vcpus)
+{
+       struct kvm_vm *vm;
+       int i;
+
+       vm = vm_create_barebones();
+       if (!with_vcpus)
+               return vm;
+
+       for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+               __vm_vcpu_add(vm, i);
+
+       return vm;
+}
+
+static int __sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
+{
+       return __vm_enable_cap(dst, KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, src->fd);
+}
+
+
+static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
+{
+       int ret;
+
+       ret = __sev_migrate_from(dst, src);
+       TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d", ret, errno);
+}
+
+static void test_sev_migrate_from(bool es)
+{
+       struct kvm_vm *src_vm;
+       struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS];
+       int i, ret;
+
+       src_vm = sev_vm_create(es);
+       for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
+               dst_vms[i] = aux_vm_create(true);
+
+       /* Initial migration from the src to the first dst. */
+       sev_migrate_from(dst_vms[0], src_vm);
+
+       for (i = 1; i < NR_MIGRATE_TEST_VMS; i++)
+               sev_migrate_from(dst_vms[i], dst_vms[i - 1]);
+
+       /* Migrate the guest back to the original VM. */
+       ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]);
+       TEST_ASSERT(ret == -1 && errno == EIO,
+                   "VM that was migrated from should be dead. ret %d, errno: %d", ret,
+                   errno);
+
+       kvm_vm_free(src_vm);
+       for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
+               kvm_vm_free(dst_vms[i]);
+}
+
+struct locking_thread_input {
+       struct kvm_vm *vm;
+       struct kvm_vm *source_vms[NR_LOCK_TESTING_THREADS];
+};
+
+static void *locking_test_thread(void *arg)
+{
+       int i, j;
+       struct locking_thread_input *input = (struct locking_thread_input *)arg;
+
+       for (i = 0; i < NR_LOCK_TESTING_ITERATIONS; ++i) {
+               j = i % NR_LOCK_TESTING_THREADS;
+               __sev_migrate_from(input->vm, input->source_vms[j]);
+       }
+
+       return NULL;
+}
+
+static void test_sev_migrate_locking(void)
+{
+       struct locking_thread_input input[NR_LOCK_TESTING_THREADS];
+       pthread_t pt[NR_LOCK_TESTING_THREADS];
+       int i;
+
+       for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) {
+               input[i].vm = sev_vm_create(/* es= */ false);
+               input[0].source_vms[i] = input[i].vm;
+       }
+       for (i = 1; i < NR_LOCK_TESTING_THREADS; ++i)
+               memcpy(input[i].source_vms, input[0].source_vms,
+                      sizeof(input[i].source_vms));
+
+       for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+               pthread_create(&pt[i], NULL, locking_test_thread, &input[i]);
+
+       for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+               pthread_join(pt[i], NULL);
+       for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+               kvm_vm_free(input[i].vm);
+}
+
+static void test_sev_migrate_parameters(void)
+{
+       struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_no_sev,
+               *sev_es_vm_no_vmsa;
+       int ret;
+
+       vm_no_vcpu = vm_create_barebones();
+       vm_no_sev = aux_vm_create(true);
+       ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev);
+       TEST_ASSERT(ret == -1 && errno == EINVAL,
+                   "Migrations require SEV enabled. ret %d, errno: %d", ret,
+                   errno);
+
+       if (!have_sev_es)
+               goto out;
+
+       sev_vm = sev_vm_create(/* es= */ false);
+       sev_es_vm = sev_vm_create(/* es= */ true);
+       sev_es_vm_no_vmsa = vm_create_barebones();
+       sev_es_vm_init(sev_es_vm_no_vmsa);
+       __vm_vcpu_add(sev_es_vm_no_vmsa, 1);
+
+       ret = __sev_migrate_from(sev_vm, sev_es_vm);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "Should not be able to migrate to SEV enabled VM. ret: %d, errno: %d",
+               ret, errno);
+
+       ret = __sev_migrate_from(sev_es_vm, sev_vm);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "Should not be able to migrate to SEV-ES enabled VM. ret: %d, errno: %d",
+               ret, errno);
+
+       ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "SEV-ES migrations require the same number of vCPUs. ret: %d, errno: %d",
+               ret, errno);
+
+       ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d",
+               ret, errno);
+
+       kvm_vm_free(sev_vm);
+       kvm_vm_free(sev_es_vm);
+       kvm_vm_free(sev_es_vm_no_vmsa);
+out:
+       kvm_vm_free(vm_no_vcpu);
+       kvm_vm_free(vm_no_sev);
+}
+
+static int __sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
+{
+       return __vm_enable_cap(dst, KVM_CAP_VM_COPY_ENC_CONTEXT_FROM, src->fd);
+}
+
+
+static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
+{
+       int ret;
+
+       ret = __sev_mirror_create(dst, src);
+       TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d", ret, errno);
+}
+
+static void verify_mirror_allowed_cmds(struct kvm_vm *vm)
+{
+       struct kvm_sev_guest_status status;
+       int cmd_id;
+
+       for (cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) {
+               int ret;
+
+               /*
+                * These commands are allowed for mirror VMs; all others are
+                * not.
+                */
+               switch (cmd_id) {
+               case KVM_SEV_LAUNCH_UPDATE_VMSA:
+               case KVM_SEV_GUEST_STATUS:
+               case KVM_SEV_DBG_DECRYPT:
+               case KVM_SEV_DBG_ENCRYPT:
+                       continue;
+               default:
+                       break;
+               }
+
+               /*
+                * These commands should be disallowed before the data
+                * parameter is examined so NULL is OK here.
+                */
+               ret = __vm_sev_ioctl(vm, cmd_id, NULL);
+               TEST_ASSERT(
+                       ret == -1 && errno == EINVAL,
+                       "Should not be able to call command: %d. ret: %d, errno: %d",
+                       cmd_id, ret, errno);
+       }
+
+       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+}
+
+static void test_sev_mirror(bool es)
+{
+       struct kvm_vm *src_vm, *dst_vm;
+       int i;
+
+       src_vm = sev_vm_create(es);
+       dst_vm = aux_vm_create(false);
+
+       sev_mirror_create(dst_vm, src_vm);
+
+       /* Check that we can complete creation of the mirror VM.  */
+       for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+               __vm_vcpu_add(dst_vm, i);
+
+       if (es)
+               vm_sev_ioctl(dst_vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+
+       verify_mirror_allowed_cmds(dst_vm);
+
+       kvm_vm_free(src_vm);
+       kvm_vm_free(dst_vm);
+}
+
+static void test_sev_mirror_parameters(void)
+{
+       struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_with_vcpu;
+       int ret;
+
+       sev_vm = sev_vm_create(/* es= */ false);
+       vm_with_vcpu = aux_vm_create(true);
+       vm_no_vcpu = aux_vm_create(false);
+
+       ret = __sev_mirror_create(sev_vm, sev_vm);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "Should not be able to copy context to self. ret: %d, errno: %d",
+               ret, errno);
+
+       ret = __sev_mirror_create(vm_no_vcpu, vm_with_vcpu);
+       TEST_ASSERT(ret == -1 && errno == EINVAL,
+                   "Copy context requires SEV enabled. ret %d, errno: %d", ret,
+                   errno);
+
+       ret = __sev_mirror_create(vm_with_vcpu, sev_vm);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d",
+               ret, errno);
+
+       if (!have_sev_es)
+               goto out;
+
+       sev_es_vm = sev_vm_create(/* es= */ true);
+       ret = __sev_mirror_create(sev_vm, sev_es_vm);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "Should not be able to copy context to SEV enabled VM. ret: %d, errno: %d",
+               ret, errno);
+
+       ret = __sev_mirror_create(sev_es_vm, sev_vm);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "Should not be able to copy context to SEV-ES enabled VM. ret: %d, errno: %d",
+               ret, errno);
+
+       kvm_vm_free(sev_es_vm);
+
+out:
+       kvm_vm_free(sev_vm);
+       kvm_vm_free(vm_with_vcpu);
+       kvm_vm_free(vm_no_vcpu);
+}
+
+static void test_sev_move_copy(void)
+{
+       struct kvm_vm *dst_vm, *dst2_vm, *dst3_vm, *sev_vm, *mirror_vm,
+                     *dst_mirror_vm, *dst2_mirror_vm, *dst3_mirror_vm;
+
+       sev_vm = sev_vm_create(/* es= */ false);
+       dst_vm = aux_vm_create(true);
+       dst2_vm = aux_vm_create(true);
+       dst3_vm = aux_vm_create(true);
+       mirror_vm = aux_vm_create(false);
+       dst_mirror_vm = aux_vm_create(false);
+       dst2_mirror_vm = aux_vm_create(false);
+       dst3_mirror_vm = aux_vm_create(false);
+
+       sev_mirror_create(mirror_vm, sev_vm);
+
+       sev_migrate_from(dst_mirror_vm, mirror_vm);
+       sev_migrate_from(dst_vm, sev_vm);
+
+       sev_migrate_from(dst2_vm, dst_vm);
+       sev_migrate_from(dst2_mirror_vm, dst_mirror_vm);
+
+       sev_migrate_from(dst3_mirror_vm, dst2_mirror_vm);
+       sev_migrate_from(dst3_vm, dst2_vm);
+
+       kvm_vm_free(dst_vm);
+       kvm_vm_free(sev_vm);
+       kvm_vm_free(dst2_vm);
+       kvm_vm_free(dst3_vm);
+       kvm_vm_free(mirror_vm);
+       kvm_vm_free(dst_mirror_vm);
+       kvm_vm_free(dst2_mirror_vm);
+       kvm_vm_free(dst3_mirror_vm);
+
+       /*
+        * Run a similar test, but destroy the mirrors before the mirrored VMs
+        * to ensure destruction is done safely.
+        */
+       sev_vm = sev_vm_create(/* es= */ false);
+       dst_vm = aux_vm_create(true);
+       mirror_vm = aux_vm_create(false);
+       dst_mirror_vm = aux_vm_create(false);
+
+       sev_mirror_create(mirror_vm, sev_vm);
+
+       sev_migrate_from(dst_mirror_vm, mirror_vm);
+       sev_migrate_from(dst_vm, sev_vm);
+
+       kvm_vm_free(mirror_vm);
+       kvm_vm_free(dst_mirror_vm);
+       kvm_vm_free(dst_vm);
+       kvm_vm_free(sev_vm);
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM));
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM));
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+       have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
+
+       if (kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) {
+               test_sev_migrate_from(/* es= */ false);
+               if (have_sev_es)
+                       test_sev_migrate_from(/* es= */ true);
+               test_sev_migrate_locking();
+               test_sev_migrate_parameters();
+               if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM))
+                       test_sev_move_copy();
+       }
+       if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) {
+               test_sev_mirror(/* es= */ false);
+               if (have_sev_es)
+                       test_sev_mirror(/* es= */ true);
+               test_sev_mirror_parameters();
+       }
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
new file mode 100644 (file)
index 0000000..ae77698
--- /dev/null
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <math.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "linux/psp-sev.h"
+#include "sev.h"
+
+
+#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)
+
+static void guest_sev_es_code(void)
+{
+       /* TODO: Check CPUID after GHCB-based hypercall support is added. */
+       GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
+       GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED);
+
+       /*
+        * TODO: Add GHCB and ucall support for SEV-ES guests.  For now, simply
+        * force "termination" to signal "done" via the GHCB MSR protocol.
+        */
+       wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
+       __asm__ __volatile__("rep; vmmcall");
+}
+
+static void guest_sev_code(void)
+{
+       GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV));
+       GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
+
+       GUEST_DONE();
+}
+
+/* Stash state passed via VMSA before any compiled code runs.  */
+extern void guest_code_xsave(void);
+asm("guest_code_xsave:\n"
+    "mov $" __stringify(XFEATURE_MASK_X87_AVX) ", %eax\n"
+    "xor %edx, %edx\n"
+    "xsave (%rdi)\n"
+    "jmp guest_sev_es_code");
+
+static void compare_xsave(u8 *from_host, u8 *from_guest)
+{
+       int i;
+       bool bad = false;
+       for (i = 0; i < 4095; i++) {
+               if (from_host[i] != from_guest[i]) {
+                       printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]);
+                       bad = true;
+               }
+       }
+
+       if (bad)
+               abort();
+}
+
+static void test_sync_vmsa(uint32_t policy)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       vm_vaddr_t gva;
+       void *hva;
+
+       double x87val = M_PI;
+       struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 };
+
+       vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu);
+       gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR,
+                                   MEM_REGION_TEST_DATA);
+       hva = addr_gva2hva(vm, gva);
+
+       vcpu_args_set(vcpu, 1, gva);
+
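+       /*
+        * Load a known x87/AVX state and snapshot it with XSAVE so it can be
+        * pushed into the vCPU and later compared against the guest's view.
+        */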
+       asm("fninit\n"
+           "vpcmpeqb %%ymm4, %%ymm4, %%ymm4\n"
+           "fldl %3\n"
+           "xsave (%2)\n"
+           "fstp %%st\n"
+           : "=m"(xsave)
+           : "A"(XFEATURE_MASK_X87_AVX), "r"(&xsave), "m" (x87val)
+           : "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");
+       vcpu_xsave_set(vcpu, &xsave);
+
+       vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL);
+
+       /* This page is shared, so make it decrypted.  */
+       memset(hva, 0, 4096);
+
+       vcpu_run(vcpu);
+
+       TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
+                   "Wanted SYSTEM_EVENT, got %s",
+                   exit_reason_str(vcpu->run->exit_reason));
+       TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM);
+       TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1);
+       TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ);
+
+       compare_xsave((u8 *)&xsave, (u8 *)hva);
+
+       kvm_vm_free(vm);
+}
+
+static void test_sev(void *guest_code, uint64_t policy)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+
+       uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
+
+       vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu);
+
+       /* TODO: Validate the measurement is as expected. */
+       vm_sev_launch(vm, policy, NULL);
+
+       for (;;) {
+               vcpu_run(vcpu);
+
+               if (policy & SEV_POLICY_ES) {
+                       TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
+                                   "Wanted SYSTEM_EVENT, got %s",
+                                   exit_reason_str(vcpu->run->exit_reason));
+                       TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM);
+                       TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1);
+                       TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ);
+                       break;
+               }
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       continue;
+               case UCALL_DONE:
+                       return;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+               default:
+                       TEST_FAIL("Unexpected exit: %s",
+                                 exit_reason_str(vcpu->run->exit_reason));
+               }
+       }
+
+       kvm_vm_free(vm);
+}
+
+static void guest_shutdown_code(void)
+{
+       struct desc_ptr idt;
+
+       /* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. */
+       memset(&idt, 0, sizeof(idt));
+       __asm__ __volatile__("lidt %0" :: "m"(idt));
+
+       __asm__ __volatile__("ud2");
+}
+
+static void test_sev_es_shutdown(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       uint32_t type = KVM_X86_SEV_ES_VM;
+
+       vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu);
+
+       vm_sev_launch(vm, SEV_POLICY_ES, NULL);
+
+       vcpu_run(vcpu);
+       TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN,
+                   "Wanted SHUTDOWN, got %s",
+                   exit_reason_str(vcpu->run->exit_reason));
+
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       const u64 xf_mask = XFEATURE_MASK_X87_AVX;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+       test_sev(guest_sev_code, SEV_POLICY_NO_DBG);
+       test_sev(guest_sev_code, 0);
+
+       if (kvm_cpu_has(X86_FEATURE_SEV_ES)) {
+               test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG);
+               test_sev(guest_sev_es_code, SEV_POLICY_ES);
+
+               test_sev_es_shutdown();
+
+               if (kvm_has_cap(KVM_CAP_XCRS) &&
+                   (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) {
+                       test_sync_vmsa(0);
+                       test_sync_vmsa(SEV_POLICY_NO_DBG);
+               }
+       }
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
new file mode 100644 (file)
index 0000000..fabeead
--- /dev/null
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Test that KVM emulates instructions in response to EPT violations when
+ * allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR.
+ */
+#include "flds_emulation.h"
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define MAXPHYADDR 36
+
+#define MEM_REGION_GVA 0x0000123456789000
+#define MEM_REGION_GPA 0x0000000700000000
+#define MEM_REGION_SLOT        10
+#define MEM_REGION_SIZE PAGE_SIZE
+
+static void guest_code(bool tdp_enabled)
+{
+       uint64_t error_code;
+       uint64_t vector;
+
+       vector = kvm_asm_safe_ec(FLDS_MEM_EAX, error_code, "a"(MEM_REGION_GVA));
+
+       /*
+        * When TDP is enabled, flds will trigger an emulation failure, exit to
+        * userspace, and then the selftest host "VMM" skips the instruction.
+        *
+        * When TDP is disabled, no instruction emulation is required so flds
+        * should generate #PF(RSVD).
+        */
+       if (tdp_enabled) {
+               GUEST_ASSERT(!vector);
+       } else {
+               GUEST_ASSERT_EQ(vector, PF_VECTOR);
+               GUEST_ASSERT(error_code & PFERR_RSVD_MASK);
+       }
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       uint64_t *pte;
+       uint64_t *hva;
+       uint64_t gpa;
+       int rc;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled());
+
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_MAX_PHY_ADDR, MAXPHYADDR);
+
+       rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE);
+       TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable");
+       vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
+
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                   MEM_REGION_GPA, MEM_REGION_SLOT,
+                                   MEM_REGION_SIZE / PAGE_SIZE, 0);
+       gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE,
+                                MEM_REGION_GPA, MEM_REGION_SLOT);
+       TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc");
+       virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1);
+       hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+       memset(hva, 0, PAGE_SIZE);
+
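+       /*
+        * Set a physical address bit above the guest's MAXPHYADDR in the PTE;
+        * the GPA is reserved from the guest's perspective, but legal for the
+        * host/hardware.
+        */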
+       pte = vm_get_page_table_entry(vm, MEM_REGION_GVA);
+       *pte |= BIT_ULL(MAXPHYADDR);
+
+       vcpu_run(vcpu);
+
+       /*
+        * When TDP is enabled, KVM must emulate accesses to a guest physical
+        * address that is illegal from the guest's perspective but legal from
+        * hardware's perspective.  This should result in an emulation
+        * failure exit to userspace since KVM doesn't support emulating flds.
+        */
+       if (kvm_is_tdp_enabled()) {
+               handle_flds_emulation_failure_exit(vcpu);
+               vcpu_run(vcpu);
+       }
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       case UCALL_DONE:
+               break;
+       default:
+               TEST_FAIL("Unrecognized ucall: %lu", uc.cmd);
+       }
+
+       kvm_vm_free(vm);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/smm_test.c b/tools/testing/selftests/kvm/x86/smm_test.c
new file mode 100644 (file)
index 0000000..55c88d6
--- /dev/null
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for SMM.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+
+#include "vmx.h"
+#include "svm_util.h"
+
+#define SMRAM_SIZE 65536
+#define SMRAM_MEMSLOT ((1 << 16) | 1)
+#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
+#define SMRAM_GPA 0x1000000
+#define SMRAM_STAGE 0xfe
+
+#define STR(x) #x
+#define XSTR(s) STR(s)
+
+#define SYNC_PORT 0xe
+#define DONE 0xff
+
+/*
+ * This is compiled as normal 64-bit code, however, the SMI handler is
+ * executed in real-address mode.  To keep things simple, we limit ourselves
+ * to a mode-independent subset of asm here.
+ * The SMI handler always reports back the fixed stage SMRAM_STAGE.
+ */
+uint8_t smi_handler[] = {
+       0xb0, SMRAM_STAGE,    /* mov $SMRAM_STAGE, %al */
+       0xe4, SYNC_PORT,      /* in $SYNC_PORT, %al */
+       0x0f, 0xaa,           /* rsm */
+};
+
+static inline void sync_with_host(uint64_t phase)
+{
+       asm volatile("in $" XSTR(SYNC_PORT)", %%al \n"
+                    : "+a" (phase));
+}
+
+static void self_smi(void)
+{
+       x2apic_write_reg(APIC_ICR,
+                        APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
+}
+
+static void l2_guest_code(void)
+{
+       sync_with_host(8);
+
+       sync_with_host(10);
+
+       vmcall();
+}
+
+static void guest_code(void *arg)
+{
+       #define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
+       struct svm_test_data *svm = arg;
+       struct vmx_pages *vmx_pages = arg;
+
+       sync_with_host(1);
+
+       wrmsr(MSR_IA32_APICBASE, apicbase | X2APIC_ENABLE);
+
+       sync_with_host(2);
+
+       self_smi();
+
+       sync_with_host(4);
+
+       if (arg) {
+               if (this_cpu_has(X86_FEATURE_SVM)) {
+                       generic_svm_setup(svm, l2_guest_code,
+                                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+               } else {
+                       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+                       GUEST_ASSERT(load_vmcs(vmx_pages));
+                       prepare_vmcs(vmx_pages, l2_guest_code,
+                                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+               }
+
+               sync_with_host(5);
+
+               self_smi();
+
+               sync_with_host(7);
+
+               if (this_cpu_has(X86_FEATURE_SVM)) {
+                       run_guest(svm->vmcb, svm->vmcb_gpa);
+                       run_guest(svm->vmcb, svm->vmcb_gpa);
+               } else {
+                       vmlaunch();
+                       vmresume();
+               }
+
+               /* Stages 8-11 are eaten by SMM (SMRAM_STAGE reported instead) */
+               sync_with_host(12);
+       }
+
+       sync_with_host(DONE);
+}
+
+void inject_smi(struct kvm_vcpu *vcpu)
+{
+       struct kvm_vcpu_events events;
+
+       vcpu_events_get(vcpu, &events);
+
+       events.smi.pending = 1;
+       events.flags |= KVM_VCPUEVENT_VALID_SMM;
+
+       vcpu_events_set(vcpu, &events);
+}
+
+int main(int argc, char *argv[])
+{
+       vm_vaddr_t nested_gva = 0;
+
+       struct kvm_vcpu *vcpu;
+       struct kvm_regs regs;
+       struct kvm_vm *vm;
+       struct kvm_x86_state *state;
+       int stage, stage_reported;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM));
+
+       /* Create VM */
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
+                                   SMRAM_MEMSLOT, SMRAM_PAGES, 0);
+       TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT)
+                   == SMRAM_GPA, "could not allocate guest physical addresses?");
+
+       memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE);
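+       /* The SMI handler entry point is SMBASE + 0x8000, i.e. SMRAM_GPA + 0x8000. */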
+       memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler,
+              sizeof(smi_handler));
+
+       vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA);
+
+       if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
+               if (kvm_cpu_has(X86_FEATURE_SVM))
+                       vcpu_alloc_svm(vm, &nested_gva);
+               else if (kvm_cpu_has(X86_FEATURE_VMX))
+                       vcpu_alloc_vmx(vm, &nested_gva);
+       }
+
+       if (!nested_gva)
+               pr_info("will skip the nested (VMX/SVM) portion of the SMM test\n");
+
+       vcpu_args_set(vcpu, 1, nested_gva);
+
+       for (stage = 1;; stage++) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               memset(&regs, 0, sizeof(regs));
+               vcpu_regs_get(vcpu, &regs);
+
+               stage_reported = regs.rax & 0xff;
+
+               if (stage_reported == DONE)
+                       goto done;
+
+               TEST_ASSERT(stage_reported == stage ||
+                           stage_reported == SMRAM_STAGE,
+                           "Unexpected stage: #%x, got %x",
+                           stage, stage_reported);
+
+               /*
+                * Enter SMM during L2 execution and check that we correctly
+                * return from it. Do not perform save/restore while in SMM yet.
+                */
+               if (stage == 8) {
+                       inject_smi(vcpu);
+                       continue;
+               }
+
+               /*
+                * Perform save/restore while the guest is in SMM triggered
+                * during L2 execution.
+                */
+               if (stage == 10)
+                       inject_smi(vcpu);
+
+               state = vcpu_save_state(vcpu);
+               kvm_vm_release(vm);
+
+               vcpu = vm_recreate_with_one_vcpu(vm);
+               vcpu_load_state(vcpu, state);
+               kvm_x86_state_cleanup(state);
+       }
+
+done:
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c
new file mode 100644 (file)
index 0000000..141b7fc
--- /dev/null
@@ -0,0 +1,323 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM_GET/SET_* tests
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for vCPU state save/restore, including nested guest state.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+#define L2_GUEST_STACK_SIZE 256
+
+void svm_l2_guest_code(void)
+{
+       GUEST_SYNC(4);
+       /* Exit to L1 */
+       vmcall();
+       GUEST_SYNC(6);
+       /* Done, exit to L1 and never come back.  */
+       vmcall();
+}
+
+static void svm_l1_guest_code(struct svm_test_data *svm)
+{
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       struct vmcb *vmcb = svm->vmcb;
+
+       GUEST_ASSERT(svm->vmcb_gpa);
+       /* Prepare for L2 execution. */
+       generic_svm_setup(svm, svm_l2_guest_code,
+                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       GUEST_SYNC(3);
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+       GUEST_SYNC(5);
+       vmcb->save.rip += 3;
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+       GUEST_SYNC(7);
+}
+
+void vmx_l2_guest_code(void)
+{
+       GUEST_SYNC(6);
+
+       /* Exit to L1 */
+       vmcall();
+
+       /* L1 has now set up a shadow VMCS for us.  */
+       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+       GUEST_SYNC(10);
+       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+       GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
+       GUEST_SYNC(11);
+       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
+       GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
+       GUEST_SYNC(12);
+
+       /* Done, exit to L1 and never come back.  */
+       vmcall();
+}
+
+static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
+{
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+       GUEST_ASSERT(vmx_pages->vmcs_gpa);
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+       GUEST_SYNC(3);
+       GUEST_ASSERT(load_vmcs(vmx_pages));
+       GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+       GUEST_SYNC(4);
+       GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+       prepare_vmcs(vmx_pages, vmx_l2_guest_code,
+                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       GUEST_SYNC(5);
+       GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+       /* Check that the launched state is preserved.  */
+       GUEST_ASSERT(vmlaunch());
+
+       GUEST_ASSERT(!vmresume());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+       GUEST_SYNC(7);
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+       GUEST_ASSERT(!vmresume());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+       vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);
+
+       vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
+       vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa);
+
+       GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
+       GUEST_ASSERT(vmlaunch());
+       GUEST_SYNC(8);
+       GUEST_ASSERT(vmlaunch());
+       GUEST_ASSERT(vmresume());
+
+       vmwrite(GUEST_RIP, 0xc0ffee);
+       GUEST_SYNC(9);
+       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+
+       GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
+       GUEST_ASSERT(!vmresume());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+       GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
+       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
+       GUEST_ASSERT(vmlaunch());
+       GUEST_ASSERT(vmresume());
+       GUEST_SYNC(13);
+       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
+       GUEST_ASSERT(vmlaunch());
+       GUEST_ASSERT(vmresume());
+}
+
+static void __attribute__((__flatten__)) guest_code(void *arg)
+{
+       GUEST_SYNC(1);
+
+       if (this_cpu_has(X86_FEATURE_XSAVE)) {
+               uint64_t supported_xcr0 = this_cpu_supported_xcr0();
+               uint8_t buffer[4096];
+
+               memset(buffer, 0xcc, sizeof(buffer));
+
+               /*
+                * Modify state for all supported xfeatures to take them out of
+                * their "init" state, i.e. to make them show up in XSTATE_BV.
+                *
+                * Note off-by-default features, e.g. AMX, are out of scope for
+                * this particular testcase as they have a different ABI.
+                */
+               GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
+               asm volatile ("fincstp");
+
+               GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
+               asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));
+
+               if (supported_xcr0 & XFEATURE_MASK_YMM)
+                       asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));
+
+               if (supported_xcr0 & XFEATURE_MASK_AVX512) {
+                       asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
+                       asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
+                       asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
+               }
+
+               if (this_cpu_has(X86_FEATURE_MPX)) {
+                       uint64_t bounds[2] = { 10, 0xffffffffull };
+                       uint64_t output[2] = { };
+
+                       GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
+                       GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);
+
+                       /*
+                        * Don't bother trying to get BNDCSR into the INUSE
+                        * state.  MSR_IA32_BNDCFGS doesn't count as it isn't
+                        * managed via XSAVE/XRSTOR, and BNDCFGU can only be
+                        * modified by XRSTOR.  Stuffing XSTATE_BV in the host
+                        * is simpler than doing XRSTOR here in the guest.
+                        *
+                        * However, temporarily enable MPX in BNDCFGS so that
+                        * BNDMOV actually loads BND1.  If MPX isn't *fully*
+                        * enabled, all MPX instructions are treated as NOPs.
+                        *
+                        * Hand encode "bndmov (%rax),%bnd1" as support for MPX
+                        * mnemonics/registers has been removed from gcc and
+                        * clang (and was never fully supported by clang).
+                        */
+                       wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
+                       asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));
+                       /*
+                        * Hand encode "bndmov %bnd1, (%rax)" to sanity check
+                        * that BND1 actually got loaded.
+                        */
+                       asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
+                       wrmsr(MSR_IA32_BNDCFGS, 0);
+
+                       GUEST_ASSERT_EQ(bounds[0], output[0]);
+                       GUEST_ASSERT_EQ(bounds[1], output[1]);
+               }
+               if (this_cpu_has(X86_FEATURE_PKU)) {
+                       GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
+                       set_cr4(get_cr4() | X86_CR4_PKE);
+                       GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));
+
+                       wrpkru(-1u);
+               }
+       }
+
+       GUEST_SYNC(2);
+
+       if (arg) {
+               if (this_cpu_has(X86_FEATURE_SVM))
+                       svm_l1_guest_code(arg);
+               else
+                       vmx_l1_guest_code(arg);
+       }
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       uint64_t *xstate_bv, saved_xstate_bv;
+       vm_vaddr_t nested_gva = 0;
+       struct kvm_cpuid2 empty_cpuid = {};
+       struct kvm_regs regs1, regs2;
+       struct kvm_vcpu *vcpu, *vcpuN;
+       struct kvm_vm *vm;
+       struct kvm_x86_state *state;
+       struct ucall uc;
+       int stage;
+
+       /* Create VM */
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       vcpu_regs_get(vcpu, &regs1);
+
+       if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
+               if (kvm_cpu_has(X86_FEATURE_SVM))
+                       vcpu_alloc_svm(vm, &nested_gva);
+               else if (kvm_cpu_has(X86_FEATURE_VMX))
+                       vcpu_alloc_vmx(vm, &nested_gva);
+       }
+
+       if (!nested_gva)
+               pr_info("will skip nested state checks\n");
+
+       vcpu_args_set(vcpu, 1, nested_gva);
+
+       for (stage = 1;; stage++) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+
+               /* UCALL_SYNC is handled here.  */
+               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+                           uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+                           stage, (ulong)uc.args[1]);
+
+               state = vcpu_save_state(vcpu);
+               memset(&regs1, 0, sizeof(regs1));
+               vcpu_regs_get(vcpu, &regs1);
+
+               kvm_vm_release(vm);
+
+               /* Restore state in a new VM.  */
+               vcpu = vm_recreate_with_one_vcpu(vm);
+               vcpu_load_state(vcpu, state);
+
+               /*
+                * Restore XSAVE state in a dummy vCPU, first without doing
+                * KVM_SET_CPUID2, and then with an empty guest CPUID.  Except
+                * for off-by-default xfeatures, e.g. AMX, KVM is supposed to
+                * allow KVM_SET_XSAVE regardless of guest CPUID.  Manually
+                * load only XSAVE state, MSRs in particular have a much more
+                * convoluted ABI.
+                *
+                * Load two versions of XSAVE state: one with the actual guest
+                * XSAVE state, and one with all supported features forced "on"
+                * in xstate_bv, e.g. to ensure that KVM allows loading all
+                * supported features, even if something goes awry in saving
+                * the original snapshot.
+                */
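+               /* XSTATE_BV is the first u64 of the XSAVE header, at offset 512. */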
+               xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
+               saved_xstate_bv = *xstate_bv;
+
+               vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
+               vcpu_xsave_set(vcpuN, state->xsave);
+               *xstate_bv = kvm_cpu_supported_xcr0();
+               vcpu_xsave_set(vcpuN, state->xsave);
+
+               vcpu_init_cpuid(vcpuN, &empty_cpuid);
+               vcpu_xsave_set(vcpuN, state->xsave);
+               *xstate_bv = saved_xstate_bv;
+               vcpu_xsave_set(vcpuN, state->xsave);
+
+               kvm_x86_state_cleanup(state);
+
+               memset(&regs2, 0, sizeof(regs2));
+               vcpu_regs_get(vcpu, &regs2);
+               TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+                           "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+                           (ulong) regs2.rdi, (ulong) regs2.rsi);
+       }
+
+done:
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
new file mode 100644 (file)
index 0000000..916e042
--- /dev/null
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_int_ctl_test
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Nested SVM testing: test simultaneous use of V_IRQ from L1 and L0.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "apic.h"
+
+bool vintr_irq_called;
+bool intr_irq_called;
+
+#define VINTR_IRQ_NUMBER 0x20
+#define INTR_IRQ_NUMBER 0x30
+
+static void vintr_irq_handler(struct ex_regs *regs)
+{
+       vintr_irq_called = true;
+}
+
+static void intr_irq_handler(struct ex_regs *regs)
+{
+       x2apic_write_reg(APIC_EOI, 0x00);
+       intr_irq_called = true;
+}
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+       /*
+        * This code raises interrupt INTR_IRQ_NUMBER in L1's LAPIC, and since
+        * L1 didn't enable virtual interrupt masking, L2 should receive it
+        * and not L1.
+        *
+        * L2 also has the virtual interrupt VINTR_IRQ_NUMBER pending in V_IRQ,
+        * so it should receive that one as well after the following 'sti'.
+        */
+       x2apic_write_reg(APIC_ICR,
+               APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
+
+       __asm__ __volatile__(
+               "sti\n"
+               "nop\n"
+       );
+
+       GUEST_ASSERT(vintr_irq_called);
+       GUEST_ASSERT(intr_irq_called);
+
+       __asm__ __volatile__(
+               "vmcall\n"
+       );
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+       #define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       struct vmcb *vmcb = svm->vmcb;
+
+       x2apic_enable();
+
+       /* Prepare for L2 execution. */
+       generic_svm_setup(svm, l2_guest_code,
+                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       /* No virtual interrupt masking */
+       vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+
+       /* No intercepts for real and virtual interrupts */
+       vmcb->control.intercept &= ~(BIT(INTERCEPT_INTR) | BIT(INTERCEPT_VINTR));
+
+       /* Make a virtual interrupt VINTR_IRQ_NUMBER pending */
+       vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
+       vmcb->control.int_vector = VINTR_IRQ_NUMBER;
+
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       vm_vaddr_t svm_gva;
+       struct kvm_vm *vm;
+       struct ucall uc;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+       vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler);
+       vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler);
+
+       vcpu_alloc_svm(vm, &svm_gva);
+       vcpu_args_set(vcpu, 1, svm_gva);
+
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+               /* NOT REACHED */
+       case UCALL_DONE:
+               goto done;
+       default:
+               TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+       }
+done:
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c
new file mode 100644 (file)
index 0000000..00135cb
--- /dev/null
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_nested_shutdown_test
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ * Nested SVM testing: test that unintercepted shutdown in L2 doesn't crash the host
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+       __asm__ __volatile__("ud2");
+}
+
+static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt)
+{
+       #define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       struct vmcb *vmcb = svm->vmcb;
+
+       generic_svm_setup(svm, l2_guest_code,
+                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
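+       /*
+        * Don't intercept SHUTDOWN in L1; the shutdown triggered in L2 then
+        * takes down the whole VM, which the host sees as KVM_EXIT_SHUTDOWN.
+        */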
+       vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
+
+       idt[6].p   = 0; // #UD is intercepted but its injection will cause #NP
+       idt[11].p  = 0; // #NP is not intercepted and will cause another
+                       // #NP that will be converted to #DF
+       idt[8].p   = 0; // #DF will cause #NP which will cause SHUTDOWN
+
+       run_guest(vmcb, svm->vmcb_gpa);
+
+       /* should not reach here */
+       GUEST_ASSERT(0);
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       vm_vaddr_t svm_gva;
+       struct kvm_vm *vm;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+       vcpu_alloc_svm(vm, &svm_gva);
+
+       vcpu_args_set(vcpu, 2, svm_gva, vm->arch.idt);
+
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
new file mode 100644 (file)
index 0000000..7b6481d
--- /dev/null
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 Oracle and/or its affiliates.
+ *
+ * Based on:
+ *   svm_int_ctl_test
+ *
+ *   Copyright (C) 2021, Red Hat, Inc.
+ *
+ */
+#include <stdatomic.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "test_util.h"
+
+#define INT_NR                 0x20
+
+static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless");
+
+static unsigned int bp_fired;
+static void guest_bp_handler(struct ex_regs *regs)
+{
+       bp_fired++;
+}
+
+static unsigned int int_fired;
+static void l2_guest_code_int(void);
+
+static void guest_int_handler(struct ex_regs *regs)
+{
+       int_fired++;
+       GUEST_ASSERT_EQ(regs->rip, (unsigned long)l2_guest_code_int);
+}
+
+static void l2_guest_code_int(void)
+{
+       GUEST_ASSERT_EQ(int_fired, 1);
+
+       /*
+        * Same as the vmmcall() function, but with a ud2 sneaked after the
+        * vmmcall.  The caller injects an exception with the return address
+        * increased by 2, so the "pop rbp" must be after the ud2 and we cannot
+        * use vmmcall() directly.
+        */
+       __asm__ __volatile__("push %%rbp; vmmcall; ud2; pop %%rbp"
+                             : : "a"(0xdeadbeef), "c"(0xbeefdead)
+                             : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+                               "r10", "r11", "r12", "r13", "r14", "r15");
+
+       GUEST_ASSERT_EQ(bp_fired, 1);
+       hlt();
+}
+
+static atomic_int nmi_stage;
+#define nmi_stage_get() atomic_load_explicit(&nmi_stage, memory_order_acquire)
+#define nmi_stage_inc() atomic_fetch_add_explicit(&nmi_stage, 1, memory_order_acq_rel)
+static void guest_nmi_handler(struct ex_regs *regs)
+{
+       nmi_stage_inc();
+
+       if (nmi_stage_get() == 1) {
+               vmmcall();
+               GUEST_FAIL("Unexpected resume after VMMCALL");
+       } else {
+               GUEST_ASSERT_EQ(nmi_stage_get(), 3);
+               GUEST_DONE();
+       }
+}
+
+static void l2_guest_code_nmi(void)
+{
+       ud2();
+}
+
+static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t idt_alt)
+{
+       #define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       struct vmcb *vmcb = svm->vmcb;
+
+       if (is_nmi)
+               x2apic_enable();
+
+       /* Prepare for L2 execution. */
+       generic_svm_setup(svm,
+                         is_nmi ? l2_guest_code_nmi : l2_guest_code_int,
+                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       vmcb->control.intercept_exceptions |= BIT(PF_VECTOR) | BIT(UD_VECTOR);
+       vmcb->control.intercept |= BIT(INTERCEPT_NMI) | BIT(INTERCEPT_HLT);
+
+       if (is_nmi) {
+               vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
+       } else {
+               vmcb->control.event_inj = INT_NR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_SOFT;
+               /* The return address pushed on stack */
+               vmcb->control.next_rip = vmcb->save.rip;
+       }
+
+       run_guest(vmcb, svm->vmcb_gpa);
+       __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
+                      "Expected VMMCALL #VMEXIT, got '0x%x', info1 = '0x%lx', info2 = '0x%lx'",
+                      vmcb->control.exit_code,
+                      vmcb->control.exit_info_1, vmcb->control.exit_info_2);
+
+       if (is_nmi) {
+               clgi();
+               x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI);
+
+               GUEST_ASSERT_EQ(nmi_stage_get(), 1);
+               nmi_stage_inc();
+
+               stgi();
+               /* self-NMI happens here */
+               while (true)
+                       cpu_relax();
+       }
+
+       /* Skip over VMMCALL */
+       vmcb->save.rip += 3;
+
+       /* Switch to alternate IDT to cause intervening NPF again */
+       vmcb->save.idtr.base = idt_alt;
+       vmcb->control.clean = 0; /* &= ~BIT(VMCB_DT) would be enough */
+
+       vmcb->control.event_inj = BP_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
+       /* The return address pushed on stack, skip over UD2 */
+       vmcb->control.next_rip = vmcb->save.rip + 2;
+
+       run_guest(vmcb, svm->vmcb_gpa);
+       __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
+                      "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx', info2 = '0x%lx'",
+                      vmcb->control.exit_code,
+                      vmcb->control.exit_info_1, vmcb->control.exit_info_2);
+
+       GUEST_DONE();
+}
+
+static void run_test(bool is_nmi)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       vm_vaddr_t svm_gva;
+       vm_vaddr_t idt_alt_vm;
+       struct kvm_guest_debug debug;
+
+       pr_info("Running %s test\n", is_nmi ? "NMI" : "soft int");
+
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+       vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
+       vm_install_exception_handler(vm, BP_VECTOR, guest_bp_handler);
+       vm_install_exception_handler(vm, INT_NR, guest_int_handler);
+
+       vcpu_alloc_svm(vm, &svm_gva);
+
+       if (!is_nmi) {
+               void *idt, *idt_alt;
+
+               idt_alt_vm = vm_vaddr_alloc_page(vm);
+               idt_alt = addr_gva2hva(vm, idt_alt_vm);
+               idt = addr_gva2hva(vm, vm->arch.idt);
+               memcpy(idt_alt, idt, getpagesize());
+       } else {
+               idt_alt_vm = 0;
+       }
+       vcpu_args_set(vcpu, 3, svm_gva, (uint64_t)is_nmi, (uint64_t)idt_alt_vm);
+
+       memset(&debug, 0, sizeof(debug));
+       vcpu_guest_debug_set(vcpu, &debug);
+
+       struct ucall uc;
+
+       alarm(2);
+       vcpu_run(vcpu);
+       alarm(0);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+               /* NOT REACHED */
+       case UCALL_DONE:
+               goto done;
+       default:
+               TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+       }
+done:
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+       TEST_ASSERT(kvm_cpu_has(X86_FEATURE_NRIPS),
+                   "KVM with nSVM is supposed to unconditionally advertise nRIP Save");
+
+       atomic_init(&nmi_stage, 0);
+
+       run_test(false);
+       run_test(true);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c
new file mode 100644 (file)
index 0000000..8a62cca
--- /dev/null
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_vmcall_test
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ *
+ * Nested SVM testing: VMCALL
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+       __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+       #define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       struct vmcb *vmcb = svm->vmcb;
+
+       /* Prepare for L2 execution. */
+       generic_svm_setup(svm, l2_guest_code,
+                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       run_guest(vmcb, svm->vmcb_gpa);
+
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       vm_vaddr_t svm_gva;
+       struct kvm_vm *vm;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+       vcpu_alloc_svm(vm, &svm_gva);
+       vcpu_args_set(vcpu, 1, svm_gva);
+
+       for (;;) {
+               struct ucall uc;
+
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+               }
+       }
+done:
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/sync_regs_test.c b/tools/testing/selftests/kvm/x86/sync_regs_test.c
new file mode 100644 (file)
index 0000000..8fa3948
--- /dev/null
@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for x86 KVM_CAP_SYNC_REGS
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality,
+ * including requesting an invalid register set, and updates to/from values
+ * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <pthread.h>
+
+#include "kvm_test_harness.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define UCALL_PIO_PORT ((uint16_t)0x1000)
+
+struct ucall uc_none = {
+       .cmd = UCALL_NONE,
+};
+
+/*
+ * The ucall is embedded here to protect against the compiler reshuffling
+ * registers before calling a function.  In this test we only need to get a
+ * KVM_EXIT_IO vmexit and preserve RBX; no additional information is needed.
+ */
+void guest_code(void)
+{
+       asm volatile("1: in %[port], %%al\n"
+                    "add $0x1, %%rbx\n"
+                    "jmp 1b"
+                    : : [port] "d" (UCALL_PIO_PORT), "D" (&uc_none)
+                    : "rax", "rbx");
+}
+
+KVM_ONE_VCPU_TEST_SUITE(sync_regs_test);
+
+static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
+{
+#define REG_COMPARE(reg) \
+       TEST_ASSERT(left->reg == right->reg, \
+                   "Register " #reg \
+                   " values did not match: 0x%llx, 0x%llx", \
+                   left->reg, right->reg)
+       REG_COMPARE(rax);
+       REG_COMPARE(rbx);
+       REG_COMPARE(rcx);
+       REG_COMPARE(rdx);
+       REG_COMPARE(rsi);
+       REG_COMPARE(rdi);
+       REG_COMPARE(rsp);
+       REG_COMPARE(rbp);
+       REG_COMPARE(r8);
+       REG_COMPARE(r9);
+       REG_COMPARE(r10);
+       REG_COMPARE(r11);
+       REG_COMPARE(r12);
+       REG_COMPARE(r13);
+       REG_COMPARE(r14);
+       REG_COMPARE(r15);
+       REG_COMPARE(rip);
+       REG_COMPARE(rflags);
+#undef REG_COMPARE
+}
+
+static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right)
+{
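+       /* Not implemented; system register comparison is currently a no-op. */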
+}
+
+static void compare_vcpu_events(struct kvm_vcpu_events *left,
+                               struct kvm_vcpu_events *right)
+{
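+       /* Not implemented; vCPU events comparison is currently a no-op. */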
+}
+
+#define TEST_SYNC_FIELDS   (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
+#define INVALID_SYNC_FIELD 0x80000000
+
+/*
+ * Set an exception as pending *and* injected while KVM is processing events.
+ * KVM is supposed to ignore/drop pending exceptions if userspace is also
+ * requesting that an exception be injected.
+ */
+static void *race_events_inj_pen(void *arg)
+{
+       struct kvm_run *run = (struct kvm_run *)arg;
+       struct kvm_vcpu_events *events = &run->s.regs.events;
+
+       WRITE_ONCE(events->exception.nr, UD_VECTOR);
+
+       for (;;) {
+               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
+               WRITE_ONCE(events->flags, 0);
+               WRITE_ONCE(events->exception.injected, 1);
+               WRITE_ONCE(events->exception.pending, 1);
+
+               pthread_testcancel();
+       }
+
+       return NULL;
+}
+
+/*
+ * Set an invalid exception vector while KVM is processing events.  KVM is
+ * supposed to reject any vector >= 32, as well as NMIs (vector 2).
+ */
+static void *race_events_exc(void *arg)
+{
+       struct kvm_run *run = (struct kvm_run *)arg;
+       struct kvm_vcpu_events *events = &run->s.regs.events;
+
+       for (;;) {
+               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
+               WRITE_ONCE(events->flags, 0);
+               WRITE_ONCE(events->exception.nr, UD_VECTOR);
+               WRITE_ONCE(events->exception.pending, 1);
+               WRITE_ONCE(events->exception.nr, 255);
+
+               pthread_testcancel();
+       }
+
+       return NULL;
+}
+
+/*
+ * Toggle CR4.PAE while KVM is processing SREGS, EFER.LME=1 with CR4.PAE=0 is
+ * illegal, and KVM's MMU heavily relies on vCPU state being valid.
+ */
+static noinline void *race_sregs_cr4(void *arg)
+{
+       struct kvm_run *run = (struct kvm_run *)arg;
+       __u64 *cr4 = &run->s.regs.sregs.cr4;
+       __u64 pae_enabled = *cr4;
+       __u64 pae_disabled = *cr4 & ~X86_CR4_PAE;
+
+       for (;;) {
+               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_SREGS);
+               WRITE_ONCE(*cr4, pae_enabled);
+               asm volatile(".rept 512\n\t"
+                            "nop\n\t"
+                            ".endr");
+               WRITE_ONCE(*cr4, pae_disabled);
+
+               pthread_testcancel();
+       }
+
+       return NULL;
+}
+
+static void race_sync_regs(struct kvm_vcpu *vcpu, void *racer)
+{
+       const time_t TIMEOUT = 2; /* seconds, roughly */
+       struct kvm_x86_state *state;
+       struct kvm_translation tr;
+       struct kvm_run *run;
+       pthread_t thread;
+       time_t t;
+
+       run = vcpu->run;
+
+       run->kvm_valid_regs = KVM_SYNC_X86_SREGS;
+       vcpu_run(vcpu);
+       run->kvm_valid_regs = 0;
+
+       /* Save state *before* spawning the thread that mucks with vCPU state. */
+       state = vcpu_save_state(vcpu);
+
+       /*
+        * Selftests run 64-bit guests by default, both EFER.LME and CR4.PAE
+        * should already be set in guest state.
+        */
+       TEST_ASSERT((run->s.regs.sregs.cr4 & X86_CR4_PAE) &&
+                   (run->s.regs.sregs.efer & EFER_LME),
+                   "vCPU should be in long mode, CR4.PAE=%d, EFER.LME=%d",
+                   !!(run->s.regs.sregs.cr4 & X86_CR4_PAE),
+                   !!(run->s.regs.sregs.efer & EFER_LME));
+
+       TEST_ASSERT_EQ(pthread_create(&thread, NULL, racer, (void *)run), 0);
+
+       for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
+               /*
+                * Reload known good state if the vCPU triple faults, e.g. due
+                * to the unhandled #GPs being injected.  VMX preserves state
+                * on shutdown, but SVM synthesizes an INIT as the VMCB state
+                * is architecturally undefined on triple fault.
+                */
+               if (!__vcpu_run(vcpu) && run->exit_reason == KVM_EXIT_SHUTDOWN)
+                       vcpu_load_state(vcpu, state);
+
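+               /*
+                * For the CR4.PAE race, also issue KVM_TRANSLATE to force a
+                * guest page table walk while sregs are being toggled.
+                */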
+               if (racer == race_sregs_cr4) {
+                       tr = (struct kvm_translation) { .linear_address = 0 };
+                       __vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr);
+               }
+       }
+
+       TEST_ASSERT_EQ(pthread_cancel(thread), 0);
+       TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
+
+       kvm_x86_state_cleanup(state);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, read_invalid, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       int rv;
+
+       /* Request reading invalid register set from VCPU. */
+       run->kvm_valid_regs = INVALID_SYNC_FIELD;
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv < 0 && errno == EINVAL,
+                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
+                   rv);
+       run->kvm_valid_regs = 0;
+
+       run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv < 0 && errno == EINVAL,
+                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
+                   rv);
+       run->kvm_valid_regs = 0;
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, set_invalid, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       int rv;
+
+       /* Request setting invalid register set into VCPU. */
+       run->kvm_dirty_regs = INVALID_SYNC_FIELD;
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv < 0 && errno == EINVAL,
+                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
+                   rv);
+       run->kvm_dirty_regs = 0;
+
+       run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv < 0 && errno == EINVAL,
+                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
+                   rv);
+       run->kvm_dirty_regs = 0;
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, req_and_verify_all_valid, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_vcpu_events events;
+       struct kvm_sregs sregs;
+       struct kvm_regs regs;
+
+       /* Request and verify all valid register sets. */
+       /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       vcpu_regs_get(vcpu, &regs);
+       compare_regs(&regs, &run->s.regs.regs);
+
+       vcpu_sregs_get(vcpu, &sregs);
+       compare_sregs(&sregs, &run->s.regs.sregs);
+
+       vcpu_events_get(vcpu, &events);
+       compare_vcpu_events(&events, &run->s.regs.events);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, set_and_verify_various, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_vcpu_events events;
+       struct kvm_sregs sregs;
+       struct kvm_regs regs;
+
+       /* Run once to get register set */
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       /* Set and verify various register values. */
+       run->s.regs.regs.rbx = 0xBAD1DEA;
+       run->s.regs.sregs.apic_base = 1 << 11;
+       /* TODO run->s.regs.events.XYZ = ABC; */
+
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+       TEST_ASSERT(run->s.regs.regs.rbx == 0xBAD1DEA + 1,
+                   "rbx sync regs value incorrect 0x%llx.",
+                   run->s.regs.regs.rbx);
+       TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11,
+                   "apic_base sync regs value incorrect 0x%llx.",
+                   run->s.regs.sregs.apic_base);
+
+       vcpu_regs_get(vcpu, &regs);
+       compare_regs(&regs, &run->s.regs.regs);
+
+       vcpu_sregs_get(vcpu, &sregs);
+       compare_sregs(&sregs, &run->s.regs.sregs);
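+       /*
+        * Enable and link the shadow VMCS so that L2's VMREAD/VMWRITE
+        * (exercised in vmx_l2_guest_code) operate on the shadow VMCS.
+        */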
+
+       vcpu_events_get(vcpu, &events);
+       compare_vcpu_events(&events, &run->s.regs.events);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_dirty_regs_bits, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+
+       /* Clear kvm_dirty_regs bits, verify new s.regs values are
+        * overwritten with existing guest values.
+        */
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       run->kvm_dirty_regs = 0;
+       run->s.regs.regs.rbx = 0xDEADBEEF;
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+       TEST_ASSERT(run->s.regs.regs.rbx != 0xDEADBEEF,
+                   "rbx sync regs value incorrect 0x%llx.",
+                   run->s.regs.regs.rbx);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_and_dirty_regs, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_regs regs;
+
+       /* Run once to get register set */
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       /* Clear kvm_valid_regs bits and kvm_dirty_regs bits.
+        * Verify s.regs values are not overwritten with existing guest values
+        * and that guest values are not overwritten with kvm_sync_regs values.
+        */
+       run->kvm_valid_regs = 0;
+       run->kvm_dirty_regs = 0;
+       run->s.regs.regs.rbx = 0xAAAA;
+       vcpu_regs_get(vcpu, &regs);
+       regs.rbx = 0xBAC0;
+       vcpu_regs_set(vcpu, &regs);
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+       TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA,
+                   "rbx sync regs value incorrect 0x%llx.",
+                   run->s.regs.regs.rbx);
+       vcpu_regs_get(vcpu, &regs);
+       TEST_ASSERT(regs.rbx == 0xBAC0 + 1,
+                   "rbx guest value incorrect 0x%llx.",
+                   regs.rbx);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_regs_bits, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_regs regs;
+
+       /* Run once to get register set */
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten
+        * with existing guest values but that guest values are overwritten
+        * with kvm_sync_regs values.
+        */
+       run->kvm_valid_regs = 0;
+       run->kvm_dirty_regs = TEST_SYNC_FIELDS;
+       run->s.regs.regs.rbx = 0xBBBB;
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+       TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB,
+                   "rbx sync regs value incorrect 0x%llx.",
+                   run->s.regs.regs.rbx);
+       vcpu_regs_get(vcpu, &regs);
+       TEST_ASSERT(regs.rbx == 0xBBBB + 1,
+                   "rbx guest value incorrect 0x%llx.",
+                   regs.rbx);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, race_cr4, guest_code)
+{
+       race_sync_regs(vcpu, race_sregs_cr4);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, race_exc, guest_code)
+{
+       race_sync_regs(vcpu, race_events_exc);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, race_inj_pen, guest_code)
+{
+       race_sync_regs(vcpu, race_events_inj_pen);
+}
+
+int main(int argc, char *argv[])
+{
+       int cap;
+
+       cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
+       TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS);
+       TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD));
+
+       return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kvm/x86/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c
new file mode 100644 (file)
index 0000000..56306a1
--- /dev/null
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define ARBITRARY_IO_PORT      0x2000
+
+/* The virtual machine object. */
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+       asm volatile("inb %%dx, %%al"
+                    : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
+}
+
+#define L2_GUEST_STACK_SIZE 64
+unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+void l1_guest_code_vmx(struct vmx_pages *vmx)
+{
+       GUEST_ASSERT(vmx->vmcs_gpa);
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+       GUEST_ASSERT(load_vmcs(vmx));
+
+       prepare_vmcs(vmx, l2_guest_code,
+                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       GUEST_ASSERT(!vmlaunch());
+       /* L2 should triple fault after a triple fault event is injected. */
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
+       GUEST_DONE();
+}
+
+void l1_guest_code_svm(struct svm_test_data *svm)
+{
+       struct vmcb *vmcb = svm->vmcb;
+
+       generic_svm_setup(svm, l2_guest_code,
+                       &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       /* Don't intercept shutdown, to test the case where SVM allows L1 to leave it unintercepted */
+       vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
+
+       run_guest(vmcb, svm->vmcb_gpa);
+
+       /* should not reach here, L1 should crash  */
+       GUEST_ASSERT(0);
+}
+
+int main(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_run *run;
+       struct kvm_vcpu_events events;
+       struct ucall uc;
+
+       bool has_vmx = kvm_cpu_has(X86_FEATURE_VMX);
+       bool has_svm = kvm_cpu_has(X86_FEATURE_SVM);
+
+       TEST_REQUIRE(has_vmx || has_svm);
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT));
+
+       if (has_vmx) {
+               vm_vaddr_t vmx_pages_gva;
+
+               vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx);
+               vcpu_alloc_vmx(vm, &vmx_pages_gva);
+               vcpu_args_set(vcpu, 1, vmx_pages_gva);
+       } else {
+               vm_vaddr_t svm_gva;
+
+               vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm);
+               vcpu_alloc_svm(vm, &svm_gva);
+               vcpu_args_set(vcpu, 1, svm_gva);
+       }
+
+       vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1);
+       run = vcpu->run;
+       vcpu_run(vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+       TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
+                   "Expected IN from port %d from L2, got port %d",
+                   ARBITRARY_IO_PORT, run->io.port);
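+
+       /*
+        * Make a triple fault event pending, then use immediate_exit to
+        * complete the in-flight port I/O without running any more guest
+        * code before the event is checked and delivered.
+        */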
+       vcpu_events_get(vcpu, &events);
+       events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
+       events.triple_fault.pending = true;
+       vcpu_events_set(vcpu, &events);
+       run->immediate_exit = true;
+       vcpu_run_complete_io(vcpu);
+
+       vcpu_events_get(vcpu, &events);
+       TEST_ASSERT(events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT,
+                   "Triple fault event invalid");
+       TEST_ASSERT(events.triple_fault.pending,
+                   "No triple fault pending");
+       vcpu_run(vcpu);
+
+       if (has_svm) {
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+       } else {
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_DONE:
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+               default:
+                       TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+               }
+       }
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c
new file mode 100644 (file)
index 0000000..12b0964
--- /dev/null
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests for MSR_IA32_TSC and MSR_IA32_TSC_ADJUST.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <string.h>
+#include "kvm_util.h"
+#include "processor.h"
+
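+/*
+ * Values are rounded to the nearest UNITY (2^30) so that the small amount of
+ * TSC progress between reads does not cause spurious mismatches.
+ */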
+#define UNITY                  (1ull << 30)
+#define HOST_ADJUST            (UNITY * 64)
+#define GUEST_STEP             (UNITY * 4)
+#define ROUND(x)               ((x + UNITY / 2) & -UNITY)
+#define rounded_rdmsr(x)       ROUND(rdmsr(x))
+#define rounded_host_rdmsr(x)  ROUND(vcpu_get_msr(vcpu, x))
+
+static void guest_code(void)
+{
+       u64 val = 0;
+
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+       /* Guest: writes to MSR_IA32_TSC affect both MSRs.  */
+       val = 1ull * GUEST_STEP;
+       wrmsr(MSR_IA32_TSC, val);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+       /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs.  */
+       GUEST_SYNC(2);
+       val = 2ull * GUEST_STEP;
+       wrmsr(MSR_IA32_TSC_ADJUST, val);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+       /* Host: setting the TSC offset.  */
+       GUEST_SYNC(3);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+       /*
+        * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
+        * host-side offset and affect both MSRs.
+        */
+       GUEST_SYNC(4);
+       val = 3ull * GUEST_STEP;
+       wrmsr(MSR_IA32_TSC_ADJUST, val);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+       /*
+        * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
+        * offset is now visible in MSR_IA32_TSC_ADJUST.
+        */
+       GUEST_SYNC(5);
+       val = 4ull * GUEST_STEP;
+       wrmsr(MSR_IA32_TSC, val);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+
+       GUEST_DONE();
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
+{
+       struct ucall uc;
+
+       vcpu_run(vcpu);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_SYNC:
+               if (!strcmp((const char *)uc.args[0], "hello") &&
+                   uc.args[1] == stage + 1)
+                       ksft_test_result_pass("stage %d passed\n", stage + 1);
+               else
+                       ksft_test_result_fail(
+                               "stage %d: Unexpected register values vmexit, got %lx\n",
+                               stage + 1, (ulong)uc.args[1]);
+               return;
+       case UCALL_DONE:
+               ksft_test_result_pass("stage %d passed\n", stage + 1);
+               return;
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+       default:
+               TEST_ASSERT(false, "Unexpected exit: %s",
+                           exit_reason_str(vcpu->run->exit_reason));
+       }
+}
+
+int main(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       uint64_t val;
+
+       ksft_print_header();
+       ksft_set_plan(5);
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       val = 0;
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+       /* Guest: writes to MSR_IA32_TSC affect both MSRs.  */
+       run_vcpu(vcpu, 1);
+       val = 1ull * GUEST_STEP;
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+       /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs.  */
+       run_vcpu(vcpu, 2);
+       val = 2ull * GUEST_STEP;
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+       /*
+        * Host: writes to MSR_IA32_TSC set the host-side offset
+        * and therefore do not change MSR_IA32_TSC_ADJUST.
+        */
+       vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+       run_vcpu(vcpu, 3);
+
+       /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC.  */
+       vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456);
+
+       /* Restore previous value.  */
+       vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+       /*
+        * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
+        * host-side offset and affect both MSRs.
+        */
+       run_vcpu(vcpu, 4);
+       val = 3ull * GUEST_STEP;
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+       /*
+        * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
+        * offset is now visible in MSR_IA32_TSC_ADJUST.
+        */
+       run_vcpu(vcpu, 5);
+       val = 4ull * GUEST_STEP;
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+
+       kvm_vm_free(vm);
+
+       ksft_finished();        /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c
new file mode 100644 (file)
index 0000000..59c7304
--- /dev/null
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2021 Amazon.com, Inc. or its affiliates.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <stdint.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+
+#define NR_TEST_VCPUS 20
+
+static struct kvm_vm *vm;
+pthread_spinlock_t create_lock;
+
+#define TEST_TSC_KHZ    2345678UL
+#define TEST_TSC_OFFSET 200000000
+
+uint64_t tsc_sync;
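+
+/*
+ * Each vCPU publishes its latest TSC reading through tsc_sync; observing a
+ * value that went backwards relative to the last published value means the
+ * vCPUs' TSCs are not in sync.
+ */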
+static void guest_code(void)
+{
+       uint64_t start_tsc, local_tsc, tmp;
+
+       start_tsc = rdtsc();
+       do {
+               tmp = READ_ONCE(tsc_sync);
+               local_tsc = rdtsc();
+               WRITE_ONCE(tsc_sync, local_tsc);
+               if (unlikely(local_tsc < tmp))
+                       GUEST_SYNC_ARGS(0, local_tsc, tmp, 0, 0);
+
+       } while (local_tsc - start_tsc < 5000 * TEST_TSC_KHZ);
+
+       GUEST_DONE();
+}
+
+
+static void *run_vcpu(void *_cpu_nr)
+{
+       unsigned long vcpu_id = (unsigned long)_cpu_nr;
+       unsigned long failures = 0;
+       static bool first_cpu_done;
+       struct kvm_vcpu *vcpu;
+
+       /* The kernel is fine, but vm_vcpu_add() needs locking */
+       pthread_spin_lock(&create_lock);
+
+       vcpu = vm_vcpu_add(vm, vcpu_id, guest_code);
+
+       if (!first_cpu_done) {
+               first_cpu_done = true;
+               vcpu_set_msr(vcpu, MSR_IA32_TSC, TEST_TSC_OFFSET);
+       }
+
+       pthread_spin_unlock(&create_lock);
+
+       for (;;) {
+               struct ucall uc;
+
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_DONE:
+                       goto out;
+
+               case UCALL_SYNC:
+                       printf("Guest %d sync %lx %lx %ld\n", vcpu->id,
+                              uc.args[2], uc.args[3], uc.args[2] - uc.args[3]);
+                       failures++;
+                       break;
+
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+out:
+       return (void *)failures;
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_TSC_CONTROL));
+
+       vm = vm_create(NR_TEST_VCPUS);
+       vm_ioctl(vm, KVM_SET_TSC_KHZ, (void *) TEST_TSC_KHZ);
+
+       pthread_spin_init(&create_lock, PTHREAD_PROCESS_PRIVATE);
+       pthread_t cpu_threads[NR_TEST_VCPUS];
+       unsigned long cpu;
+       for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++)
+               pthread_create(&cpu_threads[cpu], NULL, run_vcpu, (void *)cpu);
+
+       unsigned long failures = 0;
+       for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++) {
+               void *this_cpu_failures;
+               pthread_join(cpu_threads[cpu], &this_cpu_failures);
+               failures += (unsigned long)this_cpu_failures;
+       }
+
+       TEST_ASSERT(!failures, "TSC sync failed");
+       pthread_spin_destroy(&create_lock);
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/ucna_injection_test.c b/tools/testing/selftests/kvm/x86/ucna_injection_test.c
new file mode 100644 (file)
index 0000000..57f157c
--- /dev/null
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucna_injection_test
+ *
+ * Copyright (C) 2022, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Test that user space can inject UnCorrectable No Action required (UCNA)
+ * memory errors into the guest.
+ *
+ * The test starts one vCPU with MCG_CMCI_P enabled.  It verifies that
+ * proper UCNA errors can be injected into a vCPU with MCG_CMCI_P and the
+ * corresponding per-bank control register (MCI_CTL2) bit enabled.
+ * The test also checks that UCNA errors get recorded in the Machine Check
+ * bank registers regardless of whether the error-signaling interrupts are
+ * delivered to the guest.
+ */
+#include <pthread.h>
+#include <inttypes.h>
+#include <string.h>
+#include <time.h>
+
+#include "kvm_util.h"
+#include "mce.h"
+#include "processor.h"
+#include "test_util.h"
+#include "apic.h"
+
+#define SYNC_FIRST_UCNA 9
+#define SYNC_SECOND_UCNA 10
+#define SYNC_GP 11
+#define FIRST_UCNA_ADDR 0xdeadbeef
+#define SECOND_UCNA_ADDR 0xcafeb0ba
+
+/*
+ * Vector for the CMCI interrupt.
+ * Value is arbitrary. Any value in 0x20-0xFF should work:
+ * https://wiki.osdev.org/Interrupt_Vector_Table
+ */
+#define CMCI_VECTOR  0xa9
+
+#define UCNA_BANK  0x7 // IMC0 bank
+
+#define MCI_CTL2_RESERVED_BIT BIT_ULL(29)
+
+static uint64_t supported_mcg_caps;
+
+/*
+ * Record states about the injected UCNA.
+ * The variables started with the 'i_' prefixes are recorded in interrupt
+ * handler. Variables without the 'i_' prefixes are recorded in guest main
+ * execution thread.
+ */
+static volatile uint64_t i_ucna_rcvd;
+static volatile uint64_t i_ucna_addr;
+static volatile uint64_t ucna_addr;
+static volatile uint64_t ucna_addr2;
+
+struct thread_params {
+       struct kvm_vcpu *vcpu;
+       uint64_t *p_i_ucna_rcvd;
+       uint64_t *p_i_ucna_addr;
+       uint64_t *p_ucna_addr;
+       uint64_t *p_ucna_addr2;
+};
+
+static void verify_apic_base_addr(void)
+{
+       uint64_t msr = rdmsr(MSR_IA32_APICBASE);
+       uint64_t base = GET_APIC_BASE(msr);
+
+       GUEST_ASSERT(base == APIC_DEFAULT_GPA);
+}
+
+static void ucna_injection_guest_code(void)
+{
+       uint64_t ctl2;
+       verify_apic_base_addr();
+       xapic_enable();
+
+       /* Set up the interrupt vector and enable per-bank CMCI signaling. */
+       xapic_write_reg(APIC_LVTCMCI, CMCI_VECTOR | APIC_DM_FIXED);
+       ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+       wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
+
+       /* Enables interrupt in guest. */
+       asm volatile("sti");
+
+       /* Let user space inject the first UCNA */
+       GUEST_SYNC(SYNC_FIRST_UCNA);
+
+       ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+
+       /* Disables the per-bank CMCI signaling. */
+       ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+       wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 & ~MCI_CTL2_CMCI_EN);
+
+       /* Let the user space inject the second UCNA */
+       GUEST_SYNC(SYNC_SECOND_UCNA);
+
+       ucna_addr2 = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+       GUEST_DONE();
+}
+
+static void cmci_disabled_guest_code(void)
+{
+       uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+       wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
+
+       GUEST_DONE();
+}
+
+static void cmci_enabled_guest_code(void)
+{
+       uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+       wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_RESERVED_BIT);
+
+       GUEST_DONE();
+}
+
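+/* CMCI handler: count the interrupt and record the reported UCNA address. */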
+static void guest_cmci_handler(struct ex_regs *regs)
+{
+       i_ucna_rcvd++;
+       i_ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+       xapic_write_reg(APIC_EOI, 0);
+}
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+       GUEST_SYNC(SYNC_GP);
+}
+
+static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       vcpu_run(vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+       TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC,
+                   "Expect UCALL_SYNC");
+       TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected.");
+       printf("vCPU received GP in guest.\n");
+}
+
+static void inject_ucna(struct kvm_vcpu *vcpu, uint64_t addr)
+{
+       /*
+        * A UCNA error is indicated with VAL=1, UC=1, PCC=0, S=0 and AR=0 in
+        * the IA32_MCi_STATUS register.
+        * MSCOD=1 (BIT[16] - MscodDataRdErr).
+        * MCACOD=0x0090 (Memory controller error format, channel 0)
+        */
+       uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
+                         MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090;
+       struct kvm_x86_mce mce = {};
+       mce.status = status;
+       mce.mcg_status = 0;
+       /*
+        * MCM_ADDR_PHYS indicates the reported address is a physical address.
+        * The lowest 6 bits hold the recoverable address LSB, i.e. the
+        * injected MCE is at 4KB granularity.
+        */
+       mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
+       mce.addr = addr;
+       mce.bank = UCNA_BANK;
+
+       vcpu_ioctl(vcpu, KVM_X86_SET_MCE, &mce);
+}
+
+static void *run_ucna_injection(void *arg)
+{
+       struct thread_params *params = (struct thread_params *)arg;
+       struct ucall uc;
+       int old;
+       int r;
+
+       r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+       TEST_ASSERT(r == 0,
+                   "pthread_setcanceltype failed with errno=%d",
+                   r);
+
+       vcpu_run(params->vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
+       TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
+                   "Expect UCALL_SYNC");
+       TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA.");
+
+       printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR);
+
+       inject_ucna(params->vcpu, FIRST_UCNA_ADDR);
+       vcpu_run(params->vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
+       TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
+                   "Expect UCALL_SYNC");
+       TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA.");
+
+       printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR);
+
+       inject_ucna(params->vcpu, SECOND_UCNA_ADDR);
+       vcpu_run(params->vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
+       if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) {
+               TEST_ASSERT(false, "vCPU assertion failure: %s.",
+                           (const char *)uc.args[0]);
+       }
+
+       return NULL;
+}
+
+static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *params)
+{
+       struct kvm_vm *vm = vcpu->vm;
+       params->vcpu = vcpu;
+       params->p_i_ucna_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_rcvd);
+       params->p_i_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_addr);
+       params->p_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr);
+       params->p_ucna_addr2 = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr2);
+
+       run_ucna_injection(params);
+
+       TEST_ASSERT(*params->p_i_ucna_rcvd == 1, "Only the first UCNA should get signaled.");
+       TEST_ASSERT(*params->p_i_ucna_addr == FIRST_UCNA_ADDR,
+                   "Only the first UCNA's address should be recorded via the interrupt.");
+       TEST_ASSERT(*params->p_ucna_addr == FIRST_UCNA_ADDR,
+                   "The first injected UCNA should be exposed via the bank registers.");
+       TEST_ASSERT(*params->p_ucna_addr2 == SECOND_UCNA_ADDR,
+                   "The second injected UCNA should be exposed via the bank registers.");
+
+       printf("Test successful.\n"
+              "UCNA CMCI interrupts received: %ld\n"
+              "Last UCNA address received via CMCI: %lx\n"
+              "First UCNA address in vCPU thread: %lx\n"
+              "Second UCNA address in vCPU thread: %lx\n",
+              *params->p_i_ucna_rcvd, *params->p_i_ucna_addr,
+              *params->p_ucna_addr, *params->p_ucna_addr2);
+}
+
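+/*
+ * Program the vCPU's MCG capabilities via KVM_X86_SETUP_MCE, optionally
+ * advertising CMCI (MCG_CMCI_P) support.
+ */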
+static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p)
+{
+       uint64_t mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS;
+       if (enable_cmci_p)
+               mcg_caps |= MCG_CMCI_P;
+
+       mcg_caps &= supported_mcg_caps | MCG_CAP_BANKS_MASK;
+       vcpu_ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_caps);
+}
+
+static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, uint32_t vcpuid,
+                                                bool enable_cmci_p, void *guest_code)
+{
+       struct kvm_vcpu *vcpu = vm_vcpu_add(vm, vcpuid, guest_code);
+       setup_mce_cap(vcpu, enable_cmci_p);
+       return vcpu;
+}
+
+int main(int argc, char *argv[])
+{
+       struct thread_params params;
+       struct kvm_vm *vm;
+       struct kvm_vcpu *ucna_vcpu;
+       struct kvm_vcpu *cmcidis_vcpu;
+       struct kvm_vcpu *cmci_vcpu;
+
+       kvm_check_cap(KVM_CAP_MCE);
+
+       vm = __vm_create(VM_SHAPE_DEFAULT, 3, 0);
+
+       kvm_ioctl(vm->kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED,
+                 &supported_mcg_caps);
+
+       if (!(supported_mcg_caps & MCG_CMCI_P)) {
+               print_skip("MCG_CMCI_P is not supported");
+               exit(KSFT_SKIP);
+       }
+
+       ucna_vcpu = create_vcpu_with_mce_cap(vm, 0, true, ucna_injection_guest_code);
+       cmcidis_vcpu = create_vcpu_with_mce_cap(vm, 1, false, cmci_disabled_guest_code);
+       cmci_vcpu = create_vcpu_with_mce_cap(vm, 2, true, cmci_enabled_guest_code);
+
+       vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler);
+       vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+
+       virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+       test_ucna_injection(ucna_vcpu, &params);
+       run_vcpu_expect_gp(cmcidis_vcpu);
+       run_vcpu_expect_gp(cmci_vcpu);
+
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/userspace_io_test.c b/tools/testing/selftests/kvm/x86/userspace_io_test.c
new file mode 100644 (file)
index 0000000..9481cbc
--- /dev/null
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
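+/*
+ * Execute "rep insb" from port 0x80 into the buffer.  The expected final
+ * buffer pointer accounts for main() rewriting RCX on the userspace I/O
+ * exit: a count of 2 is shrunk to 1, and a count of 3 is grown to 8192.
+ */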
+static void guest_ins_port80(uint8_t *buffer, unsigned int count)
+{
+       unsigned long end;
+
+       if (count == 2)
+               end = (unsigned long)buffer + 1;
+       else
+               end = (unsigned long)buffer + 8192;
+
+       asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory");
+       GUEST_ASSERT_EQ(count, 0);
+       GUEST_ASSERT_EQ((unsigned long)buffer, end);
+}
+
+static void guest_code(void)
+{
+       uint8_t buffer[8192];
+       int i;
+
+       /*
+        * Special case tests.  main() will adjust RCX 2 => 1 and 3 => 8192 to
+        * test that KVM doesn't explode when userspace modifies the "count" on
+        * a userspace I/O exit.  KVM isn't required to play nice with the I/O
+        * itself as KVM doesn't support manipulating the count, it just needs
+        * to not explode or overflow a buffer.
+        */
+       guest_ins_port80(buffer, 2);
+       guest_ins_port80(buffer, 3);
+
+       /* Verify KVM fills the buffer correctly when not stuffing RCX. */
+       memset(buffer, 0, sizeof(buffer));
+       guest_ins_port80(buffer, 8192);
+       for (i = 0; i < 8192; i++)
+               __GUEST_ASSERT(buffer[i] == 0xaa,
+                              "Expected '0xaa', got '0x%x' at buffer[%u]",
+                              buffer[i], i);
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_regs regs;
+       struct kvm_run *run;
+       struct kvm_vm *vm;
+       struct ucall uc;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       run = vcpu->run;
+
+       memset(&regs, 0, sizeof(regs));
+
+       while (1) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               if (get_ucall(vcpu, &uc))
+                       break;
+
+               TEST_ASSERT(run->io.port == 0x80,
+                           "Expected I/O at port 0x80, got port 0x%x", run->io.port);
+
+               /*
+                * Modify the rep string count in RCX: 2 => 1 and 3 => 8192.
+                * Note, this abuses KVM's batching of rep string I/O to avoid
+                * getting stuck in an infinite loop.  That behavior isn't in
+                * scope from a testing perspective as it's not ABI in any way,
+                * i.e. it really is abusing internal KVM knowledge.
+                */
+               vcpu_regs_get(vcpu, &regs);
+               if (regs.rcx == 2)
+                       regs.rcx = 1;
+               if (regs.rcx == 3)
+                       regs.rcx = 8192;
+               memset((void *)run + run->io.data_offset, 0xaa, 4096);
+               vcpu_regs_set(vcpu, &regs);
+       }
+
+       switch (uc.cmd) {
+       case UCALL_DONE:
+               break;
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+       default:
+               TEST_FAIL("Unknown ucall %lu", uc.cmd);
+       }
+
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
new file mode 100644 (file)
index 0000000..32b2794
--- /dev/null
@@ -0,0 +1,769 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for exiting into userspace on registered MSRs
+ */
+#include <sys/ioctl.h>
+
+#include "kvm_test_harness.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define MSR_NON_EXISTENT 0x474f4f00
+
+static u64 deny_bits = 0;
+struct kvm_msr_filter filter_allow = {
+       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+       .ranges = {
+               {
+                       .flags = KVM_MSR_FILTER_READ |
+                                KVM_MSR_FILTER_WRITE,
+                       .nmsrs = 1,
+                       /* Test an MSR the kernel knows about. */
+                       .base = MSR_IA32_XSS,
+                       .bitmap = (uint8_t*)&deny_bits,
+               }, {
+                       .flags = KVM_MSR_FILTER_READ |
+                                KVM_MSR_FILTER_WRITE,
+                       .nmsrs = 1,
+                       /* Test an MSR the kernel doesn't know about. */
+                       .base = MSR_IA32_FLUSH_CMD,
+                       .bitmap = (uint8_t*)&deny_bits,
+               }, {
+                       .flags = KVM_MSR_FILTER_READ |
+                                KVM_MSR_FILTER_WRITE,
+                       .nmsrs = 1,
+                       /* Test a fabricated MSR that no one knows about. */
+                       .base = MSR_NON_EXISTENT,
+                       .bitmap = (uint8_t*)&deny_bits,
+               },
+       },
+};
+
+struct kvm_msr_filter filter_fs = {
+       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+       .ranges = {
+               {
+                       .flags = KVM_MSR_FILTER_READ,
+                       .nmsrs = 1,
+                       .base = MSR_FS_BASE,
+                       .bitmap = (uint8_t*)&deny_bits,
+               },
+       },
+};
+
+struct kvm_msr_filter filter_gs = {
+       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+       .ranges = {
+               {
+                       .flags = KVM_MSR_FILTER_READ,
+                       .nmsrs = 1,
+                       .base = MSR_GS_BASE,
+                       .bitmap = (uint8_t*)&deny_bits,
+               },
+       },
+};
+
+static uint64_t msr_non_existent_data;
+static int guest_exception_count;
+static u32 msr_reads, msr_writes;
+
+static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_deadbeef[1] = { 0x1 };
+
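+/* Clear the MSR's bit in the range's bitmap so accesses to it are denied. */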
+static void deny_msr(uint8_t *bitmap, u32 msr)
+{
+       u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1);
+
+       bitmap[idx / 8] &= ~(1 << (idx % 8));
+}
+
+static void prepare_bitmaps(void)
+{
+       memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000));
+       memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write));
+       memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000));
+       memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000));
+       memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read));
+
+       deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL);
+       deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK);
+       deny_msr(bitmap_c0000000_read, MSR_GS_BASE);
+}
+
+struct kvm_msr_filter filter_deny = {
+       .flags = KVM_MSR_FILTER_DEFAULT_DENY,
+       .ranges = {
+               {
+                       .flags = KVM_MSR_FILTER_READ,
+                       .base = 0x00000000,
+                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+                       .bitmap = bitmap_00000000,
+               }, {
+                       .flags = KVM_MSR_FILTER_WRITE,
+                       .base = 0x00000000,
+                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+                       .bitmap = bitmap_00000000_write,
+               }, {
+                       .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE,
+                       .base = 0x40000000,
+                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+                       .bitmap = bitmap_40000000,
+               }, {
+                       .flags = KVM_MSR_FILTER_READ,
+                       .base = 0xc0000000,
+                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+                       .bitmap = bitmap_c0000000_read,
+               }, {
+                       .flags = KVM_MSR_FILTER_WRITE,
+                       .base = 0xc0000000,
+                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+                       .bitmap = bitmap_c0000000,
+               }, {
+                       .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ,
+                       .base = 0xdeadbeef,
+                       .nmsrs = 1,
+                       .bitmap = bitmap_deadbeef,
+               },
+       },
+};
+
+struct kvm_msr_filter no_filter_deny = {
+       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+};
+
+/*
+ * Note: Force test_rdmsr() to not be inlined to prevent the labels,
+ * rdmsr_start and rdmsr_end, from being defined multiple times.
+ */
+static noinline uint64_t test_rdmsr(uint32_t msr)
+{
+       uint32_t a, d;
+
+       guest_exception_count = 0;
+
+       __asm__ __volatile__("rdmsr_start: rdmsr; rdmsr_end:" :
+                       "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+       return a | ((uint64_t) d << 32);
+}
+
+/*
+ * Note: Force test_wrmsr() to not be inlined to prevent the labels,
+ * wrmsr_start and wrmsr_end, from being defined multiple times.
+ */
+static noinline void test_wrmsr(uint32_t msr, uint64_t value)
+{
+       uint32_t a = value;
+       uint32_t d = value >> 32;
+
+       guest_exception_count = 0;
+
+       __asm__ __volatile__("wrmsr_start: wrmsr; wrmsr_end:" ::
+                       "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+extern char rdmsr_start, rdmsr_end;
+extern char wrmsr_start, wrmsr_end;
+
+/*
+ * Note: Force test_em_rdmsr() to not be inlined to prevent the labels,
+ * em_rdmsr_start and em_rdmsr_end, from being defined multiple times.
+ */
+static noinline uint64_t test_em_rdmsr(uint32_t msr)
+{
+       uint32_t a, d;
+
+       guest_exception_count = 0;
+
+       __asm__ __volatile__(KVM_FEP "em_rdmsr_start: rdmsr; em_rdmsr_end:" :
+                       "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+       return a | ((uint64_t) d << 32);
+}
+
+/*
+ * Note: Force test_em_wrmsr() to not be inlined to prevent the labels,
+ * em_wrmsr_start and em_wrmsr_end, from being defined multiple times.
+ */
+static noinline void test_em_wrmsr(uint32_t msr, uint64_t value)
+{
+       uint32_t a = value;
+       uint32_t d = value >> 32;
+
+       guest_exception_count = 0;
+
+       __asm__ __volatile__(KVM_FEP "em_wrmsr_start: wrmsr; em_wrmsr_end:" ::
+                       "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+extern char em_rdmsr_start, em_rdmsr_end;
+extern char em_wrmsr_start, em_wrmsr_end;
+
+static void guest_code_filter_allow(void)
+{
+       uint64_t data;
+
+       /*
+        * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_XSS.
+        *
+        * A GP is thrown if anything other than 0 is written to
+        * MSR_IA32_XSS.
+        */
+       data = test_rdmsr(MSR_IA32_XSS);
+       GUEST_ASSERT(data == 0);
+       GUEST_ASSERT(guest_exception_count == 0);
+
+       test_wrmsr(MSR_IA32_XSS, 0);
+       GUEST_ASSERT(guest_exception_count == 0);
+
+       test_wrmsr(MSR_IA32_XSS, 1);
+       GUEST_ASSERT(guest_exception_count == 1);
+
+       /*
+        * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_FLUSH_CMD.
+        *
+        * A GP is thrown if MSR_IA32_FLUSH_CMD is read
+        * from or if a value other than 1 is written to it.
+        */
+       test_rdmsr(MSR_IA32_FLUSH_CMD);
+       GUEST_ASSERT(guest_exception_count == 1);
+
+       test_wrmsr(MSR_IA32_FLUSH_CMD, 0);
+       GUEST_ASSERT(guest_exception_count == 1);
+
+       test_wrmsr(MSR_IA32_FLUSH_CMD, 1);
+       GUEST_ASSERT(guest_exception_count == 0);
+
+       /*
+        * Test userspace intercepting rdmsr / wrmsr for MSR_NON_EXISTENT.
+        *
+        * Test that a fabricated MSR can pass through the kernel
+        * and be handled in userspace.
+        */
+       test_wrmsr(MSR_NON_EXISTENT, 2);
+       GUEST_ASSERT(guest_exception_count == 0);
+
+       data = test_rdmsr(MSR_NON_EXISTENT);
+       GUEST_ASSERT(data == 2);
+       GUEST_ASSERT(guest_exception_count == 0);
+
+       if (is_forced_emulation_enabled) {
+               /* Let userspace know we aren't done. */
+               GUEST_SYNC(0);
+
+               /*
+                * Now run the same tests with the instruction emulator.
+                */
+               data = test_em_rdmsr(MSR_IA32_XSS);
+               GUEST_ASSERT(data == 0);
+               GUEST_ASSERT(guest_exception_count == 0);
+               test_em_wrmsr(MSR_IA32_XSS, 0);
+               GUEST_ASSERT(guest_exception_count == 0);
+               test_em_wrmsr(MSR_IA32_XSS, 1);
+               GUEST_ASSERT(guest_exception_count == 1);
+
+               test_em_rdmsr(MSR_IA32_FLUSH_CMD);
+               GUEST_ASSERT(guest_exception_count == 1);
+               test_em_wrmsr(MSR_IA32_FLUSH_CMD, 0);
+               GUEST_ASSERT(guest_exception_count == 1);
+               test_em_wrmsr(MSR_IA32_FLUSH_CMD, 1);
+               GUEST_ASSERT(guest_exception_count == 0);
+
+               test_em_wrmsr(MSR_NON_EXISTENT, 2);
+               GUEST_ASSERT(guest_exception_count == 0);
+               data = test_em_rdmsr(MSR_NON_EXISTENT);
+               GUEST_ASSERT(data == 2);
+               GUEST_ASSERT(guest_exception_count == 0);
+       }
+
+       GUEST_DONE();
+}
+
+static void guest_msr_calls(bool trapped)
+{
+       /* This goes into the in-kernel emulation */
+       wrmsr(MSR_SYSCALL_MASK, 0);
+
+       if (trapped) {
+               /* This goes into user space emulation */
+               GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK);
+               GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE);
+       } else {
+               GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK);
+               GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE);
+       }
+
+       /* If trapped == true, this goes into user space emulation */
+       wrmsr(MSR_IA32_POWER_CTL, 0x1234);
+
+       /* This goes into the in-kernel emulation */
+       rdmsr(MSR_IA32_POWER_CTL);
+
+       /* Invalid MSR, should always be handled by user space exit */
+       GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef);
+       wrmsr(0xdeadbeef, 0x1234);
+}
+
+static void guest_code_filter_deny(void)
+{
+       guest_msr_calls(true);
+
+       /*
+        * Disable MSR filtering so that the kernel handles everything in
+        * the next round.
+        */
+       GUEST_SYNC(0);
+
+       guest_msr_calls(false);
+
+       GUEST_DONE();
+}
+
+static void guest_code_permission_bitmap(void)
+{
+       uint64_t data;
+
+       data = test_rdmsr(MSR_FS_BASE);
+       GUEST_ASSERT(data == MSR_FS_BASE);
+       data = test_rdmsr(MSR_GS_BASE);
+       GUEST_ASSERT(data != MSR_GS_BASE);
+
+       /* Let userspace know to switch the filter */
+       GUEST_SYNC(0);
+
+       data = test_rdmsr(MSR_FS_BASE);
+       GUEST_ASSERT(data != MSR_FS_BASE);
+       data = test_rdmsr(MSR_GS_BASE);
+       GUEST_ASSERT(data == MSR_GS_BASE);
+
+       GUEST_DONE();
+}
+
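+/*
+ * #GP handler: skip the faulting RDMSR/WRMSR by advancing RIP past it
+ * (zeroing the RDMSR outputs), and count the exception.
+ */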
+static void __guest_gp_handler(struct ex_regs *regs,
+                              char *r_start, char *r_end,
+                              char *w_start, char *w_end)
+{
+       if (regs->rip == (uintptr_t)r_start) {
+               regs->rip = (uintptr_t)r_end;
+               regs->rax = 0;
+               regs->rdx = 0;
+       } else if (regs->rip == (uintptr_t)w_start) {
+               regs->rip = (uintptr_t)w_end;
+       } else {
+               GUEST_ASSERT(!"RIP is at an unknown location!");
+       }
+
+       ++guest_exception_count;
+}
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+       __guest_gp_handler(regs, &rdmsr_start, &rdmsr_end,
+                          &wrmsr_start, &wrmsr_end);
+}
+
+static void guest_fep_gp_handler(struct ex_regs *regs)
+{
+       __guest_gp_handler(regs, &em_rdmsr_start, &em_rdmsr_end,
+                          &em_wrmsr_start, &em_wrmsr_end);
+}
+
+static void check_for_guest_assert(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       if (vcpu->run->exit_reason == KVM_EXIT_IO &&
+           get_ucall(vcpu, &uc) == UCALL_ABORT) {
+               REPORT_GUEST_ASSERT(uc);
+       }
+}
+
+static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
+{
+       struct kvm_run *run = vcpu->run;
+
+       check_for_guest_assert(vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_RDMSR);
+       TEST_ASSERT(run->msr.index == msr_index,
+                       "Unexpected msr (0x%04x), expected 0x%04x",
+                       run->msr.index, msr_index);
+
+       switch (run->msr.index) {
+       case MSR_IA32_XSS:
+               run->msr.data = 0;
+               break;
+       case MSR_IA32_FLUSH_CMD:
+               run->msr.error = 1;
+               break;
+       case MSR_NON_EXISTENT:
+               run->msr.data = msr_non_existent_data;
+               break;
+       case MSR_FS_BASE:
+               run->msr.data = MSR_FS_BASE;
+               break;
+       case MSR_GS_BASE:
+               run->msr.data = MSR_GS_BASE;
+               break;
+       default:
+               TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
+       }
+}
+
+static void process_wrmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
+{
+       struct kvm_run *run = vcpu->run;
+
+       check_for_guest_assert(vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_WRMSR);
+       TEST_ASSERT(run->msr.index == msr_index,
+                       "Unexpected msr (0x%04x), expected 0x%04x",
+                       run->msr.index, msr_index);
+
+       switch (run->msr.index) {
+       case MSR_IA32_XSS:
+               if (run->msr.data != 0)
+                       run->msr.error = 1;
+               break;
+       case MSR_IA32_FLUSH_CMD:
+               if (run->msr.data != 1)
+                       run->msr.error = 1;
+               break;
+       case MSR_NON_EXISTENT:
+               msr_non_existent_data = run->msr.data;
+               break;
+       default:
+               TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
+       }
+}
+
+static void process_ucall_done(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       check_for_guest_assert(vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE,
+                   "Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
+                   uc.cmd, UCALL_DONE);
+}
+
+static uint64_t process_ucall(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc = {};
+
+       check_for_guest_assert(vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_SYNC:
+               break;
+       case UCALL_ABORT:
+               check_for_guest_assert(vcpu);
+               break;
+       case UCALL_DONE:
+               process_ucall_done(vcpu);
+               break;
+       default:
+               TEST_ASSERT(false, "Unexpected ucall");
+       }
+
+       return uc.cmd;
+}
+
+static void run_guest_then_process_rdmsr(struct kvm_vcpu *vcpu,
+                                        uint32_t msr_index)
+{
+       vcpu_run(vcpu);
+       process_rdmsr(vcpu, msr_index);
+}
+
+static void run_guest_then_process_wrmsr(struct kvm_vcpu *vcpu,
+                                        uint32_t msr_index)
+{
+       vcpu_run(vcpu);
+       process_wrmsr(vcpu, msr_index);
+}
+
+static uint64_t run_guest_then_process_ucall(struct kvm_vcpu *vcpu)
+{
+       vcpu_run(vcpu);
+       return process_ucall(vcpu);
+}
+
+static void run_guest_then_process_ucall_done(struct kvm_vcpu *vcpu)
+{
+       vcpu_run(vcpu);
+       process_ucall_done(vcpu);
+}
+
+KVM_ONE_VCPU_TEST_SUITE(user_msr);
+
+KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
+{
+       struct kvm_vm *vm = vcpu->vm;
+       uint64_t cmd;
+       int rc;
+
+       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+       vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
+
+       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
+
+       vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+
+       /* Process guest code userspace exits. */
+       run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
+       run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+       run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+
+       run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
+       run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+       run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+
+       run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
+       run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
+
+       vcpu_run(vcpu);
+       cmd = process_ucall(vcpu);
+
+       if (is_forced_emulation_enabled) {
+               TEST_ASSERT_EQ(cmd, UCALL_SYNC);
+               vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
+
+               /* Process emulated rdmsr and wrmsr instructions. */
+               run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
+               run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+               run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+
+               run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
+               run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+               run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+
+               run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
+               run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
+
+               /* Confirm the guest completed without issues. */
+               run_guest_then_process_ucall_done(vcpu);
+       } else {
+               TEST_ASSERT_EQ(cmd, UCALL_DONE);
+               printf("To run the instruction emulation tests, set the module parameter 'kvm.force_emulation_prefix=1'\n");
+       }
+}
+
+static int handle_ucall(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       case UCALL_SYNC:
+               vm_ioctl(vcpu->vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny);
+               break;
+       case UCALL_DONE:
+               return 1;
+       default:
+               TEST_FAIL("Unknown ucall %lu", uc.cmd);
+       }
+
+       return 0;
+}
+
+static void handle_rdmsr(struct kvm_run *run)
+{
+       run->msr.data = run->msr.index;
+       msr_reads++;
+
+       if (run->msr.index == MSR_SYSCALL_MASK ||
+           run->msr.index == MSR_GS_BASE) {
+               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+                           "MSR read trap w/o access fault");
+       }
+
+       if (run->msr.index == 0xdeadbeef) {
+               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+                           "MSR deadbeef read trap w/o inval fault");
+       }
+}
+
+static void handle_wrmsr(struct kvm_run *run)
+{
+       /* ignore */
+       msr_writes++;
+
+       if (run->msr.index == MSR_IA32_POWER_CTL) {
+               TEST_ASSERT(run->msr.data == 0x1234,
+                           "MSR data for MSR_IA32_POWER_CTL incorrect");
+               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+                           "MSR_IA32_POWER_CTL trap w/o access fault");
+       }
+
+       if (run->msr.index == 0xdeadbeef) {
+               TEST_ASSERT(run->msr.data == 0x1234,
+                           "MSR data for deadbeef incorrect");
+               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+                           "deadbeef trap w/o inval fault");
+       }
+}
+
+KVM_ONE_VCPU_TEST(user_msr, msr_filter_deny, guest_code_filter_deny)
+{
+       struct kvm_vm *vm = vcpu->vm;
+       struct kvm_run *run = vcpu->run;
+       int rc;
+
+       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+       vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_INVAL |
+                                                     KVM_MSR_EXIT_REASON_UNKNOWN |
+                                                     KVM_MSR_EXIT_REASON_FILTER);
+
+       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+       prepare_bitmaps();
+       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_deny);
+
+       while (1) {
+               vcpu_run(vcpu);
+
+               switch (run->exit_reason) {
+               case KVM_EXIT_X86_RDMSR:
+                       handle_rdmsr(run);
+                       break;
+               case KVM_EXIT_X86_WRMSR:
+                       handle_wrmsr(run);
+                       break;
+               case KVM_EXIT_IO:
+                       if (handle_ucall(vcpu))
+                               goto done;
+                       break;
+               }
+
+       }
+
+done:
+       TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space");
+       TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space");
+}
+
+KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap)
+{
+       struct kvm_vm *vm = vcpu->vm;
+       int rc;
+
+       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+       vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
+
+       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs);
+       run_guest_then_process_rdmsr(vcpu, MSR_FS_BASE);
+       TEST_ASSERT(run_guest_then_process_ucall(vcpu) == UCALL_SYNC,
+                   "Expected ucall state to be UCALL_SYNC.");
+       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
+       run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE);
+       run_guest_then_process_ucall_done(vcpu);
+}
+
+#define test_user_exit_msr_ioctl(vm, cmd, arg, flag, valid_mask)       \
+({                                                                     \
+       int r = __vm_ioctl(vm, cmd, arg);                               \
+                                                                       \
+       if (flag & valid_mask)                                          \
+               TEST_ASSERT(!r, __KVM_IOCTL_ERROR(#cmd, r));            \
+       else                                                            \
+               TEST_ASSERT(r == -1 && errno == EINVAL,                 \
+                           "Wanted EINVAL for %s with flag = 0x%llx, got rc: %i errno: %i (%s)", \
+                           #cmd, flag, r, errno, strerror(errno));     \
+})
+
+static void run_user_space_msr_flag_test(struct kvm_vm *vm)
+{
+       struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_USER_SPACE_MSR };
+       int nflags = sizeof(cap.args[0]) * BITS_PER_BYTE;
+       int rc;
+       int i;
+
+       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+
+       for (i = 0; i < nflags; i++) {
+               cap.args[0] = BIT_ULL(i);
+               test_user_exit_msr_ioctl(vm, KVM_ENABLE_CAP, &cap,
+                          BIT_ULL(i), KVM_MSR_EXIT_REASON_VALID_MASK);
+       }
+}
+
+static void run_msr_filter_flag_test(struct kvm_vm *vm)
+{
+       u64 deny_bits = 0;
+       struct kvm_msr_filter filter = {
+               .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+               .ranges = {
+                       {
+                               .flags = KVM_MSR_FILTER_READ,
+                               .nmsrs = 1,
+                               .base = 0,
+                               .bitmap = (uint8_t *)&deny_bits,
+                       },
+               },
+       };
+       int nflags;
+       int rc;
+       int i;
+
+       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+       nflags = sizeof(filter.flags) * BITS_PER_BYTE;
+       for (i = 0; i < nflags; i++) {
+               filter.flags = BIT_ULL(i);
+               test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter,
+                          BIT_ULL(i), KVM_MSR_FILTER_VALID_MASK);
+       }
+
+       filter.flags = KVM_MSR_FILTER_DEFAULT_ALLOW;
+       nflags = sizeof(filter.ranges[0].flags) * BITS_PER_BYTE;
+       for (i = 0; i < nflags; i++) {
+               filter.ranges[0].flags = BIT_ULL(i);
+               test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter,
+                          BIT_ULL(i), KVM_MSR_FILTER_RANGE_VALID_MASK);
+       }
+}
+
+/* Test that attempts to write to the unused bits in a flag fails. */
+KVM_ONE_VCPU_TEST(user_msr, user_exit_msr_flags, NULL)
+{
+       struct kvm_vm *vm = vcpu->vm;
+
+       /* Test flags for KVM_CAP_X86_USER_SPACE_MSR. */
+       run_user_space_msr_flag_test(vm);
+
+       /* Test flags and range flags for KVM_X86_SET_MSR_FILTER. */
+       run_msr_filter_flag_test(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c
new file mode 100644 (file)
index 0000000..a81a247
--- /dev/null
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_apic_access_test
+ *
+ * Copyright (C) 2020, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * The first subtest simply checks to see that an L2 guest can be
+ * launched with a valid APIC-access address that is backed by a
+ * page of L1 physical memory.
+ *
+ * The second subtest sets the APIC-access address to a (valid) L1
+ * physical address that is not backed by memory. KVM can't handle
+ * this situation, so resuming L2 should result in a KVM exit for
+ * internal error (emulation). This is not an architectural
+ * requirement. It is just a shortcoming of KVM. The internal error
+ * is unfortunate, but it's better than what used to happen!
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+static void l2_guest_code(void)
+{
+       /* Exit to L1 */
+       __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa)
+{
+#define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       uint32_t control;
+
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+       GUEST_ASSERT(load_vmcs(vmx_pages));
+
+       /* Prepare the VMCS for L2 execution. */
+       prepare_vmcs(vmx_pages, l2_guest_code,
+                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+       control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+       control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+       vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+       control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
+       control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+       vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
+       vmwrite(APIC_ACCESS_ADDR, vmx_pages->apic_access_gpa);
+
+       /* Try to launch L2 with the memory-backed APIC-access address. */
+       GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+       vmwrite(APIC_ACCESS_ADDR, high_gpa);
+
+       /* Try to resume L2 with the unbacked APIC-access address. */
+       GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));
+       GUEST_ASSERT(!vmresume());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       unsigned long apic_access_addr = ~0ul;
+       vm_vaddr_t vmx_pages_gva;
+       unsigned long high_gpa;
+       struct vmx_pages *vmx;
+       bool done = false;
+
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+       high_gpa = (vm->max_gfn - 1) << vm->page_shift;
+
+       vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       prepare_virtualize_apic_accesses(vmx, vm);
+       vcpu_args_set(vcpu, 2, vmx_pages_gva, high_gpa);
+
+       while (!done) {
+               volatile struct kvm_run *run = vcpu->run;
+               struct ucall uc;
+
+               vcpu_run(vcpu);
+               if (apic_access_addr == high_gpa) {
+                       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+                       TEST_ASSERT(run->internal.suberror ==
+                                   KVM_INTERNAL_ERROR_EMULATION,
+                                   "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u",
+                                   run->internal.suberror);
+                       break;
+               }
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       apic_access_addr = uc.args[1];
+                       break;
+               case UCALL_DONE:
+                       done = true;
+                       break;
+               default:
+                       TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd);
+               }
+       }
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c
new file mode 100644 (file)
index 0000000..dad9883
--- /dev/null
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_close_while_nested
+ *
+ * Copyright (C) 2019, Red Hat, Inc.
+ *
+ * Verify that nothing bad happens if a KVM user exits with open
+ * file descriptors while executing a nested guest.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+enum {
+       PORT_L0_EXIT = 0x2000,
+};
+
+static void l2_guest_code(void)
+{
+       /* Exit to L0 */
+       asm volatile("inb %%dx, %%al"
+                    : : [port] "d" (PORT_L0_EXIT) : "rax");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+       GUEST_ASSERT(load_vmcs(vmx_pages));
+
+       /* Prepare the VMCS for L2 execution. */
+       prepare_vmcs(vmx_pages, l2_guest_code,
+                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_ASSERT(0);
+}
+
+int main(int argc, char *argv[])
+{
+       vm_vaddr_t vmx_pages_gva;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+       /* Allocate VMX pages and shared descriptors (vmx_pages). */
+       vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+       for (;;) {
+               volatile struct kvm_run *run = vcpu->run;
+               struct ucall uc;
+
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               if (run->io.port == PORT_L0_EXIT)
+                       break;
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
new file mode 100644 (file)
index 0000000..fa512d0
--- /dev/null
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging test
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+/* The memory slot index to track dirty pages */
+#define TEST_MEM_SLOT_INDEX            1
+#define TEST_MEM_PAGES                 3
+
+/* L1 guest test virtual memory offset */
+#define GUEST_TEST_MEM                 0xc0000000
+
+/* L2 guest test virtual memory offset */
+#define NESTED_TEST_MEM1               0xc0001000
+#define NESTED_TEST_MEM2               0xc0002000
+
+static void l2_guest_code(u64 *a, u64 *b)
+{
+       READ_ONCE(*a);
+       WRITE_ONCE(*a, 1);
+       GUEST_SYNC(true);
+       GUEST_SYNC(false);
+
+       WRITE_ONCE(*b, 1);
+       GUEST_SYNC(true);
+       WRITE_ONCE(*b, 1);
+       GUEST_SYNC(true);
+       GUEST_SYNC(false);
+
+       /* Exit to L1 and never come back.  */
+       vmcall();
+}
+
+static void l2_guest_code_ept_enabled(void)
+{
+       l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2);
+}
+
+static void l2_guest_code_ept_disabled(void)
+{
+       /* Access the same L1 GPAs as l2_guest_code_ept_enabled() */
+       l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM);
+}
+
+void l1_guest_code(struct vmx_pages *vmx)
+{
+#define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       void *l2_rip;
+
+       GUEST_ASSERT(vmx->vmcs_gpa);
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+       GUEST_ASSERT(load_vmcs(vmx));
+
+       if (vmx->eptp_gpa)
+               l2_rip = l2_guest_code_ept_enabled;
+       else
+               l2_rip = l2_guest_code_ept_disabled;
+
+       prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       GUEST_SYNC(false);
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_SYNC(false);
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+       GUEST_DONE();
+}
+
+static void test_vmx_dirty_log(bool enable_ept)
+{
+       vm_vaddr_t vmx_pages_gva = 0;
+       struct vmx_pages *vmx;
+       unsigned long *bmap;
+       uint64_t *host_test_mem;
+
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       bool done = false;
+
+       pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled");
+
+       /* Create VM */
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+       vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+       /* Add an extra memory slot for testing dirty logging */
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                   GUEST_TEST_MEM,
+                                   TEST_MEM_SLOT_INDEX,
+                                   TEST_MEM_PAGES,
+                                   KVM_MEM_LOG_DIRTY_PAGES);
+
+       /*
+        * Add an identity map for GVA range [0xc0000000, 0xc0002000).  This
+        * affects both L1 and L2.  However...
+        */
+       virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES);
+
+       /*
+        * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
+        * 0xc0000000.
+        *
+        * Note that prepare_eptp should be called only after L1's GPA map is
+        * done, meaning after the last call to virt_map.
+        *
+        * When EPT is disabled, the L2 guest code will still access the same L1
+        * GPAs as the EPT enabled case.
+        */
+       if (enable_ept) {
+               prepare_eptp(vmx, vm, 0);
+               nested_map_memslot(vmx, vm, 0);
+               nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
+               nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
+       }
+
+       bmap = bitmap_zalloc(TEST_MEM_PAGES);
+       host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
+
+       while (!done) {
+               memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       /*
+                        * The nested guest wrote at offset 0x1000 in the memslot, but the
+                        * dirty bitmap must be filled in according to L1 GPA, not L2.
+                        */
+                       kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+                       if (uc.args[1]) {
+                               TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean");
+                               TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest");
+                       } else {
+                               TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty");
+                               TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest");
+                       }
+
+                       TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty");
+                       TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
+                       TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty");
+                       TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
+                       break;
+               case UCALL_DONE:
+                       done = true;
+                       break;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+       test_vmx_dirty_log(/*enable_ept=*/false);
+
+       if (kvm_cpu_has_ept())
+               test_vmx_dirty_log(/*enable_ept=*/true);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
new file mode 100644 (file)
index 0000000..3fd6ece
--- /dev/null
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <signal.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include "kselftest.h"
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+       /* Loop on the ud2 until guest state is made invalid. */
+}
+
+static void guest_code(void)
+{
+       asm volatile("ud2");
+}
+
+static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+
+       vcpu_run(vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+       TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+                   "Expected emulation failure, got %d",
+                   run->emulation_failure.suberror);
+}
+
+static void run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
+{
+       /*
+        * Always run twice to verify KVM handles the case where _KVM_ queues
+        * an exception with invalid state and then exits to userspace, i.e.
+        * that KVM doesn't explode if userspace ignores the initial error.
+        */
+       __run_vcpu_with_invalid_state(vcpu);
+       __run_vcpu_with_invalid_state(vcpu);
+}
+
+static void set_timer(void)
+{
+       struct itimerval timer;
+
+       timer.it_value.tv_sec  = 0;
+       timer.it_value.tv_usec = 200;
+       timer.it_interval = timer.it_value;
+       TEST_ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
+}
+
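+/*
+ * Toggle TR.unusable to make the guest state invalid (or valid again) for
+ * the next KVM_RUN.
+ */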
+static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set)
+{
+       static struct kvm_sregs sregs;
+
+       if (!sregs.cr0)
+               vcpu_sregs_get(vcpu, &sregs);
+       sregs.tr.unusable = !!set;
+       vcpu_sregs_set(vcpu, &sregs);
+}
+
+static void set_invalid_guest_state(struct kvm_vcpu *vcpu)
+{
+       set_or_clear_invalid_guest_state(vcpu, true);
+}
+
+static void clear_invalid_guest_state(struct kvm_vcpu *vcpu)
+{
+       set_or_clear_invalid_guest_state(vcpu, false);
+}
+
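+/* Stash the vCPU on the first call so the SIGALRM handler can retrieve it. */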
+static struct kvm_vcpu *get_set_sigalrm_vcpu(struct kvm_vcpu *__vcpu)
+{
+       static struct kvm_vcpu *vcpu = NULL;
+
+       if (__vcpu)
+               vcpu = __vcpu;
+       return vcpu;
+}
+
+static void sigalrm_handler(int sig)
+{
+       struct kvm_vcpu *vcpu = get_set_sigalrm_vcpu(NULL);
+       struct kvm_vcpu_events events;
+
+       TEST_ASSERT(sig == SIGALRM, "Unexpected signal = %d", sig);
+
+       vcpu_events_get(vcpu, &events);
+
+       /*
+        * If an exception is pending, attempt KVM_RUN with invalid guest state,
+        * otherwise rearm the timer and keep doing so until the timer fires
+        * between KVM queueing an exception and re-entering the guest.
+        */
+       if (events.exception.pending) {
+               set_invalid_guest_state(vcpu);
+               run_vcpu_with_invalid_state(vcpu);
+       } else {
+               set_timer();
+       }
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       TEST_REQUIRE(host_cpu_is_intel);
+       TEST_REQUIRE(!vm_is_unrestricted_guest(NULL));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       get_set_sigalrm_vcpu(vcpu);
+
+       vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+
+       /*
+        * Stuff invalid guest state by making TR unusable.  The next KVM_RUN
+        * should exit with an emulation error as KVM doesn't support
+        * emulating a guest with invalid state.
+        */
+       set_invalid_guest_state(vcpu);
+       run_vcpu_with_invalid_state(vcpu);
+
+       /*
+        * Verify KVM also handles the case where userspace gains control while
+        * an exception is pending and stuffs invalid state.  Run with valid
+        * guest state and a timer firing every 200us, and attempt to enter the
+        * guest with invalid state when the handler interrupts KVM with an
+        * exception pending.
+        */
+       clear_invalid_guest_state(vcpu);
+       TEST_ASSERT(signal(SIGALRM, sigalrm_handler) != SIG_ERR,
+                   "Failed to register SIGALRM handler, errno = %d (%s)",
+                   errno, strerror(errno));
+
+       set_timer();
+       run_vcpu_with_invalid_state(vcpu);
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c
new file mode 100644 (file)
index 0000000..a100ee5
--- /dev/null
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define ARBITRARY_IO_PORT 0x2000
+
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+       /*
+        * Generate an exit to L0 userspace, i.e. main(), via I/O to an
+        * arbitrary port.
+        */
+       asm volatile("inb %%dx, %%al"
+                    : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+       GUEST_ASSERT(load_vmcs(vmx_pages));
+
+       /* Prepare the VMCS for L2 execution. */
+       prepare_vmcs(vmx_pages, l2_guest_code,
+                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       /*
+        * L2 must be run without unrestricted guest, verify that the selftests
+        * library hasn't enabled it.  Because KVM selftests jump directly to
+        * 64-bit mode, unrestricted guest support isn't required.
+        */
+       GUEST_ASSERT(!(vmreadz(CPU_BASED_VM_EXEC_CONTROL) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) ||
+                    !(vmreadz(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_UNRESTRICTED_GUEST));
+
+       GUEST_ASSERT(!vmlaunch());
+
+       /* L2 should triple fault after main() stuffs invalid guest state. */
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       vm_vaddr_t vmx_pages_gva;
+       struct kvm_sregs sregs;
+       struct kvm_vcpu *vcpu;
+       struct kvm_run *run;
+       struct ucall uc;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+       /* Allocate VMX pages and shared descriptors (vmx_pages). */
+       vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+       vcpu_run(vcpu);
+
+       run = vcpu->run;
+
+       /*
+        * The first exit to L0 userspace should be an I/O access from L2.
+        * Running L1 should launch L2 without triggering an exit to userspace.
+        */
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
+                   "Expected IN from port %d from L2, got port %d",
+                   ARBITRARY_IO_PORT, run->io.port);
+
+       /*
+        * Stuff invalid guest state for L2 by making TR unusable.  The next
+        * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support
+        * emulating invalid guest state for L2.
+        */
+       memset(&sregs, 0, sizeof(sregs));
+       vcpu_sregs_get(vcpu, &sregs);
+       sregs.tr.unusable = 1;
+       vcpu_sregs_set(vcpu, &sregs);
+
+       vcpu_run(vcpu);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_DONE:
+               break;
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+       default:
+               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+       }
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c
new file mode 100644 (file)
index 0000000..90720b6
--- /dev/null
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VMX control MSR test
+ *
+ * Copyright (C) 2022 Google LLC.
+ *
+ * Tests for KVM ownership of bits in the VMX entry/exit control MSRs. Checks
+ * that KVM will set owned bits where appropriate, and will not if
+ * KVM_X86_QUIRK_TWEAK_VMX_CTRL_MSRS is disabled.
+ */
+#include <linux/bitmap.h>
+#include "kvm_util.h"
+#include "vmx.h"
+
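+/*
+ * For each bit in @mask that is also set in the MSR's current value, verify
+ * that KVM allows userspace to clear the bit, then restore the original value.
+ */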
+static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
+                               uint64_t mask)
+{
+       uint64_t val = vcpu_get_msr(vcpu, msr_index);
+       uint64_t bit;
+
+       mask &= val;
+
+       for_each_set_bit(bit, &mask, 64) {
+               vcpu_set_msr(vcpu, msr_index, val & ~BIT_ULL(bit));
+               vcpu_set_msr(vcpu, msr_index, val);
+       }
+}
+
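+/*
+ * For each bit in @mask that is clear in the MSR's current value, verify that
+ * KVM allows userspace to set the bit, then restore the original value.
+ */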
+static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
+                               uint64_t mask)
+{
+       uint64_t val = vcpu_get_msr(vcpu, msr_index);
+       uint64_t bit;
+
+       mask = ~mask | val;
+
+       for_each_clear_bit(bit, &mask, 64) {
+               vcpu_set_msr(vcpu, msr_index, val | BIT_ULL(bit));
+               vcpu_set_msr(vcpu, msr_index, val);
+       }
+}
+
+static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index)
+{
+       vmx_fixed0_msr_test(vcpu, msr_index, GENMASK_ULL(31, 0));
+       vmx_fixed1_msr_test(vcpu, msr_index, GENMASK_ULL(63, 32));
+}
+
+static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu)
+{
+       vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, 0);
+       vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, -1ull);
+
+       vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_BASIC,
+                           BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55));
+
+       vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_MISC,
+                           BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) |
+                           BIT_ULL(15) | BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30));
+
+       vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2);
+       vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_EPT_VPID_CAP, -1ull);
+       vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS);
+       vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS);
+       vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_EXIT_CTLS);
+       vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS);
+       vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_VMFUNC, -1ull);
+}
+
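+/*
+ * Hide @feature from the guest's CPUID, verify that KVM still allows
+ * userspace to toggle @msr_bit (with the lock bit set) and restore the
+ * original value, then re-expose the feature if the host supports it.
+ */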
+static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu,
+                                           uint64_t msr_bit,
+                                           struct kvm_x86_cpu_feature feature)
+{
+       uint64_t val;
+
+       vcpu_clear_cpuid_feature(vcpu, feature);
+
+       val = vcpu_get_msr(vcpu, MSR_IA32_FEAT_CTL);
+       vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
+       vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
+       vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
+       vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
+       vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val);
+
+       if (!kvm_cpu_has(feature))
+               return;
+
+       vcpu_set_cpuid_feature(vcpu, feature);
+}
+
+static void ia32_feature_control_msr_test(struct kvm_vcpu *vcpu)
+{
+       uint64_t supported_bits = FEAT_CTL_LOCKED |
+                                 FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
+                                 FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX |
+                                 FEAT_CTL_SGX_LC_ENABLED |
+                                 FEAT_CTL_SGX_ENABLED |
+                                 FEAT_CTL_LMCE_ENABLED;
+       int bit, r;
+
+       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_SMX);
+       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_VMX);
+       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX, X86_FEATURE_VMX);
+       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX_LC);
+       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX);
+       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_ENABLED, X86_FEATURE_SGX);
+       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_LMCE_ENABLED, X86_FEATURE_MCE);
+
+       for_each_clear_bit(bit, &supported_bits, 64) {
+               r = _vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, BIT(bit));
+               TEST_ASSERT(r == 0,
+                           "Setting reserved bit %d in IA32_FEATURE_CONTROL should fail", bit);
+       }
+}
+
+int main(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+       /* No need to actually do KVM_RUN, thus no guest code. */
+       vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+       vmx_save_restore_msrs_test(vcpu);
+       ia32_feature_control_msr_test(vcpu);
+
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c
new file mode 100644 (file)
index 0000000..1759fa5
--- /dev/null
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_nested_tsc_scaling_test
+ *
+ * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This test case verifies that nested TSC scaling behaves as expected when
+ * both L1 and L2 are scaled using different ratios. For this test we scale
+ * L1 down and scale L2 up.
+ */
+
+#include <time.h>
+
+#include "kvm_util.h"
+#include "vmx.h"
+#include "kselftest.h"
+
+/* L2 is scaled up (from L1's perspective) by this factor */
+#define L2_SCALE_FACTOR 4ULL
+
+#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)
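+/* The VMX TSC multiplier is a fixed-point value with 48 fractional bits. */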
+#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)
+
+#define L2_GUEST_STACK_SIZE 64
+
+enum { USLEEP, UCHECK_L1, UCHECK_L2 };
+#define GUEST_SLEEP(sec)         ucall(UCALL_SYNC, 2, USLEEP, sec)
+#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq)
+
+
+/*
+ * This function checks whether the "actual" TSC frequency of a guest matches
+ * its expected frequency. In order to account for delays in taking the TSC
+ * measurements, a difference of 1% between the actual and the expected value
+ * is tolerated.
+ */
+static void compare_tsc_freq(uint64_t actual, uint64_t expected)
+{
+       uint64_t tolerance, thresh_low, thresh_high;
+
+       tolerance = expected / 100;
+       thresh_low = expected - tolerance;
+       thresh_high = expected + tolerance;
+
+       TEST_ASSERT(thresh_low < actual,
+               "TSC freq is expected to be between %"PRIu64" and %"PRIu64
+               " but it actually is %"PRIu64,
+               thresh_low, thresh_high, actual);
+       TEST_ASSERT(thresh_high > actual,
+               "TSC freq is expected to be between %"PRIu64" and %"PRIu64
+               " but it actually is %"PRIu64,
+               thresh_low, thresh_high, actual);
+}
+
+static void check_tsc_freq(int level)
+{
+       uint64_t tsc_start, tsc_end, tsc_freq;
+
+       /*
+        * Reading the TSC twice with about a second's difference should give
+        * us an approximation of the TSC frequency from the guest's
+        * perspective. Now, this won't be completely accurate, but it should
+        * be good enough for the purposes of this test.
+        */
+       tsc_start = rdmsr(MSR_IA32_TSC);
+       GUEST_SLEEP(1);
+       tsc_end = rdmsr(MSR_IA32_TSC);
+
+       tsc_freq = tsc_end - tsc_start;
+
+       GUEST_CHECK(level, tsc_freq);
+}
+
+static void l2_guest_code(void)
+{
+       check_tsc_freq(UCHECK_L2);
+
+       /* exit to L1 */
+       __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       uint32_t control;
+
+       /* check that L1's frequency looks alright before launching L2 */
+       check_tsc_freq(UCHECK_L1);
+
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+       GUEST_ASSERT(load_vmcs(vmx_pages));
+
+       /* prepare the VMCS for L2 execution */
+       prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       /* enable TSC offsetting and TSC scaling for L2 */
+       control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+       control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+       vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+
+       control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
+       control |= SECONDARY_EXEC_TSC_SCALING;
+       vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
+
+       vmwrite(TSC_OFFSET, TSC_OFFSET_L2);
+       vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2);
+       vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32);
+
+       /* launch L2 */
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+       /* check that L1's frequency still looks good */
+       check_tsc_freq(UCHECK_L1);
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       vm_vaddr_t vmx_pages_gva;
+
+       uint64_t tsc_start, tsc_end;
+       uint64_t tsc_khz;
+       uint64_t l1_scale_factor;
+       uint64_t l0_tsc_freq = 0;
+       uint64_t l1_tsc_freq = 0;
+       uint64_t l2_tsc_freq = 0;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
+       TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
+
+       /*
+        * We set L1's scale factor to be a random number from 2 to 10.
+        * Ideally we would do the same for L2's factor but that one is
+        * referenced by both main() and l1_guest_code() and using a global
+        * variable does not work.
+        */
+       srand(time(NULL));
+       l1_scale_factor = (rand() % 9) + 2;
+       printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor);
+       printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR);
+
+       tsc_start = rdtsc();
+       sleep(1);
+       tsc_end = rdtsc();
+
+       l0_tsc_freq = tsc_end - tsc_start;
+       printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
+
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+       vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+       tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
+       TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
+
+       /* scale down L1's TSC frequency */
+       vcpu_ioctl(vcpu, KVM_SET_TSC_KHZ, (void *) (tsc_khz / l1_scale_factor));
+
+       for (;;) {
+               struct ucall uc;
+
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+               case UCALL_SYNC:
+                       switch (uc.args[0]) {
+                       case USLEEP:
+                               sleep(uc.args[1]);
+                               break;
+                       case UCHECK_L1:
+                               l1_tsc_freq = uc.args[1];
+                               printf("L1's TSC frequency is around: %"PRIu64
+                                      "\n", l1_tsc_freq);
+
+                               compare_tsc_freq(l1_tsc_freq,
+                                                l0_tsc_freq / l1_scale_factor);
+                               break;
+                       case UCHECK_L2:
+                               l2_tsc_freq = uc.args[1];
+                               printf("L2's TSC frequency is around: %"PRIu64
+                                      "\n", l2_tsc_freq);
+
+                               compare_tsc_freq(l2_tsc_freq,
+                                                l1_tsc_freq * L2_SCALE_FACTOR);
+                               break;
+                       }
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+
+done:
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c
new file mode 100644 (file)
index 0000000..a1f5ff4
--- /dev/null
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for VMX-pmu perf capability msr
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * Test the effect of various CPUID settings on the
+ * MSR_IA32_PERF_CAPABILITIES MSR, verify that what is written with
+ * KVM_SET_MSR is _not_ modified by the guest and can be retrieved
+ * with KVM_GET_MSR, and verify that invalid LBR formats are
+ * rejected.
+ */
+#include <sys/ioctl.h>
+
+#include <linux/bitmap.h>
+
+#include "kvm_test_harness.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+static union perf_capabilities {
+       struct {
+               u64     lbr_format:6;
+               u64     pebs_trap:1;
+               u64     pebs_arch_reg:1;
+               u64     pebs_format:4;
+               u64     smm_freeze:1;
+               u64     full_width_write:1;
+               u64     pebs_baseline:1;
+               u64     perf_metrics:1;
+               u64     pebs_output_pt_available:1;
+               u64     anythread_deprecated:1;
+       };
+       u64     capabilities;
+} host_cap;
+
+/*
+ * The LBR format and most PEBS features are immutable, all other features are
+ * fungible (if supported by the host and KVM).
+ */
+static const union perf_capabilities immutable_caps = {
+       .lbr_format = -1,
+       .pebs_trap  = 1,
+       .pebs_arch_reg = 1,
+       .pebs_format = -1,
+       .pebs_baseline = 1,
+};
+
+static const union perf_capabilities format_caps = {
+       .lbr_format = -1,
+       .pebs_format = -1,
+};
+
+static void guest_test_perf_capabilities_gp(uint64_t val)
+{
+       uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val);
+
+       __GUEST_ASSERT(vector == GP_VECTOR,
+                      "Expected #GP for value '0x%lx', got vector '0x%x'",
+                      val, vector);
+}
+
+static void guest_code(uint64_t current_val)
+{
+       int i;
+
+       guest_test_perf_capabilities_gp(current_val);
+       guest_test_perf_capabilities_gp(0);
+
+       for (i = 0; i < 64; i++)
+               guest_test_perf_capabilities_gp(current_val ^ BIT_ULL(i));
+
+       GUEST_DONE();
+}
+
+KVM_ONE_VCPU_TEST_SUITE(vmx_pmu_caps);
+
+/*
+ * Verify that guest WRMSRs to PERF_CAPABILITIES #GP regardless of the value
+ * written, that the guest always sees the userspace controlled value, and that
+ * PERF_CAPABILITIES is immutable after KVM_RUN.
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, guest_wrmsr_perf_capabilities, guest_code)
+{
+       struct ucall uc;
+       int r, i;
+
+       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+
+       vcpu_args_set(vcpu, 1, host_cap.capabilities);
+       vcpu_run(vcpu);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       case UCALL_DONE:
+               break;
+       default:
+               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+       }
+
+       TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES),
+                       host_cap.capabilities);
+
+       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+
+       r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+       TEST_ASSERT(!r, "Post-KVM_RUN write '0' didn't fail");
+
+       for (i = 0; i < 64; i++) {
+               r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+                                 host_cap.capabilities ^ BIT_ULL(i));
+               TEST_ASSERT(!r, "Post-KVM_RUN write '0x%llx' didn't fail",
+                           host_cap.capabilities ^ BIT_ULL(i));
+       }
+}
+
+/*
+ * Verify KVM allows writing PERF_CAPABILITIES with all KVM-supported features
+ * enabled, as well as '0' (to disable all features).
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, basic_perf_capabilities, guest_code)
+{
+       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+}
+
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code)
+{
+       const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities;
+       int bit;
+
+       for_each_set_bit(bit, &fungible_caps, 64) {
+               vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(bit));
+               vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+                            host_cap.capabilities & ~BIT_ULL(bit));
+       }
+       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+}
+
+/*
+ * Verify KVM rejects attempts to set unsupported and/or immutable features in
+ * PERF_CAPABILITIES.  Note, LBR format and PEBS format need to be validated
+ * separately as they are multi-bit values, e.g. toggling or setting a single
+ * bit can generate a false positive without dedicated safeguards.
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, immutable_perf_capabilities, guest_code)
+{
+       const uint64_t reserved_caps = (~host_cap.capabilities |
+                                       immutable_caps.capabilities) &
+                                      ~format_caps.capabilities;
+       union perf_capabilities val = host_cap;
+       int r, bit;
+
+       for_each_set_bit(bit, &reserved_caps, 64) {
+               r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+                                 host_cap.capabilities ^ BIT_ULL(bit));
+               TEST_ASSERT(!r, "%s immutable feature 0x%llx (bit %d) didn't fail",
+                           host_cap.capabilities & BIT_ULL(bit) ? "Setting" : "Clearing",
+                           BIT_ULL(bit), bit);
+       }
+
+       /*
+        * KVM only supports the host's native LBR format, as well as '0' (to
+        * disable LBR support).  Verify KVM rejects all other LBR formats.
+        */
+       for (val.lbr_format = 1; val.lbr_format; val.lbr_format++) {
+               if (val.lbr_format == host_cap.lbr_format)
+                       continue;
+
+               r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
+               TEST_ASSERT(!r, "Bad LBR FMT = 0x%x didn't fail, host = 0x%x",
+                           val.lbr_format, host_cap.lbr_format);
+       }
+
+       /* Ditto for the PEBS format. */
+       for (val.pebs_format = 1; val.pebs_format; val.pebs_format++) {
+               if (val.pebs_format == host_cap.pebs_format)
+                       continue;
+
+               r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
+               TEST_ASSERT(!r, "Bad PEBS FMT = 0x%x didn't fail, host = 0x%x",
+                           val.pebs_format, host_cap.pebs_format);
+       }
+}
+
+/*
+ * Test that LBR MSRs are writable when LBRs are enabled, and then verify that
+ * disabling the vPMU via CPUID also disables LBR support.  Set bits 2:0 of
+ * LBR_TOS as those bits are writable across all uarch implementations (arch
+ * LBRs will need to poke a different MSR).
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code)
+{
+       int r;
+
+       if (!host_cap.lbr_format)
+               return;
+
+       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+       vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
+
+       vcpu_clear_cpuid_entry(vcpu, X86_PROPERTY_PMU_VERSION.function);
+
+       r = _vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
+       TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU");
+}
+
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, perf_capabilities_unsupported, guest_code)
+{
+       uint64_t val;
+       int i, r;
+
+       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+       val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES);
+       TEST_ASSERT_EQ(val, host_cap.capabilities);
+
+       vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_PDCM);
+
+       val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES);
+       TEST_ASSERT_EQ(val, 0);
+
+       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+
+       for (i = 0; i < 64; i++) {
+               r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(i));
+               TEST_ASSERT(!r, "Setting PERF_CAPABILITIES bit %d (= 0x%llx) should fail without PDCM",
+                           i, BIT_ULL(i));
+       }
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_is_pmu_enabled());
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
+
+       TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
+       TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
+
+       host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
+
+       TEST_ASSERT(host_cap.full_width_write,
+                   "Full-width writes should always be supported");
+
+       return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c
new file mode 100644 (file)
index 0000000..00dd2ac
--- /dev/null
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VMX-preemption timer test
+ *
+ * Copyright (C) 2020, Google, LLC.
+ *
+ * Test to ensure that VM-Enter after migration doesn't
+ * incorrectly restart the timer with the full timer
+ * value instead of the partially decayed timer value.
+ *
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
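+/*
+ * Value programmed into the VMX-preemption timer, and the number of timer
+ * ticks L2 busy-waits for before forcing save/restore, i.e. the timer should
+ * be partially decayed when the vCPU state is migrated.
+ */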
+#define PREEMPTION_TIMER_VALUE                 100000000ull
+#define PREEMPTION_TIMER_VALUE_THRESHOLD1       80000000ull
+
+u32 vmx_pt_rate;
+bool l2_save_restore_done;
+static u64 l2_vmx_pt_start;
+volatile u64 l2_vmx_pt_finish;
+
+union vmx_basic basic;
+union vmx_ctrl_msr ctrl_pin_rev;
+union vmx_ctrl_msr ctrl_exit_rev;
+
+void l2_guest_code(void)
+{
+       u64 vmx_pt_delta;
+
+       vmcall();
+       l2_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
+
+       /*
+        * Wait until the 1st threshold has passed
+        */
+       do {
+               l2_vmx_pt_finish = rdtsc();
+               vmx_pt_delta = (l2_vmx_pt_finish - l2_vmx_pt_start) >>
+                               vmx_pt_rate;
+       } while (vmx_pt_delta < PREEMPTION_TIMER_VALUE_THRESHOLD1);
+
+       /*
+        * Force L2 through Save and Restore cycle
+        */
+       GUEST_SYNC(1);
+
+       l2_save_restore_done = 1;
+
+       /*
+        * Now wait for the preemption timer to fire and
+        * exit to L1
+        */
+       while ((l2_vmx_pt_finish = rdtsc()))
+               ;
+}
+
+void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       u64 l1_vmx_pt_start;
+       u64 l1_vmx_pt_finish;
+       u64 l1_tsc_deadline, l2_tsc_deadline;
+
+       GUEST_ASSERT(vmx_pages->vmcs_gpa);
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+       GUEST_ASSERT(load_vmcs(vmx_pages));
+       GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+       prepare_vmcs(vmx_pages, l2_guest_code,
+                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       /*
+        * Check for Preemption timer support
+        */
+       basic.val = rdmsr(MSR_IA32_VMX_BASIC);
+       ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS
+                       : MSR_IA32_VMX_PINBASED_CTLS);
+       ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT_CTLS
+                       : MSR_IA32_VMX_EXIT_CTLS);
+
+       if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) ||
+           !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
+               return;
+
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+       vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));
+
+       /*
+        * Turn on PIN control and resume the guest
+        */
+       GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL,
+                             vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
+                             PIN_BASED_VMX_PREEMPTION_TIMER));
+
+       GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE,
+                             PREEMPTION_TIMER_VALUE));
+
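+       /*
+        * Bits 4:0 of IA32_VMX_MISC enumerate the TSC bit at which the
+        * VMX-preemption timer ticks, i.e. the timer counts down once every
+        * 2^vmx_pt_rate TSC cycles.
+        */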
+       vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;
+
+       l2_save_restore_done = 0;
+
+       l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
+
+       GUEST_ASSERT(!vmresume());
+
+       l1_vmx_pt_finish = rdtsc();
+
+       /*
+        * Ensure exit from L2 happens after L2 goes through
+        * save and restore
+        */
+       GUEST_ASSERT(l2_save_restore_done);
+
+       /*
+        * Ensure the exit from L2 is due to preemption timer expiry
+        */
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER);
+
+       l1_tsc_deadline = l1_vmx_pt_start +
+               (PREEMPTION_TIMER_VALUE << vmx_pt_rate);
+
+       l2_tsc_deadline = l2_vmx_pt_start +
+               (PREEMPTION_TIMER_VALUE << vmx_pt_rate);
+
+       /*
+        * Sync with the host and pass the l1|l2 pt_expiry_finish times and
+        * tsc deadlines so that host can verify they are as expected
+        */
+       GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline,
+               l2_vmx_pt_finish, l2_tsc_deadline);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+       if (vmx_pages)
+               l1_guest_code(vmx_pages);
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       vm_vaddr_t vmx_pages_gva = 0;
+
+       struct kvm_regs regs1, regs2;
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+       struct kvm_x86_state *state;
+       struct ucall uc;
+       int stage;
+
+       /*
+        * AMD currently does not implement any VMX features, so for now we
+        * just early out.
+        */
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+
+       /* Create VM */
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       vcpu_regs_get(vcpu, &regs1);
+
+       vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+       for (stage = 1;; stage++) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+
+               /* UCALL_SYNC is handled here.  */
+               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+                           uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+                           stage, (ulong)uc.args[1]);
+               /*
+                * If this is stage 2, verify that the VMX preemption timer
+                * expiry is as expected.
+                * From L1's perspective, verify the preemption timer hasn't
+                * expired too early.
+                * From L2's perspective, verify the preemption timer hasn't
+                * expired too late.
+                */
+               if (stage == 2) {
+
+                       pr_info("Stage %d: L1 PT expiry TSC (%lu) , L1 TSC deadline (%lu)\n",
+                               stage, uc.args[2], uc.args[3]);
+
+                       pr_info("Stage %d: L2 PT expiry TSC (%lu) , L2 TSC deadline (%lu)\n",
+                               stage, uc.args[4], uc.args[5]);
+
+                       TEST_ASSERT(uc.args[2] >= uc.args[3],
+                               "Stage %d: L1 PT expiry TSC (%lu) < L1 TSC deadline (%lu)",
+                               stage, uc.args[2], uc.args[3]);
+
+                       TEST_ASSERT(uc.args[4] < uc.args[5],
+                               "Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)",
+                               stage, uc.args[4], uc.args[5]);
+               }
+
+               state = vcpu_save_state(vcpu);
+               memset(&regs1, 0, sizeof(regs1));
+               vcpu_regs_get(vcpu, &regs1);
+
+               kvm_vm_release(vm);
+
+               /* Restore state in a new VM.  */
+               vcpu = vm_recreate_with_one_vcpu(vm);
+               vcpu_load_state(vcpu, state);
+               kvm_x86_state_cleanup(state);
+
+               memset(&regs2, 0, sizeof(regs2));
+               vcpu_regs_get(vcpu, &regs2);
+               TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+                           "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+                           (ulong) regs2.rdi, (ulong) regs2.rsi);
+       }
+
+done:
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c
new file mode 100644 (file)
index 0000000..67a62a5
--- /dev/null
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_set_nested_state_test
+ *
+ * Copyright (C) 2019, Google LLC.
+ *
+ * This test verifies the integrity of calling the ioctl KVM_SET_NESTED_STATE.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <errno.h>
+#include <linux/kvm.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+/*
+ * Mirror of VMCS12_REVISION in arch/x86/kvm/vmx/vmcs12.h. If that value
+ * changes this should be updated.
+ */
+#define VMCS12_REVISION 0x11e57ed0
+
+bool have_evmcs;
+
+void test_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state *state)
+{
+       vcpu_nested_state_set(vcpu, state);
+}
+
+void test_nested_state_expect_errno(struct kvm_vcpu *vcpu,
+                                   struct kvm_nested_state *state,
+                                   int expected_errno)
+{
+       int rv;
+
+       rv = __vcpu_nested_state_set(vcpu, state);
+       TEST_ASSERT(rv == -1 && errno == expected_errno,
+               "Expected %s (%d) from vcpu_nested_state_set but got rv: %i errno: %s (%d)",
+               strerror(expected_errno), expected_errno, rv, strerror(errno),
+               errno);
+}
+
+void test_nested_state_expect_einval(struct kvm_vcpu *vcpu,
+                                    struct kvm_nested_state *state)
+{
+       test_nested_state_expect_errno(vcpu, state, EINVAL);
+}
+
+void test_nested_state_expect_efault(struct kvm_vcpu *vcpu,
+                                    struct kvm_nested_state *state)
+{
+       test_nested_state_expect_errno(vcpu, state, EFAULT);
+}
+
+void set_revision_id_for_vmcs12(struct kvm_nested_state *state,
+                               u32 vmcs12_revision)
+{
+       /* Set revision_id in vmcs12 to vmcs12_revision. */
+       memcpy(&state->data, &vmcs12_revision, sizeof(u32));
+}
+
+void set_default_state(struct kvm_nested_state *state)
+{
+       memset(state, 0, sizeof(*state));
+       state->flags = KVM_STATE_NESTED_RUN_PENDING |
+                      KVM_STATE_NESTED_GUEST_MODE;
+       state->format = 0;
+       state->size = sizeof(*state);
+}
+
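+/*
+ * Populate a minimal, plausible VMX nested state: format '0' (VMX), page
+ * aligned vmxon/vmcs12 addresses, no SMM flags, and the canonical vmcs12
+ * revision ID (plus the eVMCS flag when enlightened VMCS is in play).
+ */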
+void set_default_vmx_state(struct kvm_nested_state *state, int size)
+{
+       memset(state, 0, size);
+       if (have_evmcs)
+               state->flags = KVM_STATE_NESTED_EVMCS;
+       state->format = 0;
+       state->size = size;
+       state->hdr.vmx.vmxon_pa = 0x1000;
+       state->hdr.vmx.vmcs12_pa = 0x2000;
+       state->hdr.vmx.smm.flags = 0;
+       set_revision_id_for_vmcs12(state, VMCS12_REVISION);
+}
+
+void test_vmx_nested_state(struct kvm_vcpu *vcpu)
+{
+       /* Add a page for VMCS12. */
+       const int state_sz = sizeof(struct kvm_nested_state) + getpagesize();
+       struct kvm_nested_state *state =
+               (struct kvm_nested_state *)malloc(state_sz);
+
+       /* The format must be set to 0. 0 for VMX, 1 for SVM. */
+       set_default_vmx_state(state, state_sz);
+       state->format = 1;
+       test_nested_state_expect_einval(vcpu, state);
+
+       /*
+        * We cannot virtualize anything if the guest does not have VMX
+        * enabled.
+        */
+       set_default_vmx_state(state, state_sz);
+       test_nested_state_expect_einval(vcpu, state);
+
+       /*
+        * We cannot virtualize anything if the guest does not have VMX
+        * enabled.  We expect KVM_SET_NESTED_STATE to return 0 if vmxon_pa
+        * is set to -1ull, but the flags must be zero.
+        */
+       set_default_vmx_state(state, state_sz);
+       state->hdr.vmx.vmxon_pa = -1ull;
+       test_nested_state_expect_einval(vcpu, state);
+
+       state->hdr.vmx.vmcs12_pa = -1ull;
+       state->flags = KVM_STATE_NESTED_EVMCS;
+       test_nested_state_expect_einval(vcpu, state);
+
+       state->flags = 0;
+       test_nested_state(vcpu, state);
+
+       /* Enable VMX in the guest CPUID. */
+       vcpu_set_cpuid_feature(vcpu, X86_FEATURE_VMX);
+
+       /*
+        * Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without
+        * setting the nested state. When the eVMCS flag is not set, the
+        * expected return value is '0'.
+        */
+       set_default_vmx_state(state, state_sz);
+       state->flags = 0;
+       state->hdr.vmx.vmxon_pa = -1ull;
+       state->hdr.vmx.vmcs12_pa = -1ull;
+       test_nested_state(vcpu, state);
+
+       /*
+        * When eVMCS is supported, the eVMCS flag can only be set if the
+        * enlightened VMCS capability has been enabled.
+        */
+       if (have_evmcs) {
+               state->flags = KVM_STATE_NESTED_EVMCS;
+               test_nested_state_expect_einval(vcpu, state);
+               vcpu_enable_evmcs(vcpu);
+               test_nested_state(vcpu, state);
+       }
+
+       /* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */
+       state->hdr.vmx.smm.flags = 1;
+       test_nested_state_expect_einval(vcpu, state);
+
+       /* Invalid flags are rejected. */
+       set_default_vmx_state(state, state_sz);
+       state->hdr.vmx.flags = ~0;
+       test_nested_state_expect_einval(vcpu, state);
+
+       /* It is invalid to have vmxon_pa == -1ull and vmcs_pa != -1ull. */
+       set_default_vmx_state(state, state_sz);
+       state->hdr.vmx.vmxon_pa = -1ull;
+       state->flags = 0;
+       test_nested_state_expect_einval(vcpu, state);
+
+       /* It is invalid to have vmxon_pa set to a non-page aligned address. */
+       set_default_vmx_state(state, state_sz);
+       state->hdr.vmx.vmxon_pa = 1;
+       test_nested_state_expect_einval(vcpu, state);
+
+       /*
+        * It is invalid to have KVM_STATE_NESTED_SMM_GUEST_MODE and
+        * KVM_STATE_NESTED_GUEST_MODE set together.
+        */
+       set_default_vmx_state(state, state_sz);
+       state->flags = KVM_STATE_NESTED_GUEST_MODE |
+                      KVM_STATE_NESTED_RUN_PENDING;
+       state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
+       test_nested_state_expect_einval(vcpu, state);
+
+       /*
+        * It is invalid to have any of the SMM flags set besides:
+        *      KVM_STATE_NESTED_SMM_GUEST_MODE
+        *      KVM_STATE_NESTED_SMM_VMXON
+        */
+       set_default_vmx_state(state, state_sz);
+       state->hdr.vmx.smm.flags = ~(KVM_STATE_NESTED_SMM_GUEST_MODE |
+                               KVM_STATE_NESTED_SMM_VMXON);
+       test_nested_state_expect_einval(vcpu, state);
+
+       /* Outside SMM, SMM flags must be zero. */
+       set_default_vmx_state(state, state_sz);
+       state->flags = 0;
+       state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
+       test_nested_state_expect_einval(vcpu, state);
+
+       /*
+        * Size must be large enough to fit kvm_nested_state and vmcs12
+        * if VMCS12 physical address is set
+        */
+       set_default_vmx_state(state, state_sz);
+       state->size = sizeof(*state);
+       state->flags = 0;
+       test_nested_state_expect_einval(vcpu, state);
+
+       set_default_vmx_state(state, state_sz);
+       state->size = sizeof(*state);
+       state->flags = 0;
+       state->hdr.vmx.vmcs12_pa = -1;
+       test_nested_state(vcpu, state);
+
+       /*
+        * KVM_SET_NESTED_STATE succeeds with invalid VMCS
+        * contents but L2 not running.
+        */
+       set_default_vmx_state(state, state_sz);
+       state->flags = 0;
+       test_nested_state(vcpu, state);
+
+       /* Invalid flags are rejected, even if no VMCS loaded. */
+       set_default_vmx_state(state, state_sz);
+       state->size = sizeof(*state);
+       state->flags = 0;
+       state->hdr.vmx.vmcs12_pa = -1;
+       state->hdr.vmx.flags = ~0;
+       test_nested_state_expect_einval(vcpu, state);
+
+       /* vmxon_pa cannot be the same address as vmcs_pa. */
+       set_default_vmx_state(state, state_sz);
+       state->hdr.vmx.vmxon_pa = 0;
+       state->hdr.vmx.vmcs12_pa = 0;
+       test_nested_state_expect_einval(vcpu, state);
+
+       /*
+        * Test that if we leave nesting the state reflects that when we get
+        * it again.
+        */
+       set_default_vmx_state(state, state_sz);
+       state->hdr.vmx.vmxon_pa = -1ull;
+       state->hdr.vmx.vmcs12_pa = -1ull;
+       state->flags = 0;
+       test_nested_state(vcpu, state);
+       vcpu_nested_state_get(vcpu, state);
+       TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
+                   "Size must be between %ld and %d.  The size returned was %d.",
+                   sizeof(*state), state_sz, state->size);
+       TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull.");
+       TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull.");
+
+       free(state);
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vm *vm;
+       struct kvm_nested_state state;
+       struct kvm_vcpu *vcpu;
+
+       have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+
+       /*
+        * AMD currently does not implement set_nested_state, so for now we
+        * just early out.
+        */
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+       vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+       /*
+        * First run tests with VMX disabled to check error handling.
+        */
+       vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
+
+       /* Passing a NULL kvm_nested_state causes a EFAULT. */
+       test_nested_state_expect_efault(vcpu, NULL);
+
+       /* 'size' cannot be smaller than sizeof(kvm_nested_state). */
+       set_default_state(&state);
+       state.size = 0;
+       test_nested_state_expect_einval(vcpu, &state);
+
+       /*
+        * Setting the flags 0xf fails the flags check.  The only flags that
+        * can be used are:
+        *     KVM_STATE_NESTED_GUEST_MODE
+        *     KVM_STATE_NESTED_RUN_PENDING
+        *     KVM_STATE_NESTED_EVMCS
+        */
+       set_default_state(&state);
+       state.flags = 0xf;
+       test_nested_state_expect_einval(vcpu, &state);
+
+       /*
+        * If KVM_STATE_NESTED_RUN_PENDING is set then
+        * KVM_STATE_NESTED_GUEST_MODE has to be set as well.
+        */
+       set_default_state(&state);
+       state.flags = KVM_STATE_NESTED_RUN_PENDING;
+       test_nested_state_expect_einval(vcpu, &state);
+
+       test_vmx_nested_state(vcpu);
+
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c
new file mode 100644 (file)
index 0000000..2ceb5c7
--- /dev/null
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_tsc_adjust_test
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * IA32_TSC_ADJUST test
+ *
+ * According to the SDM, "if an execution of WRMSR to the
+ * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC,
+ * the logical processor also adds (or subtracts) value X from the
+ * IA32_TSC_ADJUST MSR."
+ *
+ * Note that when L1 doesn't intercept writes to IA32_TSC, a
+ * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC
+ * value.
+ *
+ * This test verifies that this unusual case is handled correctly.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#ifndef MSR_IA32_TSC_ADJUST
+#define MSR_IA32_TSC_ADJUST 0x3b
+#endif
+
+#define TSC_ADJUST_VALUE (1ll << 32)
+#define TSC_OFFSET_VALUE -(1ll << 48)
+
+enum {
+       PORT_ABORT = 0x1000,
+       PORT_REPORT,
+       PORT_DONE,
+};
+
+enum {
+       VMXON_PAGE = 0,
+       VMCS_PAGE,
+       MSR_BITMAP_PAGE,
+
+       NUM_VMX_PAGES,
+};
+
+/* The virtual machine object. */
+static struct kvm_vm *vm;
+
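+/* Report IA32_TSC_ADJUST to the host and assert it doesn't exceed @max. */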
+static void check_ia32_tsc_adjust(int64_t max)
+{
+       int64_t adjust;
+
+       adjust = rdmsr(MSR_IA32_TSC_ADJUST);
+       GUEST_SYNC(adjust);
+       GUEST_ASSERT(adjust <= max);
+}
+
+static void l2_guest_code(void)
+{
+       uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE;
+
+       wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE);
+       check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
+
+       /* Exit to L1 */
+       __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       uint32_t control;
+       uintptr_t save_cr3;
+
+       GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE);
+       wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
+       check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
+
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+       GUEST_ASSERT(load_vmcs(vmx_pages));
+
+       /* Prepare the VMCS for L2 execution. */
+       prepare_vmcs(vmx_pages, l2_guest_code,
+                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+       control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+       control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+       vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+       vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
+
+       /* Jump into L2.  First, test failure to load guest CR3.  */
+       save_cr3 = vmreadz(GUEST_CR3);
+       vmwrite(GUEST_CR3, -1ull);
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
+                    (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
+       check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
+       vmwrite(GUEST_CR3, save_cr3);
+
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+       check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
+
+       GUEST_DONE();
+}
+
+static void report(int64_t val)
+{
+       pr_info("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
+               val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
+}
+
+int main(int argc, char *argv[])
+{
+       vm_vaddr_t vmx_pages_gva;
+       struct kvm_vcpu *vcpu;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+       vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code);
+
+       /* Allocate VMX pages and shared descriptors (vmx_pages). */
+       vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+       for (;;) {
+               struct ucall uc;
+
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       report(uc.args[1]);
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+
+done:
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
new file mode 100644 (file)
index 0000000..a76078a
--- /dev/null
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * xapic_ipi_test
+ *
+ * Copyright (C) 2020, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
+ * another vCPU that is halted when KVM's backing page for the APIC access
+ * address has been moved by mm.
+ *
+ * The test starts two vCPUs: one that sends IPIs and one that continually
+ * executes HLT. The sender checks that the halter has woken from the HLT and
+ * has reentered HLT before sending the next IPI. While the vCPUs are running,
+ * the host continually calls migrate_pages to move all of the process' pages
+ * amongst the available numa nodes on the machine.
+ *
+ * Migration is a command line option. When used on non-NUMA machines, the
+ * test will exit with an error. The test is still useful on non-NUMA
+ * machines for testing IPIs.
+ */
+#include <getopt.h>
+#include <pthread.h>
+#include <inttypes.h>
+#include <string.h>
+#include <time.h>
+
+#include "kvm_util.h"
+#include "numaif.h"
+#include "processor.h"
+#include "test_util.h"
+#include "vmx.h"
+
+/* Default running time for the test */
+#define DEFAULT_RUN_SECS 3
+
+/* Default delay between migrate_pages calls (microseconds) */
+#define DEFAULT_DELAY_USECS 500000
+
+/*
+ * Vector for IPI from sender vCPU to halting vCPU.
+ * Value is arbitrary and was chosen for the alternating bit pattern. Any
+ * value should work.
+ */
+#define IPI_VECTOR      0xa5
+
+/*
+ * Incremented in the IPI handler. Provides evidence to the sender that the IPI
+ * arrived at the destination
+ */
+static volatile uint64_t ipis_rcvd;
+
+/* Data struct shared between host main thread and vCPUs */
+struct test_data_page {
+       uint32_t halter_apic_id;
+       volatile uint64_t hlt_count;
+       volatile uint64_t wake_count;
+       uint64_t ipis_sent;
+       uint64_t migrations_attempted;
+       uint64_t migrations_completed;
+       uint32_t icr;
+       uint32_t icr2;
+       uint32_t halter_tpr;
+       uint32_t halter_ppr;
+
+       /*
+        *  Record local version register as a cross-check that APIC access
+        *  worked. Value should match what KVM reports (APIC_VERSION in
+        *  arch/x86/kvm/lapic.c). If test is failing, check that values match
+        *  to determine whether APIC access exits are working.
+        */
+       uint32_t halter_lvr;
+};
+
+struct thread_params {
+       struct test_data_page *data;
+       struct kvm_vcpu *vcpu;
+       uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
+};
+
+void verify_apic_base_addr(void)
+{
+       uint64_t msr = rdmsr(MSR_IA32_APICBASE);
+       uint64_t base = GET_APIC_BASE(msr);
+
+       GUEST_ASSERT(base == APIC_DEFAULT_GPA);
+}
+
+static void halter_guest_code(struct test_data_page *data)
+{
+       verify_apic_base_addr();
+       xapic_enable();
+
+       data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
+       data->halter_lvr = xapic_read_reg(APIC_LVR);
+
+       /*
+        * Loop forever HLTing and recording halts & wakes. Disable interrupts
+        * each time around to minimize window between signaling the pending
+        * halt to the sender vCPU and executing the halt. No need to disable on
+        * first run as this vCPU executes first and the host waits for it to
+        * signal going into first halt before starting the sender vCPU. Record
+        * TPR and PPR for diagnostic purposes in case the test fails.
+        */
+       for (;;) {
+               data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
+               data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
+               data->hlt_count++;
+               asm volatile("sti; hlt; cli");
+               data->wake_count++;
+       }
+}
+
+/*
+ * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
+ * enable diagnosing errant writes to the APIC access address backing page in
+ * case of test failure.
+ */
+static void guest_ipi_handler(struct ex_regs *regs)
+{
+       ipis_rcvd++;
+       xapic_write_reg(APIC_EOI, 77);
+}
+
+static void sender_guest_code(struct test_data_page *data)
+{
+       uint64_t last_wake_count;
+       uint64_t last_hlt_count;
+       uint64_t last_ipis_rcvd_count;
+       uint32_t icr_val;
+       uint32_t icr2_val;
+       uint64_t tsc_start;
+
+       verify_apic_base_addr();
+       xapic_enable();
+
+       /*
+        * Init interrupt command register for sending IPIs
+        *
+        * Delivery mode=fixed, per SDM:
+        *   "Delivers the interrupt specified in the vector field to the target
+        *    processor."
+        *
+        * Destination mode=physical i.e. specify target by its local APIC
+        * ID. This vCPU assumes that the halter vCPU has already started and
+        * set data->halter_apic_id.
+        */
+       icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
+       icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
+       data->icr = icr_val;
+       data->icr2 = icr2_val;
+
+       last_wake_count = data->wake_count;
+       last_hlt_count = data->hlt_count;
+       last_ipis_rcvd_count = ipis_rcvd;
+       for (;;) {
+               /*
+                * Send IPI to halter vCPU.
+                * First IPI can be sent unconditionally because halter vCPU
+                * starts earlier.
+                */
+               xapic_write_reg(APIC_ICR2, icr2_val);
+               xapic_write_reg(APIC_ICR, icr_val);
+               data->ipis_sent++;
+
+               /*
+                * Wait up to ~1 sec for halter to indicate that it has:
+                * 1. Received the IPI
+                * 2. Woken up from the halt
+                * 3. Gone back into halt
+                * Current CPUs typically run at 2.x GHz which is ~2
+                * billion ticks per second.
+                */
+               tsc_start = rdtsc();
+               while (rdtsc() - tsc_start < 2000000000) {
+                       if ((ipis_rcvd != last_ipis_rcvd_count) &&
+                           (data->wake_count != last_wake_count) &&
+                           (data->hlt_count != last_hlt_count))
+                               break;
+               }
+
+               GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
+                            (data->wake_count != last_wake_count) &&
+                            (data->hlt_count != last_hlt_count));
+
+               last_wake_count = data->wake_count;
+               last_hlt_count = data->hlt_count;
+               last_ipis_rcvd_count = ipis_rcvd;
+       }
+}
+
+static void *vcpu_thread(void *arg)
+{
+       struct thread_params *params = (struct thread_params *)arg;
+       struct kvm_vcpu *vcpu = params->vcpu;
+       struct ucall uc;
+       int old;
+       int r;
+
+       r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+       TEST_ASSERT(r == 0,
+                   "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+                   vcpu->id, r);
+
+       fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
+       vcpu_run(vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       if (get_ucall(vcpu, &uc) == UCALL_ABORT) {
+               TEST_ASSERT(false,
+                           "vCPU %u exited with error: %s.\n"
+                           "Sending vCPU sent %lu IPIs to halting vCPU\n"
+                           "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
+                           "Halter TPR=%#x PPR=%#x LVR=%#x\n"
+                           "Migrations attempted: %lu\n"
+                           "Migrations completed: %lu",
+                           vcpu->id, (const char *)uc.args[0],
+                           params->data->ipis_sent, params->data->hlt_count,
+                           params->data->wake_count,
+                           *params->pipis_rcvd, params->data->halter_tpr,
+                           params->data->halter_ppr, params->data->halter_lvr,
+                           params->data->migrations_attempted,
+                           params->data->migrations_completed);
+       }
+
+       return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
+{
+       void *retval;
+       int r;
+
+       r = pthread_cancel(thread);
+       TEST_ASSERT(r == 0,
+                   "pthread_cancel on vcpu_id=%d failed with errno=%d",
+                   vcpu->id, r);
+
+       r = pthread_join(thread, &retval);
+       TEST_ASSERT(r == 0,
+                   "pthread_join on vcpu_id=%d failed with errno=%d",
+                   vcpu->id, r);
+       TEST_ASSERT(retval == PTHREAD_CANCELED,
+                   "expected retval=%p, got %p", PTHREAD_CANCELED,
+                   retval);
+}
+
+void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
+                  uint64_t *pipis_rcvd)
+{
+       long pages_not_moved;
+       unsigned long nodemask = 0;
+       unsigned long nodemasks[sizeof(nodemask) * 8];
+       int nodes = 0;
+       time_t start_time, last_update, now;
+       time_t interval_secs = 1;
+       int i, r;
+       int from, to;
+       unsigned long bit;
+       uint64_t hlt_count;
+       uint64_t wake_count;
+       uint64_t ipis_sent;
+
+       fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
+               delay_usecs);
+
+       /* Get set of first 64 numa nodes available */
+       r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
+                         0, MPOL_F_MEMS_ALLOWED);
+       TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
+
+       fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
+               "(each set bit indicates the node is present): %#lx\n",
+               sizeof(nodemask) * 8, nodemask);
+
+       /*
+        * Init array of masks containing a single bit each, one for each
+        * available node.  migrate_pages() called below requires specifying
+        * nodes as bit masks.
+        */
+       for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
+               if (nodemask & bit) {
+                       nodemasks[nodes] = nodemask & bit;
+                       nodes++;
+               }
+       }
+
+       TEST_ASSERT(nodes > 1,
+                   "Did not find at least 2 numa nodes. Can't do migration");
+
+       fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);
+
+       from = 0;
+       to = 1;
+       start_time = time(NULL);
+       last_update = start_time;
+
+       ipis_sent = data->ipis_sent;
+       hlt_count = data->hlt_count;
+       wake_count = data->wake_count;
+
+       while ((int)(time(NULL) - start_time) < run_secs) {
+               data->migrations_attempted++;
+
+               /*
+                * migrate_pages with PID=0 will migrate all pages of this
+                * process between the nodes specified as bitmasks. The page
+                * backing the APIC access address belongs to this process
+                * because it is allocated by KVM in the context of the
+                * KVM_CREATE_VCPU ioctl. If that assumption ever changes this
+                * test may break or give a false positive signal.
+                */
+               pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]),
+                                               &nodemasks[from],
+                                               &nodemasks[to]);
+               if (pages_not_moved < 0)
+                       fprintf(stderr,
+                               "migrate_pages failed, errno=%d\n", errno);
+               else if (pages_not_moved > 0)
+                       fprintf(stderr,
+                               "migrate_pages could not move %ld pages\n",
+                               pages_not_moved);
+               else
+                       data->migrations_completed++;
+
+               from = to;
+               to++;
+               if (to == nodes)
+                       to = 0;
+
+               now = time(NULL);
+               if (((now - start_time) % interval_secs == 0) &&
+                   (now != last_update)) {
+                       last_update = now;
+                       fprintf(stderr,
+                               "%lu seconds: Migrations attempted=%lu completed=%lu, "
+                               "IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
+                               now - start_time, data->migrations_attempted,
+                               data->migrations_completed,
+                               data->ipis_sent, *pipis_rcvd,
+                               data->hlt_count, data->wake_count);
+
+                       TEST_ASSERT(ipis_sent != data->ipis_sent &&
+                                   hlt_count != data->hlt_count &&
+                                   wake_count != data->wake_count,
+                                   "IPI, HLT and wake counts have not increased "
+                                   "in the last %lu seconds. "
+                                   "HLTer is likely hung.", interval_secs);
+
+                       ipis_sent = data->ipis_sent;
+                       hlt_count = data->hlt_count;
+                       wake_count = data->wake_count;
+               }
+               usleep(delay_usecs);
+       }
+}
+
+void get_cmdline_args(int argc, char *argv[], int *run_secs,
+                     bool *migrate, int *delay_usecs)
+{
+       for (;;) {
+               int opt = getopt(argc, argv, "s:d:m");
+
+               if (opt == -1)
+                       break;
+               switch (opt) {
+               case 's':
+                       *run_secs = parse_size(optarg);
+                       break;
+               case 'm':
+                       *migrate = true;
+                       break;
+               case 'd':
+                       *delay_usecs = parse_size(optarg);
+                       break;
+               default:
+                       TEST_ASSERT(false,
+                                   "Usage: -s <runtime seconds>. Default is %d seconds.\n"
+                                   "-m adds calls to migrate_pages while vCPUs are running."
+                                   " Default is no migrations.\n"
+                                   "-d <delay microseconds> - delay between migrate_pages() calls."
+                                   " Default is %d microseconds.",
+                                   DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
+               }
+       }
+}
+
+int main(int argc, char *argv[])
+{
+       int r;
+       int wait_secs;
+       const int max_halter_wait = 10;
+       int run_secs = 0;
+       int delay_usecs = 0;
+       struct test_data_page *data;
+       vm_vaddr_t test_data_page_vaddr;
+       bool migrate = false;
+       pthread_t threads[2];
+       struct thread_params params[2];
+       struct kvm_vm *vm;
+       uint64_t *pipis_rcvd;
+
+       get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
+       if (run_secs <= 0)
+               run_secs = DEFAULT_RUN_SECS;
+       if (delay_usecs <= 0)
+               delay_usecs = DEFAULT_DELAY_USECS;
+
+       vm = vm_create_with_one_vcpu(&params[0].vcpu, halter_guest_code);
+
+       vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
+
+       virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+       params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
+
+       test_data_page_vaddr = vm_vaddr_alloc_page(vm);
+       data = addr_gva2hva(vm, test_data_page_vaddr);
+       memset(data, 0, sizeof(*data));
+       params[0].data = data;
+       params[1].data = data;
+
+       vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr);
+       vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr);
+
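+       /*
+        * Resolve the guest's ipis_rcvd counter to a host pointer so that the
+        * host-side threads can observe IPI delivery directly.
+        */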
+       pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
+       params[0].pipis_rcvd = pipis_rcvd;
+       params[1].pipis_rcvd = pipis_rcvd;
+
+       /* Start halter vCPU thread and wait for it to execute first HLT. */
+       r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
+       TEST_ASSERT(r == 0,
+                   "pthread_create halter failed errno=%d", errno);
+       fprintf(stderr, "Halter vCPU thread started\n");
+
+       wait_secs = 0;
+       while ((wait_secs < max_halter_wait) && !data->hlt_count) {
+               sleep(1);
+               wait_secs++;
+       }
+
+       TEST_ASSERT(data->hlt_count,
+                   "Halter vCPU did not execute first HLT within %d seconds",
+                   max_halter_wait);
+
+       fprintf(stderr,
+               "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
+               data->halter_apic_id, wait_secs);
+
+       r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
+       TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno);
+
+       fprintf(stderr,
+               "IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n",
+               run_secs);
+
+       if (!migrate)
+               sleep(run_secs);
+       else
+               do_migrations(data, run_secs, delay_usecs, pipis_rcvd);
+
+       /*
+        * Cancel threads and wait for them to stop.
+        */
+       cancel_join_vcpu_thread(threads[0], params[0].vcpu);
+       cancel_join_vcpu_thread(threads[1], params[1].vcpu);
+
+       fprintf(stderr,
+               "Test successful after running for %d seconds.\n"
+               "Sending vCPU sent %lu IPIs to halting vCPU\n"
+               "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
+               "Halter APIC ID=%#x\n"
+               "Sender ICR value=%#x ICR2 value=%#x\n"
+               "Halter TPR=%#x PPR=%#x LVR=%#x\n"
+               "Migrations attempted: %lu\n"
+               "Migrations completed: %lu\n",
+               run_secs, data->ipis_sent,
+               data->hlt_count, data->wake_count, *pipis_rcvd,
+               data->halter_apic_id,
+               data->icr, data->icr2,
+               data->halter_tpr, data->halter_ppr, data->halter_lvr,
+               data->migrations_attempted, data->migrations_completed);
+
+       kvm_vm_free(vm);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c
new file mode 100644 (file)
index 0000000..88bcca1
--- /dev/null
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+struct xapic_vcpu {
+       struct kvm_vcpu *vcpu;
+       bool is_x2apic;
+       bool has_xavic_errata;
+};
+
+static void xapic_guest_code(void)
+{
+       asm volatile("cli");
+
+       xapic_enable();
+
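+       /* The host passes the ICR value to test via the IRR; see ____test_icr(). */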
+       while (1) {
+               uint64_t val = (u64)xapic_read_reg(APIC_IRR) |
+                              (u64)xapic_read_reg(APIC_IRR + 0x10) << 32;
+
+               xapic_write_reg(APIC_ICR2, val >> 32);
+               xapic_write_reg(APIC_ICR, val);
+               GUEST_SYNC(val);
+       }
+}
+
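+/*
+ * ICR bits the test treats as reserved in x2APIC mode; the guest expects a
+ * write that sets any of them to fault.
+ */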
+#define X2APIC_RSVD_BITS_MASK  (GENMASK_ULL(31, 20) | \
+                               GENMASK_ULL(17, 16) | \
+                               GENMASK_ULL(13, 13))
+
+static void x2apic_guest_code(void)
+{
+       asm volatile("cli");
+
+       x2apic_enable();
+
+       do {
+               uint64_t val = x2apic_read_reg(APIC_IRR) |
+                              x2apic_read_reg(APIC_IRR + 0x10) << 32;
+
+               if (val & X2APIC_RSVD_BITS_MASK) {
+                       x2apic_write_reg_fault(APIC_ICR, val);
+               } else {
+                       x2apic_write_reg(APIC_ICR, val);
+                       GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ICR), val);
+               }
+               GUEST_SYNC(val);
+       } while (1);
+}
+
+static void ____test_icr(struct xapic_vcpu *x, uint64_t val)
+{
+       struct kvm_vcpu *vcpu = x->vcpu;
+       struct kvm_lapic_state xapic;
+       struct ucall uc;
+       uint64_t icr;
+
+       /*
+        * Tell the guest what ICR value to write.  Use the IRR to pass info,
+        * all bits are valid and should not be modified by KVM (ignoring the
+        * fact that vectors 0-15 are technically illegal).
+        */
+       vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+       *((u32 *)&xapic.regs[APIC_IRR]) = val;
+       *((u32 *)&xapic.regs[APIC_IRR + 0x10]) = val >> 32;
+       vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic);
+
+       vcpu_run(vcpu);
+       TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+       TEST_ASSERT_EQ(uc.args[1], val);
+
+       vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+       icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) |
+             (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32;
+       if (!x->is_x2apic) {
+               if (!x->has_xavic_errata)
+                       val &= (-1u | (0xffull << (32 + 24)));
+       } else if (val & X2APIC_RSVD_BITS_MASK) {
+               return;
+       }
+
+       if (x->has_xavic_errata)
+               TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
+       else
+               TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
+}
+
+static void __test_icr(struct xapic_vcpu *x, uint64_t val)
+{
+       /*
+        * The BUSY bit is reserved on both AMD and Intel, but only AMD treats
+        * it as _must_ be zero.  Intel simply ignores the bit.  Don't test
+        * the BUSY bit for x2APIC, as there is no single correct behavior.
+        */
+       if (!x->is_x2apic)
+               ____test_icr(x, val | APIC_ICR_BUSY);
+
+       ____test_icr(x, val & ~(u64)APIC_ICR_BUSY);
+}
+
+static void test_icr(struct xapic_vcpu *x)
+{
+       struct kvm_vcpu *vcpu = x->vcpu;
+       uint64_t icr, i, j;
+
+       icr = APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_FIXED;
+       for (i = 0; i <= 0xff; i++)
+               __test_icr(x, icr | i);
+
+       icr = APIC_INT_ASSERT | APIC_DM_FIXED;
+       for (i = 0; i <= 0xff; i++)
+               __test_icr(x, icr | i);
+
+       /*
+        * Send all flavors of IPIs to non-existent vCPUs.  TODO: use number of
+        * vCPUs, not vcpu.id + 1.  Arbitrarily use vector 0xff.
+        */
+       icr = APIC_INT_ASSERT | 0xff;
+       for (i = 0; i < 0xff; i++) {
+               if (i == vcpu->id)
+                       continue;
+               for (j = 0; j < 8; j++)
+                       __test_icr(x, i << (32 + 24) | icr | (j << 8));
+       }
+
+       /* And again with a shorthand destination for all types of IPIs. */
+       icr = APIC_DEST_ALLBUT | APIC_INT_ASSERT;
+       for (i = 0; i < 8; i++)
+               __test_icr(x, icr | (i << 8));
+
+       /* And a few garbage values, just to make sure it's an IRQ (blocked). */
+       __test_icr(x, 0xa5a5a5a5a5a5a5a5 & ~APIC_DM_FIXED_MASK);
+       __test_icr(x, 0x5a5a5a5a5a5a5a5a & ~APIC_DM_FIXED_MASK);
+       __test_icr(x, -1ull & ~APIC_DM_FIXED_MASK);
+}
+
+static void __test_apic_id(struct kvm_vcpu *vcpu, uint64_t apic_base)
+{
+       uint32_t apic_id, expected;
+       struct kvm_lapic_state xapic;
+
+       vcpu_set_msr(vcpu, MSR_IA32_APICBASE, apic_base);
+
+       vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+
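+       /* The xAPIC ID lives in APIC_ID bits 31:24; x2APIC uses the full 32-bit ID. */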
+       expected = apic_base & X2APIC_ENABLE ? vcpu->id : vcpu->id << 24;
+       apic_id = *((u32 *)&xapic.regs[APIC_ID]);
+
+       TEST_ASSERT(apic_id == expected,
+                   "APIC_ID not set back to %s format; wanted = %x, got = %x",
+                   (apic_base & X2APIC_ENABLE) ? "x2APIC" : "xAPIC",
+                   expected, apic_id);
+}
+
+/*
+ * Verify that KVM switches the APIC_ID between xAPIC and x2APIC when userspace
+ * stuffs MSR_IA32_APICBASE.  Setting the APIC_ID when x2APIC is enabled and
+ * when the APIC transitions from DISABLED to ENABLED is architectural behavior
+ * (on Intel), whereas the x2APIC => xAPIC transition behavior is KVM ABI, since
+ * attempting to transition from x2APIC to xAPIC without disabling the APIC is
+ * architecturally disallowed.
+ */
+static void test_apic_id(void)
+{
+       const uint32_t NR_VCPUS = 3;
+       struct kvm_vcpu *vcpus[NR_VCPUS];
+       uint64_t apic_base;
+       struct kvm_vm *vm;
+       int i;
+
+       vm = vm_create_with_vcpus(NR_VCPUS, NULL, vcpus);
+       vm_enable_cap(vm, KVM_CAP_X2APIC_API, KVM_X2APIC_API_USE_32BIT_IDS);
+
+       for (i = 0; i < NR_VCPUS; i++) {
+               apic_base = vcpu_get_msr(vcpus[i], MSR_IA32_APICBASE);
+
+               TEST_ASSERT(apic_base & MSR_IA32_APICBASE_ENABLE,
+                           "APIC not in ENABLED state at vCPU RESET");
+               TEST_ASSERT(!(apic_base & X2APIC_ENABLE),
+                           "APIC not in xAPIC mode at vCPU RESET");
+
+               __test_apic_id(vcpus[i], apic_base);
+               __test_apic_id(vcpus[i], apic_base | X2APIC_ENABLE);
+               __test_apic_id(vcpus[i], apic_base);
+       }
+
+       kvm_vm_free(vm);
+}
+
+static void test_x2apic_id(void)
+{
+       struct kvm_lapic_state lapic = {};
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       int i;
+
+       vm = vm_create_with_one_vcpu(&vcpu, NULL);
+       vcpu_set_msr(vcpu, MSR_IA32_APICBASE, MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+
+       /*
+        * Try stuffing a modified x2APIC ID; KVM should ignore the value and
+        * always return the vCPU's default/readonly x2APIC ID.
+        */
+       for (i = 0; i <= 0xff; i++) {
+               *(u32 *)(lapic.regs + APIC_ID) = i << 24;
+               *(u32 *)(lapic.regs + APIC_SPIV) = APIC_SPIV_APIC_ENABLED;
+               vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
+
+               vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic);
+               TEST_ASSERT(*((u32 *)&lapic.regs[APIC_ID]) == vcpu->id << 24,
+                           "x2APIC ID should be fully readonly");
+       }
+
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       struct xapic_vcpu x = {
+               .vcpu = NULL,
+               .is_x2apic = true,
+       };
+       struct kvm_vm *vm;
+
+       vm = vm_create_with_one_vcpu(&x.vcpu, x2apic_guest_code);
+       test_icr(&x);
+       kvm_vm_free(vm);
+
+       /*
+        * Use a second VM for the xAPIC test so that x2APIC can be hidden from
+        * the guest in order to test AVIC.  KVM disallows changing CPUID after
+        * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC.
+        */
+       vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code);
+       x.is_x2apic = false;
+
+       /*
+        * AMD's AVIC implementation is buggy (fails to clear the ICR BUSY bit),
+        * and also diverges from KVM with respect to ICR2[23:0] (KVM and Intel
+        * drop writes, AMD does not).  Account for the errata when checking
+        * that KVM reads back what was written.
+        */
+       x.has_xavic_errata = host_cpu_is_amd &&
+                            get_kvm_amd_param_bool("avic");
+
+       vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC);
+
+       virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+       test_icr(&x);
+       kvm_vm_free(vm);
+
+       test_apic_id();
+       test_x2apic_id();
+}
diff --git a/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c
new file mode 100644 (file)
index 0000000..c8a5c5e
--- /dev/null
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * XCR0 cpuid test
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * Assert that architectural dependency rules are satisfied, e.g. that AVX is
+ * supported if and only if SSE is supported.
+ */
+#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies)          \
+do {                                                                                   \
+       uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies));       \
+                                                                                       \
+       __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) ||                    \
+                      __supported == ((xfeatures) | (dependencies)),                   \
+                      "supported = 0x%lx, xfeatures = 0x%llx, dependencies = 0x%llx",  \
+                      __supported, (xfeatures), (dependencies));                       \
+} while (0)
+
+/*
+ * Assert that KVM reports a sane, usable as-is XCR0.  Architecturally, a CPU
+ * isn't strictly required to _support_ all XFeatures related to a feature, but
+ * at the same time XSETBV will #GP if bundled XFeatures aren't enabled and
+ * disabled coherently.  E.g. a CPU can technically enumerate support for
+ * XTILE_CFG but not XTILE_DATA, but attempting to enable XTILE_CFG without
+ * XTILE_DATA will #GP.
+ */
+#define ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, xfeatures)         \
+do {                                                                   \
+       uint64_t __supported = (supported_xcr0) & (xfeatures);          \
+                                                                       \
+       __GUEST_ASSERT(!__supported || __supported == (xfeatures),      \
+                      "supported = 0x%lx, xfeatures = 0x%llx",         \
+                      __supported, (xfeatures));                       \
+} while (0)
+
+static void guest_code(void)
+{
+       uint64_t initial_xcr0;
+       uint64_t supported_xcr0;
+       int i, vector;
+
+       set_cr4(get_cr4() | X86_CR4_OSXSAVE);
+
+       initial_xcr0 = xgetbv(0);
+       supported_xcr0 = this_cpu_supported_xcr0();
+
+       GUEST_ASSERT(initial_xcr0 == supported_xcr0);
+
+       /* Check AVX */
+       ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0,
+                                    XFEATURE_MASK_YMM,
+                                    XFEATURE_MASK_SSE);
+
+       /* Check MPX */
+       ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+                                   XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
+
+       /* Check AVX-512 */
+       ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0,
+                                    XFEATURE_MASK_AVX512,
+                                    XFEATURE_MASK_SSE | XFEATURE_MASK_YMM);
+       ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+                                   XFEATURE_MASK_AVX512);
+
+       /* Check AMX */
+       ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+                                   XFEATURE_MASK_XTILE);
+
+       vector = xsetbv_safe(0, XFEATURE_MASK_FP);
+       __GUEST_ASSERT(!vector,
+                      "Expected success on XSETBV(FP), got vector '0x%x'",
+                      vector);
+
+       vector = xsetbv_safe(0, supported_xcr0);
+       __GUEST_ASSERT(!vector,
+                      "Expected success on XSETBV(0x%lx), got vector '0x%x'",
+                      supported_xcr0, vector);
+
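+       /* Setting any unsupported XCR0 bit, one at a time, should #GP. */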
+       for (i = 0; i < 64; i++) {
+               if (supported_xcr0 & BIT_ULL(i))
+                       continue;
+
+               vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i));
+               __GUEST_ASSERT(vector == GP_VECTOR,
+                              "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'",
+                              BIT_ULL(i), supported_xcr0, vector);
+       }
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_run *run;
+       struct kvm_vm *vm;
+       struct ucall uc;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       run = vcpu->run;
+
+       while (1) {
+               vcpu_run(vcpu);
+
+               TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+                           "Unexpected exit reason: %u (%s),",
+                           run->exit_reason,
+                           exit_reason_str(run->exit_reason));
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+
+done:
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
new file mode 100644 (file)
index 0000000..a59b3c7
--- /dev/null
@@ -0,0 +1,1161 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2021 Amazon.com, Inc. or its affiliates.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <stdint.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+
+#include <sys/eventfd.h>
+
+#define SHINFO_REGION_GVA      0xc0000000ULL
+#define SHINFO_REGION_GPA      0xc0000000ULL
+#define SHINFO_REGION_SLOT     10
+
+#define DUMMY_REGION_GPA       (SHINFO_REGION_GPA + (3 * PAGE_SIZE))
+#define DUMMY_REGION_SLOT      11
+
+#define DUMMY_REGION_GPA_2     (SHINFO_REGION_GPA + (4 * PAGE_SIZE))
+#define DUMMY_REGION_SLOT_2    12
+
+#define SHINFO_ADDR    (SHINFO_REGION_GPA)
+#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40)
+#define PVTIME_ADDR    (SHINFO_REGION_GPA + PAGE_SIZE)
+#define RUNSTATE_ADDR  (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15)
+
+#define SHINFO_VADDR   (SHINFO_REGION_GVA)
+#define VCPU_INFO_VADDR        (SHINFO_REGION_GVA + 0x40)
+#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15)
+
+#define EVTCHN_VECTOR  0x10
+
+#define EVTCHN_TEST1 15
+#define EVTCHN_TEST2 66
+#define EVTCHN_TIMER 13
+
+enum {
+       TEST_INJECT_VECTOR = 0,
+       TEST_RUNSTATE_runnable,
+       TEST_RUNSTATE_blocked,
+       TEST_RUNSTATE_offline,
+       TEST_RUNSTATE_ADJUST,
+       TEST_RUNSTATE_DATA,
+       TEST_STEAL_TIME,
+       TEST_EVTCHN_MASKED,
+       TEST_EVTCHN_UNMASKED,
+       TEST_EVTCHN_SLOWPATH,
+       TEST_EVTCHN_SEND_IOCTL,
+       TEST_EVTCHN_HCALL,
+       TEST_EVTCHN_HCALL_SLOWPATH,
+       TEST_EVTCHN_HCALL_EVENTFD,
+       TEST_TIMER_SETUP,
+       TEST_TIMER_WAIT,
+       TEST_TIMER_RESTORE,
+       TEST_POLL_READY,
+       TEST_POLL_TIMEOUT,
+       TEST_POLL_MASKED,
+       TEST_POLL_WAKE,
+       SET_VCPU_INFO,
+       TEST_TIMER_PAST,
+       TEST_LOCKING_SEND_RACE,
+       TEST_LOCKING_POLL_RACE,
+       TEST_LOCKING_POLL_TIMEOUT,
+       TEST_DONE,
+
+       TEST_GUEST_SAW_IRQ,
+};
+
+#define XEN_HYPERCALL_MSR      0x40000000
+
+#define MIN_STEAL_TIME         50000
+
+#define SHINFO_RACE_TIMEOUT    2       /* seconds */
+
+#define __HYPERVISOR_set_timer_op      15
+#define __HYPERVISOR_sched_op          29
+#define __HYPERVISOR_event_channel_op  32
+
+#define SCHEDOP_poll                   3
+
+#define EVTCHNOP_send                  4
+
+#define EVTCHNSTAT_interdomain         2
+
+struct evtchn_send {
+       u32 port;
+};
+
+struct sched_poll {
+       u32 *ports;
+       unsigned int nr_ports;
+       u64 timeout;
+};
+
+struct pvclock_vcpu_time_info {
+       u32   version;
+       u32   pad0;
+       u64   tsc_timestamp;
+       u64   system_time;
+       u32   tsc_to_system_mul;
+       s8    tsc_shift;
+       u8    flags;
+       u8    pad[2];
+} __attribute__((__packed__)); /* 32 bytes */
+
+struct pvclock_wall_clock {
+       u32   version;
+       u32   sec;
+       u32   nsec;
+} __attribute__((__packed__));
+
+struct vcpu_runstate_info {
+       uint32_t state;
+       uint64_t state_entry_time;
+       uint64_t time[5]; /* Extra field for overrun check */
+};
+
+struct compat_vcpu_runstate_info {
+       uint32_t state;
+       uint64_t state_entry_time;
+       uint64_t time[5];
+} __attribute__((__packed__));
+
+struct arch_vcpu_info {
+       unsigned long cr2;
+       unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
+};
+
+struct vcpu_info {
+       uint8_t evtchn_upcall_pending;
+       uint8_t evtchn_upcall_mask;
+       unsigned long evtchn_pending_sel;
+       struct arch_vcpu_info arch;
+       struct pvclock_vcpu_time_info time;
+}; /* 64 bytes (x86) */
+
+struct shared_info {
+       struct vcpu_info vcpu_info[32];
+       unsigned long evtchn_pending[64];
+       unsigned long evtchn_mask[64];
+       struct pvclock_wall_clock wc;
+       uint32_t wc_sec_hi;
+       /* arch_shared_info here */
+};
+
+#define RUNSTATE_running  0
+#define RUNSTATE_runnable 1
+#define RUNSTATE_blocked  2
+#define RUNSTATE_offline  3
+
+static const char *runstate_names[] = {
+       "running",
+       "runnable",
+       "blocked",
+       "offline"
+};
+
+struct {
+       struct kvm_irq_routing info;
+       struct kvm_irq_routing_entry entries[2];
+} irq_routes;
+
+static volatile bool guest_saw_irq;
+
+static void evtchn_handler(struct ex_regs *regs)
+{
+       struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
+
+       vcpu_arch_put_guest(vi->evtchn_upcall_pending, 0);
+       vcpu_arch_put_guest(vi->evtchn_pending_sel, 0);
+       guest_saw_irq = true;
+
+       GUEST_SYNC(TEST_GUEST_SAW_IRQ);
+}
+
+static void guest_wait_for_irq(void)
+{
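+       /* "rep nop" is PAUSE; spin until the upcall handler sets guest_saw_irq. */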
+       while (!guest_saw_irq)
+               __asm__ __volatile__ ("rep nop" : : : "memory");
+       guest_saw_irq = false;
+}
+
+static void guest_code(void)
+{
+       struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
+       int i;
+
+       __asm__ __volatile__(
+               "sti\n"
+               "nop\n"
+       );
+
+       /* Trigger an interrupt injection */
+       GUEST_SYNC(TEST_INJECT_VECTOR);
+
+       guest_wait_for_irq();
+
+       /* Test having the host set runstates manually */
+       GUEST_SYNC(TEST_RUNSTATE_runnable);
+       GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
+       GUEST_ASSERT(rs->state == 0);
+
+       GUEST_SYNC(TEST_RUNSTATE_blocked);
+       GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
+       GUEST_ASSERT(rs->state == 0);
+
+       GUEST_SYNC(TEST_RUNSTATE_offline);
+       GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
+       GUEST_ASSERT(rs->state == 0);
+
+       /* Test runstate time adjust */
+       GUEST_SYNC(TEST_RUNSTATE_ADJUST);
+       GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
+       GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);
+
+       /* Test runstate time set */
+       GUEST_SYNC(TEST_RUNSTATE_DATA);
+       GUEST_ASSERT(rs->state_entry_time >= 0x8000);
+       GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
+       GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
+       GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);
+
+       /* sched_yield() should result in some 'runnable' time */
+       GUEST_SYNC(TEST_STEAL_TIME);
+       GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
+
+       /* Attempt to deliver a *masked* interrupt */
+       GUEST_SYNC(TEST_EVTCHN_MASKED);
+
+       /* Wait until we see the bit set */
+       struct shared_info *si = (void *)SHINFO_VADDR;
+       while (!si->evtchn_pending[0])
+               __asm__ __volatile__ ("rep nop" : : : "memory");
+
+       /* Now deliver an *unmasked* interrupt */
+       GUEST_SYNC(TEST_EVTCHN_UNMASKED);
+
+       guest_wait_for_irq();
+
+       /* Change memslots and deliver an interrupt */
+       GUEST_SYNC(TEST_EVTCHN_SLOWPATH);
+
+       guest_wait_for_irq();
+
+       /* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */
+       GUEST_SYNC(TEST_EVTCHN_SEND_IOCTL);
+
+       guest_wait_for_irq();
+
+       GUEST_SYNC(TEST_EVTCHN_HCALL);
+
+       /* Our turn.  Deliver an event channel (to ourselves) with the
+        * EVTCHNOP_send hypercall. */
+       struct evtchn_send s = { .port = 127 };
+       xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
+
+       guest_wait_for_irq();
+
+       GUEST_SYNC(TEST_EVTCHN_HCALL_SLOWPATH);
+
+       /*
+        * Same again, but this time the host has messed with memslots so it
+        * should take the slow path in kvm_xen_set_evtchn().
+        */
+       xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
+
+       guest_wait_for_irq();
+
+       GUEST_SYNC(TEST_EVTCHN_HCALL_EVENTFD);
+
+       /* Deliver "outbound" event channel to an eventfd which
+        * happens to be one of our own irqfds. */
+       s.port = 197;
+       xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
+
+       guest_wait_for_irq();
+
+       GUEST_SYNC(TEST_TIMER_SETUP);
+
+       /* Set a timer 100ms in the future. */
+       xen_hypercall(__HYPERVISOR_set_timer_op,
+                     rs->state_entry_time + 100000000, NULL);
+
+       GUEST_SYNC(TEST_TIMER_WAIT);
+
+       /* Now wait for the timer */
+       guest_wait_for_irq();
+
+       GUEST_SYNC(TEST_TIMER_RESTORE);
+
+       /* The host has 'restored' the timer. Just wait for it. */
+       guest_wait_for_irq();
+
+       GUEST_SYNC(TEST_POLL_READY);
+
+       /* Poll for an event channel port which is already set */
+       u32 ports[1] = { EVTCHN_TIMER };
+       struct sched_poll p = {
+               .ports = ports,
+               .nr_ports = 1,
+               .timeout = 0,
+       };
+
+       xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+       GUEST_SYNC(TEST_POLL_TIMEOUT);
+
+       /* Poll for an unset port and wait for the timeout. */
+       p.timeout = 100000000;
+       xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+       GUEST_SYNC(TEST_POLL_MASKED);
+
+       /* A timer will wake the masked port we're waiting on, while we poll */
+       p.timeout = 0;
+       xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+       GUEST_SYNC(TEST_POLL_WAKE);
+
+       /* Set the vcpu_info to point at exactly the place it already is to
+        * make sure the attribute is functional. */
+       GUEST_SYNC(SET_VCPU_INFO);
+
+       /* A timer will wake an *unmasked* port, which should wake us with an
+        * actual interrupt, while we're polling on a different port. */
+       ports[0]++;
+       p.timeout = 0;
+       xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+       guest_wait_for_irq();
+
+       GUEST_SYNC(TEST_TIMER_PAST);
+
+       /* Timer should have fired already */
+       guest_wait_for_irq();
+
+       GUEST_SYNC(TEST_LOCKING_SEND_RACE);
+       /* Racing host ioctls */
+
+       guest_wait_for_irq();
+
+       GUEST_SYNC(TEST_LOCKING_POLL_RACE);
+       /* Racing vmcall against host ioctl */
+
+       ports[0] = 0;
+
+       p = (struct sched_poll) {
+               .ports = ports,
+               .nr_ports = 1,
+               .timeout = 0
+       };
+
+wait_for_timer:
+       /*
+        * Poll for a timer wake event while the worker thread is mucking with
+        * the shared info.  KVM XEN drops timer IRQs if the shared info is
+        * invalid when the timer expires.  Arbitrarily poll 100 times before
+        * giving up and asking the VMM to re-arm the timer.  100 polls should
+        * consume enough time to beat on KVM without taking too long if the
+        * timer IRQ is dropped due to an invalid event channel.
+        */
+       for (i = 0; i < 100 && !guest_saw_irq; i++)
+               __xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+       /*
+        * Re-send the timer IRQ if it was (likely) dropped due to the timer
+        * expiring while the event channel was invalid.
+        */
+       if (!guest_saw_irq) {
+               GUEST_SYNC(TEST_LOCKING_POLL_TIMEOUT);
+               goto wait_for_timer;
+       }
+       guest_saw_irq = false;
+
+       GUEST_SYNC(TEST_DONE);
+}
+
+static struct shared_info *shinfo;
+static struct vcpu_info *vinfo;
+static struct kvm_vcpu *vcpu;
+
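+/*
+ * SIGALRM handler, armed via alarm(1) as a watchdog while the host waits for
+ * the guest to make progress; if it fires, IRQ delivery timed out.
+ */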
+static void handle_alrm(int sig)
+{
+       if (vinfo)
+               printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending);
+       vcpu_dump(stdout, vcpu, 0);
+       TEST_FAIL("IRQ delivery timed out");
+}
+
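+/*
+ * Worker thread that repeatedly activates and deactivates the shared_info
+ * cache so that the vCPU races against an in-flux mapping.
+ */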
+static void *juggle_shinfo_state(void *arg)
+{
+       struct kvm_vm *vm = (struct kvm_vm *)arg;
+
+       struct kvm_xen_hvm_attr cache_activate_gfn = {
+               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+               .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE
+       };
+
+       struct kvm_xen_hvm_attr cache_deactivate_gfn = {
+               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+               .u.shared_info.gfn = KVM_XEN_INVALID_GFN
+       };
+
+       struct kvm_xen_hvm_attr cache_activate_hva = {
+               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA,
+               .u.shared_info.hva = (unsigned long)shinfo
+       };
+
+       struct kvm_xen_hvm_attr cache_deactivate_hva = {
+               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+               .u.shared_info.hva = 0
+       };
+
+       int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+
+       for (;;) {
+               __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_gfn);
+               pthread_testcancel();
+               __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_gfn);
+
+               if (xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA) {
+                       __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_hva);
+                       pthread_testcancel();
+                       __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_hva);
+               }
+       }
+
+       return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_xen_hvm_attr evt_reset;
+       struct kvm_vm *vm;
+       pthread_t thread;
+       bool verbose;
+       int ret;
+
+       verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
+                              !strncmp(argv[1], "--verbose", 10));
+
+       int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+       TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO);
+
+       bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
+       bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG);
+       bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
+       bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
+       bool has_shinfo_hva = !!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA);
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       /* Map a region for the shared_info page */
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                   SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0);
+       virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3);
+
+       shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
+
+       int zero_fd = open("/dev/zero", O_RDONLY);
+       TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
+
+       struct kvm_xen_hvm_config hvmc = {
+               .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
+               .msr = XEN_HYPERCALL_MSR,
+       };
+
+       /* Let the kernel know that we *will* use it for sending all
+        * event channels, which lets it intercept SCHEDOP_poll */
+       if (do_evtchn_tests)
+               hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+
+       vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
+
+       struct kvm_xen_hvm_attr lm = {
+               .type = KVM_XEN_ATTR_TYPE_LONG_MODE,
+               .u.long_mode = 1,
+       };
+       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+       if (do_runstate_flag) {
+               struct kvm_xen_hvm_attr ruf = {
+                       .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG,
+                       .u.runstate_update_flag = 1,
+               };
+               vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf);
+
+               ruf.u.runstate_update_flag = 0;
+               vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf);
+               TEST_ASSERT(ruf.u.runstate_update_flag == 1,
+                           "Failed to read back RUNSTATE_UPDATE_FLAG attr");
+       }
+
+       struct kvm_xen_hvm_attr ha = {};
+
+       if (has_shinfo_hva) {
+               ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA;
+               ha.u.shared_info.hva = (unsigned long)shinfo;
+       } else {
+               ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO;
+               ha.u.shared_info.gfn = SHINFO_ADDR / PAGE_SIZE;
+       }
+
+       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
+
+       /*
+        * Test what happens when the HVA of the shinfo page is remapped after
+        * the kernel has a reference to it. But make sure we copy the clock
+        * info over since that's only set at setup time, and we test it later.
+        */
+       struct pvclock_wall_clock wc_copy = shinfo->wc;
+       void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
+       TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
+       shinfo->wc = wc_copy;
+
+       struct kvm_xen_vcpu_attr vi = {
+               .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
+               .u.gpa = VCPU_INFO_ADDR,
+       };
+       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi);
+
+       struct kvm_xen_vcpu_attr pvclock = {
+               .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
+               .u.gpa = PVTIME_ADDR,
+       };
+       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock);
+
+       struct kvm_xen_hvm_attr vec = {
+               .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
+               .u.vector = EVTCHN_VECTOR,
+       };
+       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);
+
+       vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);
+
+       if (do_runstate_tests) {
+               struct kvm_xen_vcpu_attr st = {
+                       .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
+                       .u.gpa = RUNSTATE_ADDR,
+               };
+               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
+       }
+
+       int irq_fd[2] = { -1, -1 };
+
+       if (do_eventfd_tests) {
+               irq_fd[0] = eventfd(0, 0);
+               irq_fd[1] = eventfd(0, 0);
+
+               /* Unexpected, but not a KVM failure */
+               if (irq_fd[0] == -1 || irq_fd[1] == -1)
+                       do_evtchn_tests = do_eventfd_tests = false;
+       }
+
+       if (do_eventfd_tests) {
+               irq_routes.info.nr = 2;
+
+               irq_routes.entries[0].gsi = 32;
+               irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+               irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1;
+               irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id;
+               irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+               irq_routes.entries[1].gsi = 33;
+               irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+               irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2;
+               irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id;
+               irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+               vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);
+
+               struct kvm_irqfd ifd = { };
+
+               ifd.fd = irq_fd[0];
+               ifd.gsi = 32;
+               vm_ioctl(vm, KVM_IRQFD, &ifd);
+
+               ifd.fd = irq_fd[1];
+               ifd.gsi = 33;
+               vm_ioctl(vm, KVM_IRQFD, &ifd);
+
+               struct sigaction sa = { };
+               sa.sa_handler = handle_alrm;
+               sigaction(SIGALRM, &sa, NULL);
+       }
+
+       struct kvm_xen_vcpu_attr tmr = {
+               .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
+               .u.timer.port = EVTCHN_TIMER,
+               .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+               .u.timer.expires_ns = 0
+       };
+
+       if (do_evtchn_tests) {
+               struct kvm_xen_hvm_attr inj = {
+                       .type = KVM_XEN_ATTR_TYPE_EVTCHN,
+                       .u.evtchn.send_port = 127,
+                       .u.evtchn.type = EVTCHNSTAT_interdomain,
+                       .u.evtchn.flags = 0,
+                       .u.evtchn.deliver.port.port = EVTCHN_TEST1,
+                       .u.evtchn.deliver.port.vcpu = vcpu->id + 1,
+                       .u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+               };
+               vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+               /* Test migration to a different vCPU */
+               inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE;
+               inj.u.evtchn.deliver.port.vcpu = vcpu->id;
+               vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+               inj.u.evtchn.send_port = 197;
+               inj.u.evtchn.deliver.eventfd.port = 0;
+               inj.u.evtchn.deliver.eventfd.fd = irq_fd[1];
+               inj.u.evtchn.flags = 0;
+               vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+       }
+       vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
+       vinfo->evtchn_upcall_pending = 0;
+
+       struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
+       rs->state = 0x5a;
+
+       bool evtchn_irq_expected = false;
+
+       for (;;) {
+               struct ucall uc;
+
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC: {
+                       struct kvm_xen_vcpu_attr rst;
+                       long rundelay;
+
+                       if (do_runstate_tests)
+                               TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+                                           rs->time[1] + rs->time[2] + rs->time[3],
+                                           "runstate times don't add up");
+
+                       switch (uc.args[1]) {
+                       case TEST_INJECT_VECTOR:
+                               if (verbose)
+                                       printf("Delivering evtchn upcall\n");
+                               evtchn_irq_expected = true;
+                               vinfo->evtchn_upcall_pending = 1;
+                               break;
+
+                       case TEST_RUNSTATE_runnable...TEST_RUNSTATE_offline:
+                               TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
+                               if (!do_runstate_tests)
+                                       goto done;
+                               if (verbose)
+                                       printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
+                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
+                               rst.u.runstate.state = uc.args[1] + RUNSTATE_runnable -
+                                       TEST_RUNSTATE_runnable;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+                               break;
+
+                       case TEST_RUNSTATE_ADJUST:
+                               if (verbose)
+                                       printf("Testing RUNSTATE_ADJUST\n");
+                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
+                               memset(&rst.u, 0, sizeof(rst.u));
+                               rst.u.runstate.state = (uint64_t)-1;
+                               rst.u.runstate.time_blocked =
+                                       0x5a - rs->time[RUNSTATE_blocked];
+                               rst.u.runstate.time_offline =
+                                       0x6b6b - rs->time[RUNSTATE_offline];
+                               rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
+                                       rst.u.runstate.time_offline;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+                               break;
+
+                       case TEST_RUNSTATE_DATA:
+                               if (verbose)
+                                       printf("Testing RUNSTATE_DATA\n");
+                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
+                               memset(&rst.u, 0, sizeof(rst.u));
+                               rst.u.runstate.state = RUNSTATE_running;
+                               rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
+                               rst.u.runstate.time_blocked = 0x6b6b;
+                               rst.u.runstate.time_offline = 0x5a;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+                               break;
+
+                       case TEST_STEAL_TIME:
+                               if (verbose)
+                                       printf("Testing steal time\n");
+                               /* Yield until scheduler delay exceeds target */
+                               rundelay = get_run_delay() + MIN_STEAL_TIME;
+                               do {
+                                       sched_yield();
+                               } while (get_run_delay() < rundelay);
+                               break;
+
+                       case TEST_EVTCHN_MASKED:
+                               if (!do_eventfd_tests)
+                                       goto done;
+                               if (verbose)
+                                       printf("Testing masked event channel\n");
+                               shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1;
+                               eventfd_write(irq_fd[0], 1UL);
+                               alarm(1);
+                               break;
+
+                       case TEST_EVTCHN_UNMASKED:
+                               if (verbose)
+                                       printf("Testing unmasked event channel\n");
+                               /* Unmask that, but deliver the other one */
+                               shinfo->evtchn_pending[0] = 0;
+                               shinfo->evtchn_mask[0] = 0;
+                               eventfd_write(irq_fd[1], 1UL);
+                               evtchn_irq_expected = true;
+                               alarm(1);
+                               break;
+
+                       case TEST_EVTCHN_SLOWPATH:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+                               shinfo->evtchn_pending[1] = 0;
+                               if (verbose)
+                                       printf("Testing event channel after memslot change\n");
+                               vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                                           DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
+                               eventfd_write(irq_fd[0], 1UL);
+                               evtchn_irq_expected = true;
+                               alarm(1);
+                               break;
+
+                       case TEST_EVTCHN_SEND_IOCTL:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+                               if (!do_evtchn_tests)
+                                       goto done;
+
+                               shinfo->evtchn_pending[0] = 0;
+                               if (verbose)
+                                       printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n");
+
+                               struct kvm_irq_routing_xen_evtchn e;
+                               e.port = EVTCHN_TEST2;
+                               e.vcpu = vcpu->id;
+                               e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+                               vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e);
+                               evtchn_irq_expected = true;
+                               alarm(1);
+                               break;
+
+                       case TEST_EVTCHN_HCALL:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+                               shinfo->evtchn_pending[1] = 0;
+
+                               if (verbose)
+                                       printf("Testing guest EVTCHNOP_send direct to evtchn\n");
+                               evtchn_irq_expected = true;
+                               alarm(1);
+                               break;
+
+                       case TEST_EVTCHN_HCALL_SLOWPATH:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+                               shinfo->evtchn_pending[0] = 0;
+
+                               if (verbose)
+                                       printf("Testing guest EVTCHNOP_send direct to evtchn after memslot change\n");
+                               vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                                           DUMMY_REGION_GPA_2, DUMMY_REGION_SLOT_2, 1, 0);
+                               evtchn_irq_expected = true;
+                               alarm(1);
+                               break;
+
+                       case TEST_EVTCHN_HCALL_EVENTFD:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+                               shinfo->evtchn_pending[0] = 0;
+
+                               if (verbose)
+                                       printf("Testing guest EVTCHNOP_send to eventfd\n");
+                               evtchn_irq_expected = true;
+                               alarm(1);
+                               break;
+
+                       case TEST_TIMER_SETUP:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+                               shinfo->evtchn_pending[1] = 0;
+
+                               if (verbose)
+                                       printf("Testing guest oneshot timer\n");
+                               break;
+
+                       case TEST_TIMER_WAIT:
+                               memset(&tmr, 0, sizeof(tmr));
+                               tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+                               TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER,
+                                           "Timer port not returned");
+                               TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+                                           "Timer priority not returned");
+                               TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time,
+                                           "Timer expiry not returned");
+                               evtchn_irq_expected = true;
+                               alarm(1);
+                               break;
+
+                       case TEST_TIMER_RESTORE:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+                               shinfo->evtchn_pending[0] = 0;
+
+                               if (verbose)
+                                       printf("Testing restored oneshot timer\n");
+
+                               tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+                               evtchn_irq_expected = true;
+                               alarm(1);
+                               break;
+
+                       case TEST_POLL_READY:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+
+                               if (verbose)
+                                       printf("Testing SCHEDOP_poll with already pending event\n");
+                               shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER;
+                               alarm(1);
+                               break;
+
+                       case TEST_POLL_TIMEOUT:
+                               if (verbose)
+                                       printf("Testing SCHEDOP_poll timeout\n");
+                               shinfo->evtchn_pending[0] = 0;
+                               alarm(1);
+                               break;
+
+                       case TEST_POLL_MASKED:
+                               if (verbose)
+                                       printf("Testing SCHEDOP_poll wake on masked event\n");
+
+                               tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+                               alarm(1);
+                               break;
+
+                       case TEST_POLL_WAKE:
+                               shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0;
+                               if (verbose)
+                                       printf("Testing SCHEDOP_poll wake on unmasked event\n");
+
+                               evtchn_irq_expected = true;
+                               tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+
+                               /* Read it back and check the pending time is reported correctly */
+                               tmr.u.timer.expires_ns = 0;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+                               TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000,
+                                           "Timer not reported pending");
+                               alarm(1);
+                               break;
+
+                       case SET_VCPU_INFO:
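+                               /*
+                                * Switch the vcpu_info to be tracked by host
+                                * virtual address, if KVM supports the HVA
+                                * based attributes (has_shinfo_hva).
+                                */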
+                               if (has_shinfo_hva) {
+                                       struct kvm_xen_vcpu_attr vih = {
+                                               .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA,
+                                               .u.hva = (unsigned long)vinfo
+                                       };
+                                       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vih);
+                               }
+                               break;
+
+                       case TEST_TIMER_PAST:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+                               /* Read timer and check it is no longer pending */
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+                               TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending");
+
+                               shinfo->evtchn_pending[0] = 0;
+                               if (verbose)
+                                       printf("Testing timer in the past\n");
+
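+                               /*
+                                * A timer armed with an expiry in the past
+                                * should fire immediately.
+                                */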
+                               evtchn_irq_expected = true;
+                               tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+                               alarm(1);
+                               break;
+
+                       case TEST_LOCKING_SEND_RACE:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+                               alarm(0);
+
+                               if (verbose)
+                                       printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n");
+
+                               ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm);
+                               TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret));
+
+                               struct kvm_irq_routing_xen_evtchn uxe = {
+                                       .port = 1,
+                                       .vcpu = vcpu->id,
+                                       .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL
+                               };
+
+                               evtchn_irq_expected = true;
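+                               /*
+                                * Hammer KVM_XEN_HVM_EVTCHN_SEND for the race
+                                * window while the juggle_shinfo_state() thread
+                                * churns the shared info state in the background.
+                                */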
+                               for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;)
+                                       __vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe);
+                               break;
+
+                       case TEST_LOCKING_POLL_RACE:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+
+                               if (verbose)
+                                       printf("Testing shinfo lock corruption (SCHEDOP_poll)\n");
+
+                               shinfo->evtchn_pending[0] = 1;
+
+                               evtchn_irq_expected = true;
+                               tmr.u.timer.expires_ns = rs->state_entry_time +
+                                                        SHINFO_RACE_TIMEOUT * 1000000000ULL;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+                               break;
+
+                       case TEST_LOCKING_POLL_TIMEOUT:
+                               /*
+                                * Optional and possibly repeated sync point.
+                                * Injecting the timer IRQ may fail if the
+                                * shinfo is invalid when the timer expires.
+                                * If the timer has expired but the IRQ hasn't
+                                * been delivered, rearm the timer and retry.
+                                */
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+
+                               /* Resume the guest if the timer is still pending. */
+                               if (tmr.u.timer.expires_ns)
+                                       break;
+
+                               /* All done if the IRQ was delivered. */
+                               if (!evtchn_irq_expected)
+                                       break;
+
+                               tmr.u.timer.expires_ns = rs->state_entry_time +
+                                                        SHINFO_RACE_TIMEOUT * 1000000000ULL;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+                               break;
+                       case TEST_DONE:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+
+                               ret = pthread_cancel(thread);
+                               TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret));
+
+                               ret = pthread_join(thread, 0);
+                               TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret));
+                               goto done;
+
+                       case TEST_GUEST_SAW_IRQ:
+                               TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
+                               evtchn_irq_expected = false;
+                               break;
+                       }
+                       break;
+               }
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+               }
+       }
+
+ done:
+       evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN;
+       evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET;
+       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset);
+
+       alarm(0);
+
+       /*
+        * Just a *really* basic check that things are being put in the
+        * right place. The actual calculations are much the same for
+        * Xen as they are for the KVM variants, so no need to check.
+        */
+       struct pvclock_wall_clock *wc;
+       struct pvclock_vcpu_time_info *ti, *ti2;
+       struct kvm_clock_data kcdata;
+       long long delta;
+
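+       /*
+        * Sanity check the pvclock structures the test registered earlier:
+        * the Xen wallclock, the time info embedded in the vcpu_info, and
+        * the second time info at PVTIME_ADDR.
+        */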
+       wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
+       ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
+       ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
+
+       if (verbose) {
+               printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
+               printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
+                      ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
+                      ti->tsc_shift, ti->flags);
+               printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
+                      ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
+                      ti2->tsc_shift, ti2->flags);
+       }
+
+       TEST_ASSERT(wc->version && !(wc->version & 1),
+                   "Bad wallclock version %x", wc->version);
+
+       vm_ioctl(vm, KVM_GET_CLOCK, &kcdata);
+
+       if (kcdata.flags & KVM_CLOCK_REALTIME) {
+               if (verbose) {
+                       printf("KVM_GET_CLOCK clock: %lld.%09lld\n",
+                              kcdata.clock / NSEC_PER_SEC, kcdata.clock % NSEC_PER_SEC);
+                       printf("KVM_GET_CLOCK realtime: %lld.%09lld\n",
+                              kcdata.realtime / NSEC_PER_SEC, kcdata.realtime % NSEC_PER_SEC);
+               }
+
+               delta = (wc->sec * NSEC_PER_SEC + wc->nsec) - (kcdata.realtime - kcdata.clock);
+
+               /*
+                * KVM_GET_CLOCK gives CLOCK_REALTIME, which jumps on leap second
+                * updates, but unfortunately KVM doesn't currently offer a CLOCK_TAI
+                * alternative. Accept a 1s delta, as testing clock accuracy is not
+                * the goal here; the test just needs to check that the value in
+                * shinfo is somewhat sane.
+                */
+               TEST_ASSERT(llabs(delta) < NSEC_PER_SEC,
+                           "Guest's epoch from shinfo %d.%09d differs from KVM_GET_CLOCK %lld.%09lld",
+                           wc->sec, wc->nsec, (kcdata.realtime - kcdata.clock) / NSEC_PER_SEC,
+                           (kcdata.realtime - kcdata.clock) % NSEC_PER_SEC);
+       } else {
+               pr_info("Missing KVM_CLOCK_REALTIME, skipping shinfo epoch sanity check\n");
+       }
+
+       TEST_ASSERT(ti->version && !(ti->version & 1),
+                   "Bad time_info version %x", ti->version);
+       TEST_ASSERT(ti2->version && !(ti2->version & 1),
+                   "Bad time_info version %x", ti2->version);
+
+       if (do_runstate_tests) {
+               /*
+                * Fetch the runstate and check sanity. Strictly speaking, in the
+                * general case we might not expect the numbers to be identical,
+                * but in this case we know we aren't running the vCPU any more.
+                */
+               struct kvm_xen_vcpu_attr rst = {
+                       .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
+               };
+               vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst);
+
+               if (verbose) {
+                       printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
+                              rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
+                              rs->state, rs->state_entry_time);
+                       for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
+                               printf("State %s: %" PRIu64 " ns\n",
+                                      runstate_names[i], rs->time[i]);
+                       }
+               }
+
+               /*
+                * Exercise runstate info at all points across the page boundary, in
+                * 32-bit and 64-bit mode. In particular, test the case where it is
+                * configured in 32-bit mode and then switched to 64-bit mode while
+                * active, which takes it onto the second page.
+                */
+               unsigned long runstate_addr;
+               struct compat_vcpu_runstate_info *crs;
+               for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4;
+                    runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) {
+
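+                       /*
+                        * View the same memory as both the 64-bit and compat
+                        * (32-bit) runstate layouts, and poison it so the
+                        * "Structure overrun" checks below catch any write
+                        * past the end of the expected layout.
+                        */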
+                       rs = addr_gpa2hva(vm, runstate_addr);
+                       crs = (void *)rs;
+
+                       memset(rs, 0xa5, sizeof(*rs));
+
+                       /* Set to compatibility mode */
+                       lm.u.long_mode = 0;
+                       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+                       /* Set runstate to new address (kernel will write it) */
+                       struct kvm_xen_vcpu_attr st = {
+                               .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
+                               .u.gpa = runstate_addr,
+                       };
+                       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
+
+                       if (verbose)
+                               printf("Compatibility runstate at %08lx\n", runstate_addr);
+
+                       TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch");
+                       TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time,
+                                   "State entry time mismatch");
+                       TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+                                   "Running time mismatch");
+                       TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+                                   "Runnable time mismatch");
+                       TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+                                   "Blocked time mismatch");
+                       TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+                                   "Offline time mismatch");
+                       TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
+                                   "Structure overrun");
+                       TEST_ASSERT(crs->state_entry_time == crs->time[0] +
+                                   crs->time[1] + crs->time[2] + crs->time[3],
+                                   "runstate times don't add up");
+
+
+                       /* Now switch to 64-bit mode */
+                       lm.u.long_mode = 1;
+                       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+                       memset(rs, 0xa5, sizeof(*rs));
+
+                       /* Don't change the address, just trigger a write */
+                       struct kvm_xen_vcpu_attr adj = {
+                               .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST,
+                               .u.runstate.state = (uint64_t)-1
+                       };
+                       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj);
+
+                       if (verbose)
+                               printf("64-bit runstate at %08lx\n", runstate_addr);
+
+                       TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
+                       TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
+                                   "State entry time mismatch");
+                       TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+                                   "Running time mismatch");
+                       TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+                                   "Runnable time mismatch");
+                       TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+                                   "Blocked time mismatch");
+                       TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+                                   "Offline time mismatch");
+                       TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
+                                   "Structure overrun");
+
+                       TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+                                   rs->time[1] + rs->time[2] + rs->time[3],
+                                   "runstate times don't add up");
+               }
+       }
+
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c
new file mode 100644 (file)
index 0000000..2585087
--- /dev/null
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xen_vmcall_test
+ *
+ * Copyright © 2020 Amazon.com, Inc. or its affiliates.
+ *
+ * Userspace hypercall testing
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+#define HCALL_REGION_GPA       0xc0000000ULL
+#define HCALL_REGION_SLOT      10
+
+#define INPUTVALUE 17
+#define ARGVALUE(x) (0xdeadbeef5a5a0000UL + x)
+#define RETVALUE 0xcafef00dfbfbffffUL
+
+#define XEN_HYPERCALL_MSR      0x40000200
+#define HV_GUEST_OS_ID_MSR     0x40000000
+#define HV_HYPERCALL_MSR       0x40000001
+
+#define HVCALL_SIGNAL_EVENT            0x005d
+#define HV_STATUS_INVALID_ALIGNMENT    4
+
+static void guest_code(void)
+{
+       unsigned long rax = INPUTVALUE;
+       unsigned long rdi = ARGVALUE(1);
+       unsigned long rsi = ARGVALUE(2);
+       unsigned long rdx = ARGVALUE(3);
+       unsigned long rcx;
+       register unsigned long r10 __asm__("r10") = ARGVALUE(4);
+       register unsigned long r8 __asm__("r8") = ARGVALUE(5);
+       register unsigned long r9 __asm__("r9") = ARGVALUE(6);
+
+       /* First a direct invocation of 'vmcall' */
+       __asm__ __volatile__("vmcall" :
+                            "=a"(rax) :
+                            "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
+                            "r"(r10), "r"(r8), "r"(r9));
+       GUEST_ASSERT(rax == RETVALUE);
+
+       /* Fill in the Xen hypercall page */
+       __asm__ __volatile__("wrmsr" : : "c" (XEN_HYPERCALL_MSR),
+                            "a" (HCALL_REGION_GPA & 0xffffffff),
+                            "d" (HCALL_REGION_GPA >> 32));
+
+       /* Set Hyper-V Guest OS ID */
+       __asm__ __volatile__("wrmsr" : : "c" (HV_GUEST_OS_ID_MSR),
+                            "a" (0x5a), "d" (0));
+
+       /* Hyper-V hypercall page */
+       u64 msrval = HCALL_REGION_GPA + PAGE_SIZE + 1;
+       __asm__ __volatile__("wrmsr" : : "c" (HV_HYPERCALL_MSR),
+                            "a" (msrval & 0xffffffff),
+                            "d" (msrval >> 32));
+
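+       /*
+        * Each hypercall's stub in the Xen hypercall page is 32 bytes, so
+        * hypercall INPUTVALUE lives at HCALL_REGION_GPA + INPUTVALUE * 32.
+        */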
+       /* Invoke a Xen hypercall */
+       __asm__ __volatile__("call *%1" : "=a"(rax) :
+                            "r"(HCALL_REGION_GPA + INPUTVALUE * 32),
+                            "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
+                            "r"(r10), "r"(r8), "r"(r9));
+       GUEST_ASSERT(rax == RETVALUE);
+
+       /* Invoke a Hyper-V hypercall */
+       rax = 0;
+       rcx = HVCALL_SIGNAL_EVENT;      /* code */
+       rdx = 0x5a5a5a5a;               /* ingpa (badly aligned) */
+       __asm__ __volatile__("call *%1" : "=a"(rax) :
+                            "r"(HCALL_REGION_GPA + PAGE_SIZE),
+                            "a"(rax), "c"(rcx), "d"(rdx),
+                            "r"(r8));
+       GUEST_ASSERT(rax == HV_STATUS_INVALID_ALIGNMENT);
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       unsigned int xen_caps;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+       TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL);
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       vcpu_set_hv_cpuid(vcpu);
+
+       struct kvm_xen_hvm_config hvmc = {
+               .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
+               .msr = XEN_HYPERCALL_MSR,
+       };
+       vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
+
+       /* Map a region for the hypercall pages */
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                   HCALL_REGION_GPA, HCALL_REGION_SLOT, 2, 0);
+       virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2);
+
+       for (;;) {
+               volatile struct kvm_run *run = vcpu->run;
+               struct ucall uc;
+
+               vcpu_run(vcpu);
+
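+               /*
+                * Intercepted Xen hypercalls exit to userspace; verify the
+                * marshalled registers and complete the call by filling in
+                * the result.
+                */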
+               if (run->exit_reason == KVM_EXIT_XEN) {
+                       TEST_ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
+                       TEST_ASSERT_EQ(run->xen.u.hcall.cpl, 0);
+                       TEST_ASSERT_EQ(run->xen.u.hcall.longmode, 1);
+                       TEST_ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
+                       run->xen.u.hcall.result = RETVALUE;
+                       continue;
+               }
+
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+               }
+       }
+done:
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/xss_msr_test.c b/tools/testing/selftests/kvm/x86/xss_msr_test.c
new file mode 100644 (file)
index 0000000..f331a4e
--- /dev/null
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019, Google LLC.
+ *
+ * Tests for the IA32_XSS MSR.
+ */
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define MSR_BITS      64
+
+int main(int argc, char *argv[])
+{
+       bool xss_in_msr_list;
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+       uint64_t xss_val;
+       int i, r;
+
+       /* Create VM */
+       vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVES));
+
+       xss_val = vcpu_get_msr(vcpu, MSR_IA32_XSS);
+       TEST_ASSERT(xss_val == 0,
+                   "MSR_IA32_XSS should be initialized to zero");
+
+       vcpu_set_msr(vcpu, MSR_IA32_XSS, xss_val);
+
+       /*
+        * At present, KVM only supports a guest IA32_XSS value of 0. Verify
+        * that trying to set the guest IA32_XSS to an unsupported value fails.
+        * Also, in the future, when setting a non-zero value succeeds, check
+        * that IA32_XSS is in the list of MSRs to save/restore.
+        */
+       xss_in_msr_list = kvm_msr_is_in_save_restore_list(MSR_IA32_XSS);
+       for (i = 0; i < MSR_BITS; ++i) {
+               r = _vcpu_set_msr(vcpu, MSR_IA32_XSS, 1ull << i);
+
+               /*
+                * Setting a list of MSRs returns the entry that "faulted", or
+                * the last entry +1 if all MSRs were successfully written.
+                */
+               TEST_ASSERT(!r || r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r));
+               TEST_ASSERT(r != 1 || xss_in_msr_list,
+                           "IA32_XSS was able to be set, but was not in save/restore list");
+       }
+
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
deleted file mode 100644 (file)
index f4ce5a1..0000000
+++ /dev/null
@@ -1,315 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * amx tests
- *
- * Copyright (C) 2021, Intel, Inc.
- *
- * Tests for amx #NM exception and save/restore.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/syscall.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#ifndef __x86_64__
-# error This test is 64-bit only
-#endif
-
-#define NUM_TILES                      8
-#define TILE_SIZE                      1024
-#define XSAVE_SIZE                     ((NUM_TILES * TILE_SIZE) + PAGE_SIZE)
-
-/* Tile configuration associated: */
-#define PALETTE_TABLE_INDEX            1
-#define MAX_TILES                      16
-#define RESERVED_BYTES                 14
-
-#define XSAVE_HDR_OFFSET               512
-
-struct tile_config {
-       u8  palette_id;
-       u8  start_row;
-       u8  reserved[RESERVED_BYTES];
-       u16 colsb[MAX_TILES];
-       u8  rows[MAX_TILES];
-};
-
-struct tile_data {
-       u8 data[NUM_TILES * TILE_SIZE];
-};
-
-struct xtile_info {
-       u16 bytes_per_tile;
-       u16 bytes_per_row;
-       u16 max_names;
-       u16 max_rows;
-       u32 xsave_offset;
-       u32 xsave_size;
-};
-
-static struct xtile_info xtile;
-
-static inline void __ldtilecfg(void *cfg)
-{
-       asm volatile(".byte 0xc4,0xe2,0x78,0x49,0x00"
-                    : : "a"(cfg));
-}
-
-static inline void __tileloadd(void *tile)
-{
-       asm volatile(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10"
-                    : : "a"(tile), "d"(0));
-}
-
-static inline void __tilerelease(void)
-{
-       asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::);
-}
-
-static inline void __xsavec(struct xstate *xstate, uint64_t rfbm)
-{
-       uint32_t rfbm_lo = rfbm;
-       uint32_t rfbm_hi = rfbm >> 32;
-
-       asm volatile("xsavec (%%rdi)"
-                    : : "D" (xstate), "a" (rfbm_lo), "d" (rfbm_hi)
-                    : "memory");
-}
-
-static void check_xtile_info(void)
-{
-       GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE);
-
-       GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0));
-       GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE);
-
-       xtile.xsave_offset = this_cpu_property(X86_PROPERTY_XSTATE_TILE_OFFSET);
-       GUEST_ASSERT(xtile.xsave_offset == 2816);
-       xtile.xsave_size = this_cpu_property(X86_PROPERTY_XSTATE_TILE_SIZE);
-       GUEST_ASSERT(xtile.xsave_size == 8192);
-       GUEST_ASSERT(sizeof(struct tile_data) >= xtile.xsave_size);
-
-       GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_MAX_PALETTE_TABLES));
-       GUEST_ASSERT(this_cpu_property(X86_PROPERTY_AMX_MAX_PALETTE_TABLES) >=
-                    PALETTE_TABLE_INDEX);
-
-       GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_NR_TILE_REGS));
-       xtile.max_names = this_cpu_property(X86_PROPERTY_AMX_NR_TILE_REGS);
-       GUEST_ASSERT(xtile.max_names == 8);
-       xtile.bytes_per_tile = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_TILE);
-       GUEST_ASSERT(xtile.bytes_per_tile == 1024);
-       xtile.bytes_per_row = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_ROW);
-       GUEST_ASSERT(xtile.bytes_per_row == 64);
-       xtile.max_rows = this_cpu_property(X86_PROPERTY_AMX_MAX_ROWS);
-       GUEST_ASSERT(xtile.max_rows == 16);
-}
-
-static void set_tilecfg(struct tile_config *cfg)
-{
-       int i;
-
-       /* Only palette id 1 */
-       cfg->palette_id = 1;
-       for (i = 0; i < xtile.max_names; i++) {
-               cfg->colsb[i] = xtile.bytes_per_row;
-               cfg->rows[i] = xtile.max_rows;
-       }
-}
-
-static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
-                                                   struct tile_data *tiledata,
-                                                   struct xstate *xstate)
-{
-       GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE) &&
-                    this_cpu_has(X86_FEATURE_OSXSAVE));
-       check_xtile_info();
-       GUEST_SYNC(1);
-
-       /* xfd=0, enable amx */
-       wrmsr(MSR_IA32_XFD, 0);
-       GUEST_SYNC(2);
-       GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0);
-       set_tilecfg(amx_cfg);
-       __ldtilecfg(amx_cfg);
-       GUEST_SYNC(3);
-       /* Check save/restore when trap to userspace */
-       __tileloadd(tiledata);
-       GUEST_SYNC(4);
-       __tilerelease();
-       GUEST_SYNC(5);
-       /*
-        * After XSAVEC, XTILEDATA is cleared in the xstate_bv but is set in
-        * the xcomp_bv.
-        */
-       xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA;
-       __xsavec(xstate, XFEATURE_MASK_XTILE_DATA);
-       GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
-       GUEST_ASSERT(xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA);
-
-       /* xfd=0x40000, disable amx tiledata */
-       wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA);
-
-       /*
-        * XTILEDATA is cleared in xstate_bv but set in xcomp_bv, this property
-        * remains the same even when amx tiledata is disabled by IA32_XFD.
-        */
-       xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA;
-       __xsavec(xstate, XFEATURE_MASK_XTILE_DATA);
-       GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
-       GUEST_ASSERT((xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA));
-
-       GUEST_SYNC(6);
-       GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
-       set_tilecfg(amx_cfg);
-       __ldtilecfg(amx_cfg);
-       /* Trigger #NM exception */
-       __tileloadd(tiledata);
-       GUEST_SYNC(10);
-
-       GUEST_DONE();
-}
-
-void guest_nm_handler(struct ex_regs *regs)
-{
-       /* Check if #NM is triggered by XFEATURE_MASK_XTILE_DATA */
-       GUEST_SYNC(7);
-       GUEST_ASSERT(!(get_cr0() & X86_CR0_TS));
-       GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
-       GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
-       GUEST_SYNC(8);
-       GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
-       GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
-       /* Clear xfd_err */
-       wrmsr(MSR_IA32_XFD_ERR, 0);
-       /* xfd=0, enable amx */
-       wrmsr(MSR_IA32_XFD, 0);
-       GUEST_SYNC(9);
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_regs regs1, regs2;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct kvm_x86_state *state;
-       int xsave_restore_size;
-       vm_vaddr_t amx_cfg, tiledata, xstate;
-       struct ucall uc;
-       u32 amx_offset;
-       int ret;
-
-       /*
-        * Note, all off-by-default features must be enabled before anything
-        * caches KVM_GET_SUPPORTED_CPUID, e.g. before using kvm_cpu_has().
-        */
-       vm_xsave_require_permission(XFEATURE_MASK_XTILE_DATA);
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD));
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE));
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG));
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA));
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA_XFD));
-
-       /* Create VM */
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       TEST_ASSERT(kvm_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE),
-                   "KVM should enumerate max XSAVE size when XSAVE is supported");
-       xsave_restore_size = kvm_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE);
-
-       vcpu_regs_get(vcpu, &regs1);
-
-       /* Register #NM handler */
-       vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler);
-
-       /* amx cfg for guest_code */
-       amx_cfg = vm_vaddr_alloc_page(vm);
-       memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize());
-
-       /* amx tiledata for guest_code */
-       tiledata = vm_vaddr_alloc_pages(vm, 2);
-       memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize());
-
-       /* XSAVE state for guest_code */
-       xstate = vm_vaddr_alloc_pages(vm, DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
-       memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
-       vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate);
-
-       for (;;) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       switch (uc.args[1]) {
-                       case 1:
-                       case 2:
-                       case 3:
-                       case 5:
-                       case 6:
-                       case 7:
-                       case 8:
-                               fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]);
-                               break;
-                       case 4:
-                       case 10:
-                               fprintf(stderr,
-                               "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]);
-
-                               /*
-                                * Compacted mode: the AMX offset is the XSAVE
-                                * area size minus the 8K of AMX tile data.
-                                */
-                               amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
-                               state = vcpu_save_state(vcpu);
-                               void *amx_start = (void *)state->xsave + amx_offset;
-                               void *tiles_data = (void *)addr_gva2hva(vm, tiledata);
-                               /* Only check TMM0 register, 1 tile */
-                               ret = memcmp(amx_start, tiles_data, TILE_SIZE);
-                               TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret);
-                               kvm_x86_state_cleanup(state);
-                               break;
-                       case 9:
-                               fprintf(stderr,
-                               "GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]);
-                               break;
-                       }
-                       break;
-               case UCALL_DONE:
-                       fprintf(stderr, "UCALL_DONE\n");
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-
-               state = vcpu_save_state(vcpu);
-               memset(&regs1, 0, sizeof(regs1));
-               vcpu_regs_get(vcpu, &regs1);
-
-               kvm_vm_release(vm);
-
-               /* Restore state in a new VM.  */
-               vcpu = vm_recreate_with_one_vcpu(vm);
-               vcpu_load_state(vcpu, state);
-               kvm_x86_state_cleanup(state);
-
-               memset(&regs2, 0, sizeof(regs2));
-               vcpu_regs_get(vcpu, &regs2);
-               TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
-                           "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
-                           (ulong) regs2.rdi, (ulong) regs2.rsi);
-       }
-done:
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c
deleted file mode 100644 (file)
index f8916bb..0000000
+++ /dev/null
@@ -1,194 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (c) 2024 Intel Corporation
- *
- * Verify KVM correctly emulates the APIC bus frequency when the VMM configures
- * the frequency via KVM_CAP_X86_APIC_BUS_CYCLES_NS.  Start the APIC timer by
- * programming TMICT (timer initial count) to the largest value possible (so
- * that the timer will not expire during the test).  Then, after an arbitrary
- * amount of time has elapsed, verify TMCCT (timer current count) is within 5%
- * of the expected value based on the time elapsed, the APIC bus frequency, and
- * the programmed TDCR (timer divide configuration register).
- */
-
-#include "apic.h"
-#include "test_util.h"
-
-/*
- * Possible TDCR values with matching divide count. Used to modify APIC
- * timer frequency.
- */
-static const struct {
-       const uint32_t tdcr;
-       const uint32_t divide_count;
-} tdcrs[] = {
-       {0x0, 2},
-       {0x1, 4},
-       {0x2, 8},
-       {0x3, 16},
-       {0x8, 32},
-       {0x9, 64},
-       {0xa, 128},
-       {0xb, 1},
-};
-
-static bool is_x2apic;
-
-static void apic_enable(void)
-{
-       if (is_x2apic)
-               x2apic_enable();
-       else
-               xapic_enable();
-}
-
-static uint32_t apic_read_reg(unsigned int reg)
-{
-       return is_x2apic ? x2apic_read_reg(reg) : xapic_read_reg(reg);
-}
-
-static void apic_write_reg(unsigned int reg, uint32_t val)
-{
-       if (is_x2apic)
-               x2apic_write_reg(reg, val);
-       else
-               xapic_write_reg(reg, val);
-}
-
-static void apic_guest_code(uint64_t apic_hz, uint64_t delay_ms)
-{
-       uint64_t tsc_hz = guest_tsc_khz * 1000;
-       const uint32_t tmict = ~0u;
-       uint64_t tsc0, tsc1, freq;
-       uint32_t tmcct;
-       int i;
-
-       apic_enable();
-
-       /*
-        * Setup one-shot timer.  The vector does not matter because the
-        * interrupt should not fire.
-        */
-       apic_write_reg(APIC_LVTT, APIC_LVT_TIMER_ONESHOT | APIC_LVT_MASKED);
-
-       for (i = 0; i < ARRAY_SIZE(tdcrs); i++) {
-               apic_write_reg(APIC_TDCR, tdcrs[i].tdcr);
-               apic_write_reg(APIC_TMICT, tmict);
-
-               tsc0 = rdtsc();
-               udelay(delay_ms * 1000);
-               tmcct = apic_read_reg(APIC_TMCCT);
-               tsc1 = rdtsc();
-
-               /*
-                * Stop the timer _after_ reading the current, final count, as
-                * writing the initial counter also modifies the current count.
-                */
-               apic_write_reg(APIC_TMICT, 0);
-
-               freq = (tmict - tmcct) * tdcrs[i].divide_count * tsc_hz / (tsc1 - tsc0);
-               /* Check if measured frequency is within 5% of configured frequency. */
-               __GUEST_ASSERT(freq < apic_hz * 105 / 100 && freq > apic_hz * 95 / 100,
-                              "Frequency = %lu (wanted %lu - %lu), bus = %lu, div = %u, tsc = %lu",
-                              freq, apic_hz * 95 / 100, apic_hz * 105 / 100,
-                              apic_hz, tdcrs[i].divide_count, tsc_hz);
-       }
-
-       GUEST_DONE();
-}
-
-static void test_apic_bus_clock(struct kvm_vcpu *vcpu)
-{
-       bool done = false;
-       struct ucall uc;
-
-       while (!done) {
-               vcpu_run(vcpu);
-
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_DONE:
-                       done = true;
-                       break;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       break;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-                       break;
-               }
-       }
-}
-
-static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms,
-                                   bool x2apic)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       int ret;
-
-       is_x2apic = x2apic;
-
-       vm = vm_create(1);
-
-       sync_global_to_guest(vm, is_x2apic);
-
-       vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
-                     NSEC_PER_SEC / apic_hz);
-
-       vcpu = vm_vcpu_add(vm, 0, apic_guest_code);
-       vcpu_args_set(vcpu, 2, apic_hz, delay_ms);
-
-       ret = __vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
-                             NSEC_PER_SEC / apic_hz);
-       TEST_ASSERT(ret < 0 && errno == EINVAL,
-                   "Setting of APIC bus frequency after vCPU is created should fail.");
-
-       if (!is_x2apic)
-               virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
-
-       test_apic_bus_clock(vcpu);
-       kvm_vm_free(vm);
-}
-
-static void help(char *name)
-{
-       puts("");
-       printf("usage: %s [-h] [-d delay] [-f APIC bus freq]\n", name);
-       puts("");
-       printf("-d: Delay (in msec) guest uses to measure APIC bus frequency.\n");
-       printf("-f: The APIC bus frequency (in MHz) to be configured for the guest.\n");
-       puts("");
-}
-
-int main(int argc, char *argv[])
-{
-       /*
-        * Arbitrarily default to 25MHz for the APIC bus frequency, which is
-        * different enough from the default 1GHz to be interesting.
-        */
-       uint64_t apic_hz = 25 * 1000 * 1000;
-       uint64_t delay_ms = 100;
-       int opt;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS));
-
-       while ((opt = getopt(argc, argv, "d:f:h")) != -1) {
-               switch (opt) {
-               case 'f':
-                       apic_hz = atoi_positive("APIC bus frequency", optarg) * 1000 * 1000;
-                       break;
-               case 'd':
-                       delay_ms = atoi_positive("Delay in milliseconds", optarg);
-                       break;
-               case 'h':
-               default:
-                       help(argv[0]);
-                       exit(KSFT_SKIP);
-               }
-       }
-
-       run_apic_bus_clock_test(apic_hz, delay_ms, false);
-       run_apic_bus_clock_test(apic_hz, delay_ms, true);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
deleted file mode 100644 (file)
index 7b3fda6..0000000
+++ /dev/null
@@ -1,225 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2021, Red Hat Inc.
- *
- * Generic tests for KVM CPUID set/get ioctls
- */
-#include <asm/kvm_para.h>
-#include <linux/kvm_para.h>
-#include <stdint.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-struct cpuid_mask {
-       union {
-               struct {
-                       u32 eax;
-                       u32 ebx;
-                       u32 ecx;
-                       u32 edx;
-               };
-               u32 regs[4];
-       };
-};
-
-static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
-{
-       int i;
-       u32 eax, ebx, ecx, edx;
-
-       for (i = 0; i < guest_cpuid->nent; i++) {
-               __cpuid(guest_cpuid->entries[i].function,
-                       guest_cpuid->entries[i].index,
-                       &eax, &ebx, &ecx, &edx);
-
-               GUEST_ASSERT_EQ(eax, guest_cpuid->entries[i].eax);
-               GUEST_ASSERT_EQ(ebx, guest_cpuid->entries[i].ebx);
-               GUEST_ASSERT_EQ(ecx, guest_cpuid->entries[i].ecx);
-               GUEST_ASSERT_EQ(edx, guest_cpuid->entries[i].edx);
-       }
-
-}
-
-static void guest_main(struct kvm_cpuid2 *guest_cpuid)
-{
-       GUEST_SYNC(1);
-
-       test_guest_cpuids(guest_cpuid);
-
-       GUEST_SYNC(2);
-
-       GUEST_ASSERT_EQ(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF), 0x40000001);
-
-       GUEST_DONE();
-}
-
-static struct cpuid_mask get_const_cpuid_mask(const struct kvm_cpuid_entry2 *entry)
-{
-       struct cpuid_mask mask;
-
-       memset(&mask, 0xff, sizeof(mask));
-
-       switch (entry->function) {
-       case 0x1:
-               mask.regs[X86_FEATURE_OSXSAVE.reg] &= ~BIT(X86_FEATURE_OSXSAVE.bit);
-               break;
-       case 0x7:
-               mask.regs[X86_FEATURE_OSPKE.reg] &= ~BIT(X86_FEATURE_OSPKE.bit);
-               break;
-       case 0xd:
-               /*
-                * CPUID.0xD.{0,1}.EBX enumerate XSAVE size based on the current
-                * XCR0 and IA32_XSS MSR values.
-                */
-               if (entry->index < 2)
-                       mask.ebx = 0;
-               break;
-       }
-       return mask;
-}
-
-static void compare_cpuids(const struct kvm_cpuid2 *cpuid1,
-                          const struct kvm_cpuid2 *cpuid2)
-{
-       const struct kvm_cpuid_entry2 *e1, *e2;
-       int i;
-
-       TEST_ASSERT(cpuid1->nent == cpuid2->nent,
-                   "CPUID nent mismatch: %d vs. %d", cpuid1->nent, cpuid2->nent);
-
-       for (i = 0; i < cpuid1->nent; i++) {
-               struct cpuid_mask mask;
-
-               e1 = &cpuid1->entries[i];
-               e2 = &cpuid2->entries[i];
-
-               TEST_ASSERT(e1->function == e2->function &&
-                           e1->index == e2->index && e1->flags == e2->flags,
-                           "CPUID entries[%d] mismatch: 0x%x.%d.%x vs. 0x%x.%d.%x",
-                           i, e1->function, e1->index, e1->flags,
-                           e2->function, e2->index, e2->flags);
-
-               /* Mask off dynamic bits, e.g. OSXSAVE, when comparing entries. */
-               mask = get_const_cpuid_mask(e1);
-
-               TEST_ASSERT((e1->eax & mask.eax) == (e2->eax & mask.eax) &&
-                           (e1->ebx & mask.ebx) == (e2->ebx & mask.ebx) &&
-                           (e1->ecx & mask.ecx) == (e2->ecx & mask.ecx) &&
-                           (e1->edx & mask.edx) == (e2->edx & mask.edx),
-                           "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x",
-                           e1->function, e1->index,
-                           e1->eax & mask.eax, e1->ebx & mask.ebx,
-                           e1->ecx & mask.ecx, e1->edx & mask.edx,
-                           e2->eax & mask.eax, e2->ebx & mask.ebx,
-                           e2->ecx & mask.ecx, e2->edx & mask.edx);
-       }
-}
-
-static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
-{
-       struct ucall uc;
-
-       vcpu_run(vcpu);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_SYNC:
-               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
-                           uc.args[1] == stage + 1,
-                           "Stage %d: Unexpected register values vmexit, got %lx",
-                           stage + 1, (ulong)uc.args[1]);
-               return;
-       case UCALL_DONE:
-               return;
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-       default:
-               TEST_ASSERT(false, "Unexpected exit: %s",
-                           exit_reason_str(vcpu->run->exit_reason));
-       }
-}
-
-struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid)
-{
-       int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]);
-       vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR);
-       struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva);
-
-       memcpy(guest_cpuids, cpuid, size);
-
-       *p_gva = gva;
-       return guest_cpuids;
-}
-
-static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
-{
-       struct kvm_cpuid_entry2 *ent;
-       int rc;
-       u32 eax, ebx, x;
-
-       /* Setting unmodified CPUID is allowed */
-       rc = __vcpu_set_cpuid(vcpu);
-       TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
-
-       /* Changing CPU features is forbidden */
-       ent = vcpu_get_cpuid_entry(vcpu, 0x7);
-       ebx = ent->ebx;
-       ent->ebx--;
-       rc = __vcpu_set_cpuid(vcpu);
-       TEST_ASSERT(rc, "Changing CPU features should fail");
-       ent->ebx = ebx;
-
-       /* Changing MAXPHYADDR is forbidden */
-       ent = vcpu_get_cpuid_entry(vcpu, 0x80000008);
-       eax = ent->eax;
-       x = eax & 0xff;
-       ent->eax = (eax & ~0xffu) | (x - 1);
-       rc = __vcpu_set_cpuid(vcpu);
-       TEST_ASSERT(rc, "Changing MAXPHYADDR should fail");
-       ent->eax = eax;
-}
-
-static void test_get_cpuid2(struct kvm_vcpu *vcpu)
-{
-       struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent + 1);
-       int i, r;
-
-       vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
-       TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent,
-                   "KVM didn't update nent on success, wanted %u, got %u",
-                   vcpu->cpuid->nent, cpuid->nent);
-
-       for (i = 0; i < vcpu->cpuid->nent; i++) {
-               cpuid->nent = i;
-               r = __vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
-               TEST_ASSERT(r && errno == E2BIG, KVM_IOCTL_ERROR(KVM_GET_CPUID2, r));
-               TEST_ASSERT(cpuid->nent == i, "KVM modified nent on failure");
-       }
-       free(cpuid);
-}
-
-int main(void)
-{
-       struct kvm_vcpu *vcpu;
-       vm_vaddr_t cpuid_gva;
-       struct kvm_vm *vm;
-       int stage;
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
-
-       compare_cpuids(kvm_get_supported_cpuid(), vcpu->cpuid);
-
-       vcpu_alloc_cpuid(vm, &cpuid_gva, vcpu->cpuid);
-
-       vcpu_args_set(vcpu, 1, cpuid_gva);
-
-       for (stage = 0; stage < 3; stage++)
-               run_vcpu(vcpu, stage);
-
-       set_cpuid_after_run(vcpu);
-
-       test_get_cpuid2(vcpu);
-
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
deleted file mode 100644 (file)
index 28cc664..0000000
+++ /dev/null
@@ -1,100 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * CR4 and CPUID sync test
- *
- * Copyright 2018, Red Hat, Inc. and/or its affiliates.
- *
- * Author:
- *   Wei Huang <wei@redhat.com>
- */
-
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-
-#define MAGIC_HYPERCALL_PORT   0x80
-
-static void guest_code(void)
-{
-       u32 regs[4] = {
-               [KVM_CPUID_EAX] = X86_FEATURE_OSXSAVE.function,
-               [KVM_CPUID_ECX] = X86_FEATURE_OSXSAVE.index,
-       };
-
-       /* CR4.OSXSAVE should be enabled by default (for selftests vCPUs). */
-       GUEST_ASSERT(get_cr4() & X86_CR4_OSXSAVE);
-
-       /* verify CR4.OSXSAVE == CPUID.OSXSAVE */
-       GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
-
-       /*
-        * Notify hypervisor to clear CR4.OSXSAVE, do CPUID and save output,
-        * and then restore CR4.  Do this all in assembly to ensure no AVX
-        * instructions are executed while OSXSAVE=0.
-        */
-       asm volatile (
-               "out %%al, $" __stringify(MAGIC_HYPERCALL_PORT) "\n\t"
-               "cpuid\n\t"
-               "mov %%rdi, %%cr4\n\t"
-               : "+a" (regs[KVM_CPUID_EAX]),
-                 "=b" (regs[KVM_CPUID_EBX]),
-                 "+c" (regs[KVM_CPUID_ECX]),
-                 "=d" (regs[KVM_CPUID_EDX])
-               : "D" (get_cr4())
-       );
-
-       /* Verify KVM cleared OSXSAVE in CPUID when it was cleared in CR4. */
-       GUEST_ASSERT(!(regs[X86_FEATURE_OSXSAVE.reg] & BIT(X86_FEATURE_OSXSAVE.bit)));
-
-       /* Verify restoring CR4 also restored OSXSAVE in CPUID. */
-       GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct kvm_sregs sregs;
-       struct ucall uc;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       while (1) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               if (vcpu->run->io.port == MAGIC_HYPERCALL_PORT &&
-                   vcpu->run->io.direction == KVM_EXIT_IO_OUT) {
-                       /* emulate hypervisor clearing CR4.OSXSAVE */
-                       vcpu_sregs_get(vcpu, &sregs);
-                       sregs.cr4 &= ~X86_CR4_OSXSAVE;
-                       vcpu_sregs_set(vcpu, &sregs);
-                       continue;
-               }
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-       }
-
-done:
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
deleted file mode 100644 (file)
index 2d814c1..0000000
+++ /dev/null
@@ -1,217 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * KVM guest debug register tests
- *
- * Copyright (C) 2020, Red Hat, Inc.
- */
-#include <stdio.h>
-#include <string.h>
-#include "kvm_util.h"
-#include "processor.h"
-#include "apic.h"
-
-#define DR6_BD         (1 << 13)
-#define DR7_GD         (1 << 13)
-
-#define IRQ_VECTOR 0xAA
-
-/* For testing data access debug BP */
-uint32_t guest_value;
-
-extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
-
-static void guest_code(void)
-{
-       /* Create a pending interrupt on current vCPU */
-       x2apic_enable();
-       x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT |
-                        APIC_DM_FIXED | IRQ_VECTOR);
-
-       /*
-        * Software BP tests.
-        *
-        * NOTE: sw_bp needs to be before the cmd here, because int3 is an
-        * exception rather than a normal trap for KVM_SET_GUEST_DEBUG (we
-        * capture it using the vcpu exception bitmap).
-        */
-       asm volatile("sw_bp: int3");
-
-       /* Hardware instruction BP test */
-       asm volatile("hw_bp: nop");
-
-       /* Hardware data BP test */
-       asm volatile("mov $1234,%%rax;\n\t"
-                    "mov %%rax,%0;\n\t write_data:"
-                    : "=m" (guest_value) : : "rax");
-
-       /*
-        * Single step test, covers 2 basic instructions and 2 emulated
-        *
-        * Enable interrupts during the single stepping to see that pending
-        * interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ.
-        *
-        * Write MSR_IA32_TSC_DEADLINE to verify that KVM's fastpath handler
-        * exits to userspace due to single-step being enabled.
-        */
-       asm volatile("ss_start: "
-                    "sti\n\t"
-                    "xor %%eax,%%eax\n\t"
-                    "cpuid\n\t"
-                    "movl $" __stringify(MSR_IA32_TSC_DEADLINE) ", %%ecx\n\t"
-                    "wrmsr\n\t"
-                    "cli\n\t"
-                    : : : "eax", "ebx", "ecx", "edx");
-
-       /* DR6.BD test */
-       asm volatile("bd_start: mov %%dr0, %%rax" : : : "rax");
-       GUEST_DONE();
-}
-
-#define  CAST_TO_RIP(v)  ((unsigned long long)&(v))
-
-static void vcpu_skip_insn(struct kvm_vcpu *vcpu, int insn_len)
-{
-       struct kvm_regs regs;
-
-       vcpu_regs_get(vcpu, &regs);
-       regs.rip += insn_len;
-       vcpu_regs_set(vcpu, &regs);
-}
-
-int main(void)
-{
-       struct kvm_guest_debug debug;
-       unsigned long long target_dr6, target_rip;
-       struct kvm_vcpu *vcpu;
-       struct kvm_run *run;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       uint64_t cmd;
-       int i;
-       /* Instruction lengths starting at ss_start */
-       int ss_size[6] = {
-               1,              /* sti */
-               2,              /* xor */
-               2,              /* cpuid */
-               5,              /* mov */
-               2,              /* wrmsr */
-               1,              /* cli */
-       };
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       run = vcpu->run;
-
-       /* Test software BPs - int3 */
-       memset(&debug, 0, sizeof(debug));
-       debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
-       vcpu_guest_debug_set(vcpu, &debug);
-       vcpu_run(vcpu);
-       TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
-                   run->debug.arch.exception == BP_VECTOR &&
-                   run->debug.arch.pc == CAST_TO_RIP(sw_bp),
-                   "INT3: exit %d exception %d rip 0x%llx (should be 0x%llx)",
-                   run->exit_reason, run->debug.arch.exception,
-                   run->debug.arch.pc, CAST_TO_RIP(sw_bp));
-       vcpu_skip_insn(vcpu, 1);
-
-       /* Test instruction HW BP over DR[0-3] */
-       for (i = 0; i < 4; i++) {
-               memset(&debug, 0, sizeof(debug));
-               debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
-               debug.arch.debugreg[i] = CAST_TO_RIP(hw_bp);
-               debug.arch.debugreg[7] = 0x400 | (1UL << (2*i+1));
-               vcpu_guest_debug_set(vcpu, &debug);
-               vcpu_run(vcpu);
-               target_dr6 = 0xffff0ff0 | (1UL << i);
-               TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
-                           run->debug.arch.exception == DB_VECTOR &&
-                           run->debug.arch.pc == CAST_TO_RIP(hw_bp) &&
-                           run->debug.arch.dr6 == target_dr6,
-                           "INS_HW_BP (DR%d): exit %d exception %d rip 0x%llx "
-                           "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
-                           i, run->exit_reason, run->debug.arch.exception,
-                           run->debug.arch.pc, CAST_TO_RIP(hw_bp),
-                           run->debug.arch.dr6, target_dr6);
-       }
-       /* Skip "nop" */
-       vcpu_skip_insn(vcpu, 1);
-
-       /* Test data access HW BP over DR[0-3] */
-       for (i = 0; i < 4; i++) {
-               memset(&debug, 0, sizeof(debug));
-               debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
-               debug.arch.debugreg[i] = CAST_TO_RIP(guest_value);
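-               /*
-                * DR7: global-enable DR[i], with R/W=01 (break on data writes)
-                * and LEN=11 (4 bytes) in the per-breakpoint control nibble.
-                */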
-               debug.arch.debugreg[7] = 0x00000400 | (1UL << (2*i+1)) |
-                   (0x000d0000UL << (4*i));
-               vcpu_guest_debug_set(vcpu, &debug);
-               vcpu_run(vcpu);
-               target_dr6 = 0xffff0ff0 | (1UL << i);
-               TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
-                           run->debug.arch.exception == DB_VECTOR &&
-                           run->debug.arch.pc == CAST_TO_RIP(write_data) &&
-                           run->debug.arch.dr6 == target_dr6,
-                           "DATA_HW_BP (DR%d): exit %d exception %d rip 0x%llx "
-                           "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
-                           i, run->exit_reason, run->debug.arch.exception,
-                           run->debug.arch.pc, CAST_TO_RIP(write_data),
-                           run->debug.arch.dr6, target_dr6);
-               /* Rollback the 4-bytes "mov" */
-               vcpu_skip_insn(vcpu, -7);
-       }
-       /* Skip the 4-bytes "mov" */
-       vcpu_skip_insn(vcpu, 7);
-
-       /* Test single step */
-       target_rip = CAST_TO_RIP(ss_start);
-       target_dr6 = 0xffff4ff0ULL;
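-       /* Each step is expected to set DR6.BS (bit 14) on top of the reserved bits */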
-       for (i = 0; i < ARRAY_SIZE(ss_size); i++) {
-               target_rip += ss_size[i];
-               memset(&debug, 0, sizeof(debug));
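-               /* BLOCKIRQ suppresses interrupt injection while stepping so the trap lands on the next guest instruction */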
-               debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP |
-                               KVM_GUESTDBG_BLOCKIRQ;
-               debug.arch.debugreg[7] = 0x00000400;
-               vcpu_guest_debug_set(vcpu, &debug);
-               vcpu_run(vcpu);
-               TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
-                           run->debug.arch.exception == DB_VECTOR &&
-                           run->debug.arch.pc == target_rip &&
-                           run->debug.arch.dr6 == target_dr6,
-                           "SINGLE_STEP[%d]: exit %d exception %d rip 0x%llx "
-                           "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
-                           i, run->exit_reason, run->debug.arch.exception,
-                           run->debug.arch.pc, target_rip, run->debug.arch.dr6,
-                           target_dr6);
-       }
-
-       /* Finally test global disable */
-       memset(&debug, 0, sizeof(debug));
-       debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
-       debug.arch.debugreg[7] = 0x400 | DR7_GD;
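-       /*
-        * With DR7.GD set, the guest's "mov %dr0, %rax" at bd_start triggers
-        * #DB with DR6.BD before the debug register is actually accessed.
-        */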
-       vcpu_guest_debug_set(vcpu, &debug);
-       vcpu_run(vcpu);
-       target_dr6 = 0xffff0ff0 | DR6_BD;
-       TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
-                   run->debug.arch.exception == DB_VECTOR &&
-                   run->debug.arch.pc == CAST_TO_RIP(bd_start) &&
-                   run->debug.arch.dr6 == target_dr6,
-                           "DR7.GD: exit %d exception %d rip 0x%llx "
-                           "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
-                           run->exit_reason, run->debug.arch.exception,
-                           run->debug.arch.pc, CAST_TO_RIP(bd_start), run->debug.arch.dr6,
-                           target_dr6);
-
-       /* Disable all debug controls, run to the end */
-       memset(&debug, 0, sizeof(debug));
-       vcpu_guest_debug_set(vcpu, &debug);
-
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-       cmd = get_ucall(vcpu, &uc);
-       TEST_ASSERT(cmd == UCALL_DONE, "UCALL_DONE");
-
-       kvm_vm_free(vm);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
deleted file mode 100644 (file)
index 2929c06..0000000
+++ /dev/null
@@ -1,263 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * KVM dirty logging page splitting test
- *
- * Based on dirty_log_perf.c
- *
- * Copyright (C) 2018, Red Hat, Inc.
- * Copyright (C) 2023, Google, Inc.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <pthread.h>
-#include <linux/bitmap.h>
-
-#include "kvm_util.h"
-#include "test_util.h"
-#include "memstress.h"
-#include "guest_modes.h"
-#include "ucall_common.h"
-
-#define VCPUS          2
-#define SLOTS          2
-#define ITERATIONS     2
-
-static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
-
-static enum vm_mem_backing_src_type backing_src = VM_MEM_SRC_ANONYMOUS_HUGETLB;
-
-static u64 dirty_log_manual_caps;
-static bool host_quit;
-static int iteration;
-static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
-
-struct kvm_page_stats {
-       uint64_t pages_4k;
-       uint64_t pages_2m;
-       uint64_t pages_1g;
-       uint64_t hugepages;
-};
-
-static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage)
-{
-       stats->pages_4k = vm_get_stat(vm, "pages_4k");
-       stats->pages_2m = vm_get_stat(vm, "pages_2m");
-       stats->pages_1g = vm_get_stat(vm, "pages_1g");
-       stats->hugepages = stats->pages_2m + stats->pages_1g;
-
-       pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n",
-                stage, stats->pages_4k, stats->pages_2m, stats->pages_1g,
-                stats->hugepages);
-}
-
-static void run_vcpu_iteration(struct kvm_vm *vm)
-{
-       int i;
-
-       iteration++;
-       for (i = 0; i < VCPUS; i++) {
-               while (READ_ONCE(vcpu_last_completed_iteration[i]) !=
-                      iteration)
-                       ;
-       }
-}
-
-static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
-{
-       struct kvm_vcpu *vcpu = vcpu_args->vcpu;
-       int vcpu_idx = vcpu_args->vcpu_idx;
-
-       while (!READ_ONCE(host_quit)) {
-               int current_iteration = READ_ONCE(iteration);
-
-               vcpu_run(vcpu);
-
-               TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
-
-               vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
-
-               /* Wait for the start of the next iteration to be signaled. */
-               while (current_iteration == READ_ONCE(iteration) &&
-                      READ_ONCE(iteration) >= 0 &&
-                      !READ_ONCE(host_quit))
-                       ;
-       }
-}
-
-static void run_test(enum vm_guest_mode mode, void *unused)
-{
-       struct kvm_vm *vm;
-       unsigned long **bitmaps;
-       uint64_t guest_num_pages;
-       uint64_t host_num_pages;
-       uint64_t pages_per_slot;
-       int i;
-       struct kvm_page_stats stats_populated;
-       struct kvm_page_stats stats_dirty_logging_enabled;
-       struct kvm_page_stats stats_dirty_pass[ITERATIONS];
-       struct kvm_page_stats stats_clear_pass[ITERATIONS];
-       struct kvm_page_stats stats_dirty_logging_disabled;
-       struct kvm_page_stats stats_repopulated;
-
-       vm = memstress_create_vm(mode, VCPUS, guest_percpu_mem_size,
-                                SLOTS, backing_src, false);
-
-       guest_num_pages = (VCPUS * guest_percpu_mem_size) >> vm->page_shift;
-       guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
-       host_num_pages = vm_num_host_pages(mode, guest_num_pages);
-       pages_per_slot = host_num_pages / SLOTS;
-       TEST_ASSERT_EQ(host_num_pages, pages_per_slot * SLOTS);
-       TEST_ASSERT(!(host_num_pages % 512),
-                   "Number of pages '%lu' is not a multiple of 2MiB", host_num_pages);
-
-       bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot);
-
-       if (dirty_log_manual_caps)
-               vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
-                             dirty_log_manual_caps);
-
-       /* Start the iterations */
-       iteration = -1;
-       host_quit = false;
-
-       for (i = 0; i < VCPUS; i++)
-               vcpu_last_completed_iteration[i] = -1;
-
-       memstress_start_vcpu_threads(VCPUS, vcpu_worker);
-
-       run_vcpu_iteration(vm);
-       get_page_stats(vm, &stats_populated, "populating memory");
-
-       /* Enable dirty logging */
-       memstress_enable_dirty_logging(vm, SLOTS);
-
-       get_page_stats(vm, &stats_dirty_logging_enabled, "enabling dirty logging");
-
-       while (iteration < ITERATIONS) {
-               run_vcpu_iteration(vm);
-               get_page_stats(vm, &stats_dirty_pass[iteration - 1],
-                              "dirtying memory");
-
-               memstress_get_dirty_log(vm, bitmaps, SLOTS);
-
-               if (dirty_log_manual_caps) {
-                       memstress_clear_dirty_log(vm, bitmaps, SLOTS, pages_per_slot);
-
-                       get_page_stats(vm, &stats_clear_pass[iteration - 1], "clearing dirty log");
-               }
-       }
-
-       /* Disable dirty logging */
-       memstress_disable_dirty_logging(vm, SLOTS);
-
-       get_page_stats(vm, &stats_dirty_logging_disabled, "disabling dirty logging");
-
-       /* Run vCPUs again to fault pages back in. */
-       run_vcpu_iteration(vm);
-       get_page_stats(vm, &stats_repopulated, "repopulating memory");
-
-       /*
-        * Tell the vCPU threads to quit.  No need to manually check that vCPUs
-        * have stopped running after disabling dirty logging, the join will
-        * wait for them to exit.
-        */
-       host_quit = true;
-       memstress_join_vcpu_threads(VCPUS);
-
-       memstress_free_bitmaps(bitmaps, SLOTS);
-       memstress_destroy_vm(vm);
-
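-       /*
-        * Sanity check that populating memory mapped everything with
-        * hugepages: 512 4KiB pages per 2MiB page, 512 * 512 per 1GiB page.
-        */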
-       TEST_ASSERT_EQ((stats_populated.pages_2m * 512 +
-                       stats_populated.pages_1g * 512 * 512), host_num_pages);
-
-       /*
-        * Check that all huge pages were split. Since large pages can only
-        * exist in the data slot, and the vCPUs should have dirtied all pages
-        * in the data slot, there should be no huge pages left after splitting.
-        * Splitting happens at dirty log enable time without
-        * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and after the first clear pass
-        * with that capability.
-        */
-       if (dirty_log_manual_caps) {
-               TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
-               TEST_ASSERT(stats_clear_pass[0].pages_4k >= host_num_pages,
-                           "Expected at least '%lu' 4KiB pages, found only '%lu'",
-                           host_num_pages, stats_clear_pass[0].pages_4k);
-               TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
-       } else {
-               TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
-               TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= host_num_pages,
-                           "Expected at least '%lu' 4KiB pages, found only '%lu'",
-                           host_num_pages, stats_dirty_logging_enabled.pages_4k);
-       }
-
-       /*
-        * Once dirty logging is disabled and the vCPUs have touched all their
-        * memory again, the hugepage counts should be the same as they were
-        * right after initial population of memory.
-        */
-       TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
-       TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
-}
-
-static void help(char *name)
-{
-       puts("");
-       printf("usage: %s [-h] [-b vcpu bytes] [-s mem type]\n",
-              name);
-       puts("");
-       printf(" -b: specify the size of the memory region which should be\n"
-              "     dirtied by each vCPU. e.g. 10M or 3G.\n"
-              "     (default: 1G)\n");
-       backing_src_help("-s");
-       puts("");
-}
-
-int main(int argc, char *argv[])
-{
-       int opt;
-
-       TEST_REQUIRE(get_kvm_param_bool("eager_page_split"));
-       TEST_REQUIRE(get_kvm_param_bool("tdp_mmu"));
-
-       while ((opt = getopt(argc, argv, "b:hs:")) != -1) {
-               switch (opt) {
-               case 'b':
-                       guest_percpu_mem_size = parse_size(optarg);
-                       break;
-               case 'h':
-                       help(argv[0]);
-                       exit(0);
-               case 's':
-                       backing_src = parse_backing_src_type(optarg);
-                       break;
-               default:
-                       help(argv[0]);
-                       exit(1);
-               }
-       }
-
-       if (!is_backing_src_hugetlb(backing_src)) {
-               pr_info("This test will only work reliably with HugeTLB memory. "
-                       "It can work with THP, but that is best effort.\n");
-       }
-
-       guest_modes_append_default();
-
-       dirty_log_manual_caps = 0;
-       for_each_guest_mode(run_test, NULL);
-
-       dirty_log_manual_caps =
-               kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
-
-       if (dirty_log_manual_caps) {
-               dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
-                                         KVM_DIRTY_LOG_INITIALLY_SET);
-               for_each_guest_mode(run_test, NULL);
-       } else {
-               pr_info("Skipping testing with MANUAL_PROTECT as it is not supported\n");
-       }
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c
deleted file mode 100644 (file)
index 8105547..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2022, Google LLC.
- *
- * Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE.
- */
-#include "flds_emulation.h"
-#include "test_util.h"
-#include "ucall_common.h"
-
-#define MMIO_GPA       0x700000000
-#define MMIO_GVA       MMIO_GPA
-
-static void guest_code(void)
-{
-       /* Execute flds with an MMIO address to force KVM to emulate it. */
-       flds(MMIO_GVA);
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
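-       /*
-        * Map the GVA to a GPA with no backing memslot so that the flds load
-        * is treated as MMIO and must be emulated (which KVM cannot do).
-        */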
-       virt_map(vm, MMIO_GVA, MMIO_GPA, 1);
-
-       vcpu_run(vcpu);
-       handle_flds_emulation_failure_exit(vcpu);
-       vcpu_run(vcpu);
-       TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
-
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c b/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c
deleted file mode 100644 (file)
index a72f13a..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2020, Red Hat, Inc.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-static bool is_kvm_controlled_msr(uint32_t msr)
-{
-       return msr == MSR_IA32_VMX_CR0_FIXED1 || msr == MSR_IA32_VMX_CR4_FIXED1;
-}
-
-/*
- * For VMX MSRs with a "true" variant, KVM requires userspace to set the "true"
- * MSR, and doesn't allow setting the hidden version.
- */
-static bool is_hidden_vmx_msr(uint32_t msr)
-{
-       switch (msr) {
-       case MSR_IA32_VMX_PINBASED_CTLS:
-       case MSR_IA32_VMX_PROCBASED_CTLS:
-       case MSR_IA32_VMX_EXIT_CTLS:
-       case MSR_IA32_VMX_ENTRY_CTLS:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static bool is_quirked_msr(uint32_t msr)
-{
-       return msr != MSR_AMD64_DE_CFG;
-}
-
-static void test_feature_msr(uint32_t msr)
-{
-       const uint64_t supported_mask = kvm_get_feature_msr(msr);
-       uint64_t reset_value = is_quirked_msr(msr) ? supported_mask : 0;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       /*
-        * Don't bother testing KVM-controlled MSRs beyond verifying that the
-        * MSR can be read from userspace.  Any value is effectively legal, as
-        * KVM is bound by x86 architecture, not by ABI.
-        */
-       if (is_kvm_controlled_msr(msr))
-               return;
-
-       /*
-        * More goofy behavior.  KVM reports the host CPU's actual revision ID,
-        * but initializes the vCPU's revision ID to an arbitrary value.
-        */
-       if (msr == MSR_IA32_UCODE_REV)
-               reset_value = host_cpu_is_intel ? 0x100000000ULL : 0x01000065;
-
-       /*
-        * For quirked MSRs, KVM's ABI is to initialize the vCPU's value to the
-        * full set of features supported by KVM.  For non-quirked MSRs, and
-        * when the quirk is disabled, KVM must zero-initialize the MSR and let
-        * userspace do the configuration.
-        */
-       vm = vm_create_with_one_vcpu(&vcpu, NULL);
-       TEST_ASSERT(vcpu_get_msr(vcpu, msr) == reset_value,
-                   "Wanted 0x%lx for %squirked MSR 0x%x, got 0x%lx",
-                   reset_value, is_quirked_msr(msr) ? "" : "non-", msr,
-                   vcpu_get_msr(vcpu, msr));
-       if (!is_hidden_vmx_msr(msr))
-               vcpu_set_msr(vcpu, msr, supported_mask);
-       kvm_vm_free(vm);
-
-       if (is_hidden_vmx_msr(msr))
-               return;
-
-       if (!kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2) ||
-           !(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
-               return;
-
-       vm = vm_create(1);
-       vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_STUFF_FEATURE_MSRS);
-
-       vcpu = vm_vcpu_add(vm, 0, NULL);
-       TEST_ASSERT(!vcpu_get_msr(vcpu, msr),
-                   "Quirk disabled, wanted '0' for MSR 0x%x, got 0x%lx",
-                   msr, vcpu_get_msr(vcpu, msr));
-       kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
-       const struct kvm_msr_list *feature_list;
-       int i;
-
-       /*
-        * Skip the entire test if MSR_FEATURES isn't supported, other tests
-        * will cover the "regular" list of MSRs, the coverage here is purely
-        * opportunistic and not interesting on its own.
-        */
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES));
-
-       (void)kvm_get_msr_index_list();
-
-       feature_list = kvm_get_feature_msr_index_list();
-       for (i = 0; i < feature_list->nmsrs; i++)
-               test_feature_msr(feature_list->indices[i]);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
deleted file mode 100644 (file)
index 762628f..0000000
+++ /dev/null
@@ -1,142 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2020, Google LLC.
- *
- * Tests for KVM paravirtual feature disablement
- */
-#include <asm/kvm_para.h>
-#include <linux/kvm_para.h>
-#include <linux/stringify.h>
-#include <stdint.h>
-
-#include "kvm_test_harness.h"
-#include "apic.h"
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-/* VMCALL and VMMCALL are both 3-byte opcodes. */
-#define HYPERCALL_INSN_SIZE    3
-
-static bool quirk_disabled;
-
-static void guest_ud_handler(struct ex_regs *regs)
-{
-       regs->rax = -EFAULT;
-       regs->rip += HYPERCALL_INSN_SIZE;
-}
-
-static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE]  = { 0x0f, 0x01, 0xc1 };
-static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 };
-
-extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE];
-static uint64_t do_sched_yield(uint8_t apic_id)
-{
-       uint64_t ret;
-
-       asm volatile("hypercall_insn:\n\t"
-                    ".byte 0xcc,0xcc,0xcc\n\t"
-                    : "=a"(ret)
-                    : "a"((uint64_t)KVM_HC_SCHED_YIELD), "b"((uint64_t)apic_id)
-                    : "memory");
-
-       return ret;
-}
-
-static void guest_main(void)
-{
-       const uint8_t *native_hypercall_insn;
-       const uint8_t *other_hypercall_insn;
-       uint64_t ret;
-
-       if (host_cpu_is_intel) {
-               native_hypercall_insn = vmx_vmcall;
-               other_hypercall_insn  = svm_vmmcall;
-       } else if (host_cpu_is_amd) {
-               native_hypercall_insn = svm_vmmcall;
-               other_hypercall_insn  = vmx_vmcall;
-       } else {
-               GUEST_ASSERT(0);
-               /* unreachable */
-               return;
-       }
-
-       memcpy(hypercall_insn, other_hypercall_insn, HYPERCALL_INSN_SIZE);
-
-       ret = do_sched_yield(GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)));
-
-       /*
-        * If the quirk is disabled, verify that guest_ud_handler() "returned"
-        * -EFAULT and that KVM did NOT patch the hypercall.  If the quirk is
-        * enabled, verify that the hypercall succeeded and that KVM patched in
-        * the "right" hypercall.
-        */
-       if (quirk_disabled) {
-               GUEST_ASSERT(ret == (uint64_t)-EFAULT);
-               GUEST_ASSERT(!memcmp(other_hypercall_insn, hypercall_insn,
-                            HYPERCALL_INSN_SIZE));
-       } else {
-               GUEST_ASSERT(!ret);
-               GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn,
-                            HYPERCALL_INSN_SIZE));
-       }
-
-       GUEST_DONE();
-}
-
-KVM_ONE_VCPU_TEST_SUITE(fix_hypercall);
-
-static void enter_guest(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-       struct ucall uc;
-
-       vcpu_run(vcpu);
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_SYNC:
-               pr_info("%s: %016lx\n", (const char *)uc.args[2], uc.args[3]);
-               break;
-       case UCALL_DONE:
-               return;
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-       default:
-               TEST_FAIL("Unhandled ucall: %ld\nexit_reason: %u (%s)",
-                         uc.cmd, run->exit_reason, exit_reason_str(run->exit_reason));
-       }
-}
-
-static void test_fix_hypercall(struct kvm_vcpu *vcpu, bool disable_quirk)
-{
-       struct kvm_vm *vm = vcpu->vm;
-
-       vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler);
-
-       if (disable_quirk)
-               vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2,
-                             KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
-
-       quirk_disabled = disable_quirk;
-       sync_global_to_guest(vm, quirk_disabled);
-
-       virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
-
-       enter_guest(vcpu);
-}
-
-KVM_ONE_VCPU_TEST(fix_hypercall, enable_quirk, guest_main)
-{
-       test_fix_hypercall(vcpu, false);
-}
-
-KVM_ONE_VCPU_TEST(fix_hypercall, disable_quirk, guest_main)
-{
-       test_fix_hypercall(vcpu, true);
-}
-
-int main(int argc, char *argv[])
-{
-       TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
-
-       return test_harness_run(argc, argv);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86_64/flds_emulation.h
deleted file mode 100644 (file)
index 37b1a9f..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_FLDS_EMULATION_H
-#define SELFTEST_KVM_FLDS_EMULATION_H
-
-#include "kvm_util.h"
-
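-/* Opcode 0xd9 /0 with a ModRM byte of 0x00 encodes 'flds [eax]' */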
-#define FLDS_MEM_EAX ".byte 0xd9, 0x00"
-
-/*
- * flds is an instruction that the KVM instruction emulator is known not to
- * support. This can be used in guest code along with a mechanism to force
- * KVM to emulate the instruction (e.g. by providing an MMIO address) to
- * exercise emulation failures.
- */
-static inline void flds(uint64_t address)
-{
-       __asm__ __volatile__(FLDS_MEM_EAX :: "a"(address));
-}
-
-static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-       struct kvm_regs regs;
-       uint8_t *insn_bytes;
-       uint64_t flags;
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
-
-       TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
-                   "Unexpected suberror: %u",
-                   run->emulation_failure.suberror);
-
-       flags = run->emulation_failure.flags;
-       TEST_ASSERT(run->emulation_failure.ndata >= 3 &&
-                   flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES,
-                   "run->emulation_failure is missing instruction bytes");
-
-       TEST_ASSERT(run->emulation_failure.insn_size >= 2,
-                   "Expected a 2-byte opcode for 'flds', got %d bytes",
-                   run->emulation_failure.insn_size);
-
-       insn_bytes = run->emulation_failure.insn_bytes;
-       TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0,
-                   "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x",
-                   insn_bytes[0], insn_bytes[1]);
-
-       vcpu_regs_get(vcpu, &regs);
-       regs.rip += 2;
-       vcpu_regs_set(vcpu, &regs);
-}
-
-#endif /* !SELFTEST_KVM_FLDS_EMULATION_H */
diff --git a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c
deleted file mode 100644 (file)
index 10b1b0b..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2023, Google LLC.
- */
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "vmx.h"
-
-void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit)
-{
-       const uint64_t ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8);
-       const uint64_t valid = BIT_ULL(18) | BIT_ULL(24);
-       const uint64_t legal = ignored | valid;
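-       /*
-        * Expected behavior: "ignored" bits can be written but read back as
-        * zero, "valid" bits are writable and preserved, and writes that set
-        * any other bit must fail.
-        */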
-       uint64_t val = BIT_ULL(bit);
-       uint64_t actual;
-       int r;
-
-       r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val);
-       TEST_ASSERT(val & ~legal ? !r : r == 1,
-                   "Expected KVM_SET_MSRS(MSR_K7_HWCR) = 0x%lx to %s",
-                   val, val & ~legal ? "fail" : "succeed");
-
-       actual = vcpu_get_msr(vcpu, MSR_K7_HWCR);
-       TEST_ASSERT(actual == (val & valid),
-                   "Bit %u: unexpected HWCR 0x%lx; expected 0x%lx",
-                   bit, actual, (val & valid));
-
-       vcpu_set_msr(vcpu, MSR_K7_HWCR, 0);
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu;
-       unsigned int bit;
-
-       vm = vm_create_with_one_vcpu(&vcpu, NULL);
-
-       for (bit = 0; bit < BITS_PER_LONG; bit++)
-               test_hwcr_bit(vcpu, bit);
-
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
deleted file mode 100644 (file)
index e058bc6..0000000
+++ /dev/null
@@ -1,263 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2021, Red Hat, Inc.
- *
- * Tests for Hyper-V clocksources
- */
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "hyperv.h"
-
-struct ms_hyperv_tsc_page {
-       volatile u32 tsc_sequence;
-       u32 reserved1;
-       volatile u64 tsc_scale;
-       volatile s64 tsc_offset;
-} __packed;
-
-/* Simplified mul_u64_u64_shr() */
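-/* Returns the high 64 bits of a * b, ignoring carries from the dropped low-order partial products */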
-static inline u64 mul_u64_u64_shr64(u64 a, u64 b)
-{
-       union {
-               u64 ll;
-               struct {
-                       u32 low, high;
-               } l;
-       } rm, rn, rh, a0, b0;
-       u64 c;
-
-       a0.ll = a;
-       b0.ll = b;
-
-       rm.ll = (u64)a0.l.low * b0.l.high;
-       rn.ll = (u64)a0.l.high * b0.l.low;
-       rh.ll = (u64)a0.l.high * b0.l.high;
-
-       rh.l.low = c = rm.l.high + rn.l.high + rh.l.low;
-       rh.l.high = (c >> 32) + rh.l.high;
-
-       return rh.ll;
-}
-
-static inline void nop_loop(void)
-{
-       int i;
-
-       for (i = 0; i < 100000000; i++)
-               asm volatile("nop");
-}
-
-static inline void check_tsc_msr_rdtsc(void)
-{
-       u64 tsc_freq, r1, r2, t1, t2;
-       s64 delta_ns;
-
-       tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
-       GUEST_ASSERT(tsc_freq > 0);
-
-       /* For increased accuracy, take the mean of rdtsc() before and after rdmsr() */
-       r1 = rdtsc();
-       t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
-       r1 = (r1 + rdtsc()) / 2;
-       nop_loop();
-       r2 = rdtsc();
-       t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
-       r2 = (r2 + rdtsc()) / 2;
-
-       GUEST_ASSERT(r2 > r1 && t2 > t1);
-
-       /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
-       delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
-       if (delta_ns < 0)
-               delta_ns = -delta_ns;
-
-       /* 1% tolerance */
-       GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100);
-}
-
-static inline u64 get_tscpage_ts(struct ms_hyperv_tsc_page *tsc_page)
-{
-       return mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
-}
-
-static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page)
-{
-       u64 r1, r2, t1, t2;
-
-       /* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */
-       t1 = get_tscpage_ts(tsc_page);
-       r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
-
-       /* 10 ms tolerance */
-       GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000);
-       nop_loop();
-
-       t2 = get_tscpage_ts(tsc_page);
-       r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
-       GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000);
-}
-
-static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa)
-{
-       u64 tsc_scale, tsc_offset;
-
-       /* Set Guest OS id to enable Hyper-V emulation */
-       GUEST_SYNC(1);
-       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
-       GUEST_SYNC(2);
-
-       check_tsc_msr_rdtsc();
-
-       GUEST_SYNC(3);
-
-       /* Set up the TSC page in disabled state, check that it's clean */
-       wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa);
-       GUEST_ASSERT(tsc_page->tsc_sequence == 0);
-       GUEST_ASSERT(tsc_page->tsc_scale == 0);
-       GUEST_ASSERT(tsc_page->tsc_offset == 0);
-
-       GUEST_SYNC(4);
-
-       /* Set up the TSC page in enabled state */
-       wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1);
-       GUEST_ASSERT(tsc_page->tsc_sequence != 0);
-
-       GUEST_SYNC(5);
-
-       check_tsc_msr_tsc_page(tsc_page);
-
-       GUEST_SYNC(6);
-
-       tsc_offset = tsc_page->tsc_offset;
-       /* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */
-
-       GUEST_SYNC(7);
-       /* Sanity check TSC page timestamp, it should be close to 0 */
-       GUEST_ASSERT(get_tscpage_ts(tsc_page) < 100000);
-
-       GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset);
-
-       nop_loop();
-
-       /*
-        * Enable Re-enlightenment and check that TSC page stays constant across
-        * KVM_SET_CLOCK.
-        */
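-       /* Reenlightenment control: enable (bit 16) with notification vector 0xff */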
-       wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff);
-       wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1);
-       tsc_offset = tsc_page->tsc_offset;
-       tsc_scale = tsc_page->tsc_scale;
-       GUEST_SYNC(8);
-       GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset);
-       GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale);
-
-       GUEST_SYNC(9);
-
-       check_tsc_msr_tsc_page(tsc_page);
-
-       /*
-        * Disable re-enlightenment and TSC page, check that KVM doesn't update
-        * it anymore.
-        */
-       wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0);
-       wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0);
-       wrmsr(HV_X64_MSR_REFERENCE_TSC, 0);
-       memset(tsc_page, 0, sizeof(*tsc_page));
-
-       GUEST_SYNC(10);
-       GUEST_ASSERT(tsc_page->tsc_sequence == 0);
-       GUEST_ASSERT(tsc_page->tsc_offset == 0);
-       GUEST_ASSERT(tsc_page->tsc_scale == 0);
-
-       GUEST_DONE();
-}
-
-static void host_check_tsc_msr_rdtsc(struct kvm_vcpu *vcpu)
-{
-       u64 tsc_freq, r1, r2, t1, t2;
-       s64 delta_ns;
-
-       tsc_freq = vcpu_get_msr(vcpu, HV_X64_MSR_TSC_FREQUENCY);
-       TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
-
-       /* For increased accuracy, take the mean of rdtsc() before and after the ioctl */
-       r1 = rdtsc();
-       t1 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
-       r1 = (r1 + rdtsc()) / 2;
-       nop_loop();
-       r2 = rdtsc();
-       t2 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
-       r2 = (r2 + rdtsc()) / 2;
-
-       TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
-
-       /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
-       delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
-       if (delta_ns < 0)
-               delta_ns = -delta_ns;
-
-       /* 1% tolerance */
-       TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100,
-                   "Elapsed time does not match (MSR=%ld, TSC=%ld)",
-                   (t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq);
-}
-
-int main(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       vm_vaddr_t tsc_page_gva;
-       int stage;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME));
-       TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
-
-       vcpu_set_hv_cpuid(vcpu);
-
-       tsc_page_gva = vm_vaddr_alloc_page(vm);
-       memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
-       TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
-               "TSC page has to be page aligned");
-       vcpu_args_set(vcpu, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
-
-       host_check_tsc_msr_rdtsc(vcpu);
-
-       for (stage = 1;; stage++) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       break;
-               case UCALL_DONE:
-                       /* Keep in sync with guest_main() */
-                       TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d",
-                                   stage);
-                       goto out;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-
-               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
-                           uc.args[1] == stage,
-                           "Stage %d: Unexpected register values vmexit, got %lx",
-                           stage, (ulong)uc.args[1]);
-
-               /* Reset kvmclock triggering TSC page update */
-               if (stage == 7 || stage == 8 || stage == 10) {
-                       struct kvm_clock_data clock = {0};
-
-                       vm_ioctl(vm, KVM_SET_CLOCK, &clock);
-               }
-       }
-
-out:
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
deleted file mode 100644 (file)
index 4f5881d..0000000
+++ /dev/null
@@ -1,172 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Test for x86 KVM_CAP_HYPERV_CPUID
- *
- * Copyright (C) 2018, Red Hat, Inc.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-static void guest_code(void)
-{
-}
-
-static bool smt_possible(void)
-{
-       char buf[16];
-       FILE *f;
-       bool res = true;
-
-       f = fopen("/sys/devices/system/cpu/smt/control", "r");
-       if (f) {
-               if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
-                       if (!strncmp(buf, "forceoff", 8) ||
-                           !strncmp(buf, "notsupported", 12))
-                               res = false;
-               }
-               fclose(f);
-       }
-
-       return res;
-}
-
-static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries,
-                         bool evmcs_expected)
-{
-       int i;
-       int nent_expected = 10;
-       u32 test_val;
-
-       TEST_ASSERT(hv_cpuid_entries->nent == nent_expected,
-                   "KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
-                   " (returned %d)",
-                   nent_expected, hv_cpuid_entries->nent);
-
-       for (i = 0; i < hv_cpuid_entries->nent; i++) {
-               const struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
-
-               TEST_ASSERT((entry->function >= 0x40000000) &&
-                           (entry->function <= 0x40000082),
-                           "function %x is out of the supported range",
-                           entry->function);
-
-               TEST_ASSERT(entry->index == 0,
-                           ".index field should be zero");
-
-               TEST_ASSERT(entry->flags == 0,
-                           ".flags field should be zero");
-
-               TEST_ASSERT(!entry->padding[0] && !entry->padding[1] &&
-                           !entry->padding[2], "padding should be zero");
-
-               switch (entry->function) {
-               case 0x40000000:
-                       test_val = 0x40000082;
-
-                       TEST_ASSERT(entry->eax == test_val,
-                                   "Wrong max leaf report in 0x40000000.EAX: %x"
-                                   " (evmcs=%d)",
-                                   entry->eax, evmcs_expected
-                               );
-                       break;
-               case 0x40000004:
-                       test_val = entry->eax & (1UL << 18);
-
-                       TEST_ASSERT(!!test_val == !smt_possible(),
-                                   "NoNonArchitecturalCoreSharing bit"
-                                   " doesn't reflect SMT setting");
-                       break;
-               case 0x4000000A:
-                       TEST_ASSERT(entry->eax & (1UL << 19),
-                                   "Enlightened MSR-Bitmap should always be supported"
-                                   " 0x4000000A.EAX: %x", entry->eax);
-                       if (evmcs_expected)
-                               TEST_ASSERT((entry->eax & 0xffff) == 0x101,
-                                   "Supported Enlightened VMCS version range is supposed to be 1:1"
-                                   " 0x4000000A.EAX: %x", entry->eax);
-
-                       break;
-               default:
-                       break;
-
-               }
-               /*
-                * If needed for debug:
-                * fprintf(stdout,
-                *      "CPUID%lx EAX=0x%lx EBX=0x%lx ECX=0x%lx EDX=0x%lx\n",
-                *      entry->function, entry->eax, entry->ebx, entry->ecx,
-                *      entry->edx);
-                */
-       }
-}
-
-void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
-{
-       static struct kvm_cpuid2 cpuid = {.nent = 0};
-       int ret;
-
-       if (vcpu)
-               ret = __vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
-       else
-               ret = __kvm_ioctl(vm->kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
-
-       TEST_ASSERT(ret == -1 && errno == E2BIG,
-                   "%s KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
-                   " it should have: %d %d", !vcpu ? "KVM" : "vCPU", ret, errno);
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vm *vm;
-       const struct kvm_cpuid2 *hv_cpuid_entries;
-       struct kvm_vcpu *vcpu;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       /* Test vCPU ioctl version */
-       test_hv_cpuid_e2big(vm, vcpu);
-
-       hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
-       test_hv_cpuid(hv_cpuid_entries, false);
-       free((void *)hv_cpuid_entries);
-
-       if (!kvm_cpu_has(X86_FEATURE_VMX) ||
-           !kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
-               print_skip("Enlightened VMCS is unsupported");
-               goto do_sys;
-       }
-       vcpu_enable_evmcs(vcpu);
-       hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
-       test_hv_cpuid(hv_cpuid_entries, true);
-       free((void *)hv_cpuid_entries);
-
-do_sys:
-       /* Test system ioctl version */
-       if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) {
-               print_skip("KVM_CAP_SYS_HYPERV_CPUID not supported");
-               goto out;
-       }
-
-       test_hv_cpuid_e2big(vm, NULL);
-
-       hv_cpuid_entries = kvm_get_supported_hv_cpuid();
-       test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX));
-
-out:
-       kvm_vm_free(vm);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
deleted file mode 100644 (file)
index 74cf196..0000000
+++ /dev/null
@@ -1,307 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2018, Red Hat, Inc.
- *
- * Tests for Enlightened VMCS, including nested guest state.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <linux/bitmap.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-
-#include "hyperv.h"
-#include "vmx.h"
-
-static int ud_count;
-
-static void guest_ud_handler(struct ex_regs *regs)
-{
-       ud_count++;
-       regs->rip += 3; /* VMLAUNCH */
-}
-
-static void guest_nmi_handler(struct ex_regs *regs)
-{
-}
-
-static inline void rdmsr_from_l2(uint32_t msr)
-{
-       /* Currently, L1 doesn't preserve GPRs during vmexits. */
-       __asm__ __volatile__ ("rdmsr" : : "c"(msr) :
-                             "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
-                             "r10", "r11", "r12", "r13", "r14", "r15");
-}
-
-/* Exit to L1 from L2 with RDMSR instruction */
-void l2_guest_code(void)
-{
-       u64 unused;
-
-       GUEST_SYNC(7);
-
-       GUEST_SYNC(8);
-
-       /* Forced exit to L1 upon restore */
-       GUEST_SYNC(9);
-
-       vmcall();
-
-       /* MSR-Bitmap tests */
-       rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
-       rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
-       rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
-       vmcall();
-       rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
-
-       /* L2 TLB flush tests */
-       hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
-                        HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS);
-       rdmsr_from_l2(MSR_FS_BASE);
-       /*
-        * Note: hypercall status (RAX) is not preserved correctly by L1 after
-        * synthetic vmexit, use unchecked version.
-        */
-       __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
-                          HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS,
-                          &unused);
-
-       /* Done, exit to L1 and never come back.  */
-       vmcall();
-}
-
-void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages,
-               vm_vaddr_t hv_hcall_page_gpa)
-{
-#define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
-       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
-       wrmsr(HV_X64_MSR_HYPERCALL, hv_hcall_page_gpa);
-
-       x2apic_enable();
-
-       GUEST_SYNC(1);
-       GUEST_SYNC(2);
-
-       enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
-       evmcs_enable();
-
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
-       GUEST_SYNC(3);
-       GUEST_ASSERT(load_evmcs(hv_pages));
-       GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
-
-       GUEST_SYNC(4);
-       GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
-
-       prepare_vmcs(vmx_pages, l2_guest_code,
-                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       GUEST_SYNC(5);
-       GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
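-       /* VMLAUNCH must fail while the eVMCS carries a bogus revision ID */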
-       current_evmcs->revision_id = -1u;
-       GUEST_ASSERT(vmlaunch());
-       current_evmcs->revision_id = EVMCS_VERSION;
-       GUEST_SYNC(6);
-
-       vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
-               PIN_BASED_NMI_EXITING);
-
-       /* L2 TLB flush setup */
-       current_evmcs->partition_assist_page = hv_pages->partition_assist_gpa;
-       current_evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
-       current_evmcs->hv_vm_id = 1;
-       current_evmcs->hv_vp_id = 1;
-       current_vp_assist->nested_control.features.directhypercall = 1;
-       *(u32 *)(hv_pages->partition_assist) = 0;
-
-       GUEST_ASSERT(!vmlaunch());
-       GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
-       GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), NMI_VECTOR);
-       GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
-
-       /*
-        * NMI forces an L2->L1 exit; resume L2 in the hope that the EVMCS is
-        * up-to-date (RIP points where it should and not at the beginning
-        * of l2_guest_code()).  GUEST_SYNC(9) checks that.
-        */
-       GUEST_ASSERT(!vmresume());
-
-       GUEST_SYNC(10);
-
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-       current_evmcs->guest_rip += 3; /* vmcall */
-
-       /* Intercept RDMSR 0xc0000100 */
-       vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
-               CPU_BASED_USE_MSR_BITMAPS);
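-       /* Byte offset 0x400 in the MSR bitmap is the read bitmap for MSRs 0xc0000000 - 0xc0001fff */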
-       __set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400);
-       GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
-       current_evmcs->guest_rip += 2; /* rdmsr */
-
-       /* Enable enlightened MSR bitmap */
-       current_evmcs->hv_enlightenments_control.msr_bitmap = 1;
-       GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
-       current_evmcs->guest_rip += 2; /* rdmsr */
-
-       /* Intercept RDMSR 0xc0000101 without telling KVM about it */
-       __set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400);
-       /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
-       current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
-       GUEST_ASSERT(!vmresume());
-       /* Make sure we don't see EXIT_REASON_MSR_READ here so eMSR bitmap works */
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-       current_evmcs->guest_rip += 3; /* vmcall */
-
-       /* Now tell KVM we've changed MSR-Bitmap */
-       current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
-       GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
-       current_evmcs->guest_rip += 2; /* rdmsr */
-
-       /*
-        * L2 TLB flush test. First VMCALL should be handled directly by L0,
-        * no VMCALL exit expected.
-        */
-       GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
-       current_evmcs->guest_rip += 2; /* rdmsr */
-       /* Enable synthetic vmexit */
-       *(u32 *)(hv_pages->partition_assist) = 1;
-       GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH);
-
-       GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-       GUEST_SYNC(11);
-
-       /* Try enlightened vmptrld with an incorrect GPA */
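-       /* The resulting VMLAUNCH should #UD; guest_ud_handler() counts and skips it */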
-       evmcs_vmptrld(0xdeadbeef, hv_pages->enlightened_vmcs);
-       GUEST_ASSERT(vmlaunch());
-       GUEST_ASSERT(ud_count == 1);
-       GUEST_DONE();
-}
-
-void inject_nmi(struct kvm_vcpu *vcpu)
-{
-       struct kvm_vcpu_events events;
-
-       vcpu_events_get(vcpu, &events);
-
-       events.nmi.pending = 1;
-       events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
-
-       vcpu_events_set(vcpu, &events);
-}
-
-static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm,
-                                       struct kvm_vcpu *vcpu)
-{
-       struct kvm_regs regs1, regs2;
-       struct kvm_x86_state *state;
-
-       state = vcpu_save_state(vcpu);
-       memset(&regs1, 0, sizeof(regs1));
-       vcpu_regs_get(vcpu, &regs1);
-
-       kvm_vm_release(vm);
-
-       /* Restore state in a new VM.  */
-       vcpu = vm_recreate_with_one_vcpu(vm);
-       vcpu_set_hv_cpuid(vcpu);
-       vcpu_enable_evmcs(vcpu);
-       vcpu_load_state(vcpu, state);
-       kvm_x86_state_cleanup(state);
-
-       memset(&regs2, 0, sizeof(regs2));
-       vcpu_regs_get(vcpu, &regs2);
-       TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
-                   "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
-                   (ulong) regs2.rdi, (ulong) regs2.rsi);
-       return vcpu;
-}
-
-int main(int argc, char *argv[])
-{
-       vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0;
-       vm_vaddr_t hcall_page;
-
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       int stage;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS));
-       TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       hcall_page = vm_vaddr_alloc_pages(vm, 1);
-       memset(addr_gva2hva(vm, hcall_page), 0x0,  getpagesize());
-
-       vcpu_set_hv_cpuid(vcpu);
-       vcpu_enable_evmcs(vcpu);
-
-       vcpu_alloc_vmx(vm, &vmx_pages_gva);
-       vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
-       vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
-       vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
-
-       vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
-       vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
-
-       pr_info("Running L1 which uses EVMCS to run L2\n");
-
-       for (stage = 1;; stage++) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-
-               /* UCALL_SYNC is handled here.  */
-               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
-                           uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
-                           stage, (ulong)uc.args[1]);
-
-               vcpu = save_restore_vm(vm, vcpu);
-
-               /* Force immediate L2->L1 exit before resuming */
-               if (stage == 8) {
-                       pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
-                       inject_nmi(vcpu);
-               }
-
-               /*
-                * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly
-                * restored VM (before the first KVM_RUN) to check that
-                * KVM_STATE_NESTED_EVMCS is not lost.
-                */
-               if (stage == 9) {
-                       pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n");
-                       vcpu = save_restore_vm(vm, vcpu);
-               }
-       }
-
-done:
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c
deleted file mode 100644 (file)
index 949e08e..0000000
+++ /dev/null
@@ -1,98 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Test Hyper-V extended hypercall, HV_EXT_CALL_QUERY_CAPABILITIES (0x8001),
- * exit to userspace and receive result in guest.
- *
- * Negative tests are present in hyperv_features.c
- *
- * Copyright 2022 Google LLC
- * Author: Vipin Sharma <vipinsh@google.com>
- */
-#include "kvm_util.h"
-#include "processor.h"
-#include "hyperv.h"
-
-/* Any value is fine */
-#define EXT_CAPABILITIES 0xbull
-
-static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa,
-                      vm_vaddr_t out_pg_gva)
-{
-       uint64_t *output_gva;
-
-       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
-       wrmsr(HV_X64_MSR_HYPERCALL, in_pg_gpa);
-
-       output_gva = (uint64_t *)out_pg_gva;
-
-       hyperv_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, in_pg_gpa, out_pg_gpa);
-
-       /* TLFS states output will be a uint64_t value */
-       GUEST_ASSERT_EQ(*output_gva, EXT_CAPABILITIES);
-
-       GUEST_DONE();
-}
-
-int main(void)
-{
-       vm_vaddr_t hcall_out_page;
-       vm_vaddr_t hcall_in_page;
-       struct kvm_vcpu *vcpu;
-       struct kvm_run *run;
-       struct kvm_vm *vm;
-       uint64_t *outval;
-       struct ucall uc;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
-
-       /* Verify if extended hypercalls are supported */
-       if (!kvm_cpuid_has(kvm_get_supported_hv_cpuid(),
-                          HV_ENABLE_EXTENDED_HYPERCALLS)) {
-               print_skip("Extended calls not supported by the kernel");
-               exit(KSFT_SKIP);
-       }
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       run = vcpu->run;
-       vcpu_set_hv_cpuid(vcpu);
-
-       /* Hypercall input */
-       hcall_in_page = vm_vaddr_alloc_pages(vm, 1);
-       memset(addr_gva2hva(vm, hcall_in_page), 0x0, vm->page_size);
-
-       /* Hypercall output */
-       hcall_out_page = vm_vaddr_alloc_pages(vm, 1);
-       memset(addr_gva2hva(vm, hcall_out_page), 0x0, vm->page_size);
-
-       vcpu_args_set(vcpu, 3, addr_gva2gpa(vm, hcall_in_page),
-                     addr_gva2gpa(vm, hcall_out_page), hcall_out_page);
-
-       vcpu_run(vcpu);
-
-       TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERV,
-                   "Unexpected exit reason: %u (%s)",
-                   run->exit_reason, exit_reason_str(run->exit_reason));
-
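-       /* Complete the hypercall from userspace: fill the output page and report success */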
-       outval = addr_gpa2hva(vm, run->hyperv.u.hcall.params[1]);
-       *outval = EXT_CAPABILITIES;
-       run->hyperv.u.hcall.result = HV_STATUS_SUCCESS;
-
-       vcpu_run(vcpu);
-
-       TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-                   "Unexpected exit reason: %u (%s)",
-                   run->exit_reason, exit_reason_str(run->exit_reason));
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-       case UCALL_DONE:
-               break;
-       default:
-               TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
-       }
-
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
deleted file mode 100644 (file)
index 068e9c6..0000000
+++ /dev/null
@@ -1,695 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2021, Red Hat, Inc.
- *
- * Tests for Hyper-V features enablement
- */
-#include <asm/kvm_para.h>
-#include <linux/kvm_para.h>
-#include <stdint.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "hyperv.h"
-
-/*
- * HYPERV_CPUID_ENLIGHTMENT_INFO.EBX is not a 'feature' CPUID leaf
- * but to activate the feature it is sufficient to set it to a non-zero
- * value. Use BIT(0) for that.
- */
-#define HV_PV_SPINLOCKS_TEST            \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EBX, 0)
-
-struct msr_data {
-       uint32_t idx;
-       bool fault_expected;
-       bool write;
-       u64 write_val;
-};
-
-struct hcall_data {
-       uint64_t control;
-       uint64_t expect;
-       bool ud_expected;
-};
-
-static bool is_write_only_msr(uint32_t msr)
-{
-       return msr == HV_X64_MSR_EOI;
-}
-
-static void guest_msr(struct msr_data *msr)
-{
-       uint8_t vector = 0;
-       uint64_t msr_val = 0;
-
-       GUEST_ASSERT(msr->idx);
-
-       if (msr->write)
-               vector = wrmsr_safe(msr->idx, msr->write_val);
-
-       if (!vector && (!msr->write || !is_write_only_msr(msr->idx)))
-               vector = rdmsr_safe(msr->idx, &msr_val);
-
-       if (msr->fault_expected)
-               __GUEST_ASSERT(vector == GP_VECTOR,
-                              "Expected #GP on %sMSR(0x%x), got vector '0x%x'",
-                              msr->write ? "WR" : "RD", msr->idx, vector);
-       else
-               __GUEST_ASSERT(!vector,
-                              "Expected success on %sMSR(0x%x), got vector '0x%x'",
-                              msr->write ? "WR" : "RD", msr->idx, vector);
-
-       if (vector || is_write_only_msr(msr->idx))
-               goto done;
-
-       if (msr->write)
-               __GUEST_ASSERT(!vector,
-                              "WRMSR(0x%x) to '0x%lx', RDMSR read '0x%lx'",
-                              msr->idx, msr->write_val, msr_val);
-
-       /* Invariant TSC bit appears when TSC invariant control MSR is written to */
-       if (msr->idx == HV_X64_MSR_TSC_INVARIANT_CONTROL) {
-               if (!this_cpu_has(HV_ACCESS_TSC_INVARIANT))
-                       GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC));
-               else
-                       GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC) ==
-                                    !!(msr_val & HV_INVARIANT_TSC_EXPOSED));
-       }
-
-done:
-       GUEST_DONE();
-}
-
-static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
-{
-       u64 res, input, output;
-       uint8_t vector;
-
-       GUEST_ASSERT_NE(hcall->control, 0);
-
-       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
-       wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
-
-       if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
-               input = pgs_gpa;
-               output = pgs_gpa + 4096;
-       } else {
-               input = output = 0;
-       }
-
-       vector = __hyperv_hypercall(hcall->control, input, output, &res);
-       if (hcall->ud_expected) {
-               __GUEST_ASSERT(vector == UD_VECTOR,
-                              "Expected #UD for control '%lu', got vector '0x%x'",
-                              hcall->control, vector);
-       } else {
-               __GUEST_ASSERT(!vector,
-                              "Expected no exception for control '%lu', got vector '0x%x'",
-                              hcall->control, vector);
-               GUEST_ASSERT_EQ(res, hcall->expect);
-       }
-
-       GUEST_DONE();
-}
-
-static void vcpu_reset_hv_cpuid(struct kvm_vcpu *vcpu)
-{
-       /*
-        * Enable all supported Hyper-V features, then clear the leaves holding
-        * the features that will be tested one by one.
-        */
-       vcpu_set_hv_cpuid(vcpu);
-
-       vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
-       vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
-       vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
-}
-
-static void guest_test_msrs_access(void)
-{
-       struct kvm_cpuid2 *prev_cpuid = NULL;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       int stage = 0;
-       vm_vaddr_t msr_gva;
-       struct msr_data *msr;
-       bool has_invtsc = kvm_cpu_has(X86_FEATURE_INVTSC);
-
-       while (true) {
-               vm = vm_create_with_one_vcpu(&vcpu, guest_msr);
-
-               msr_gva = vm_vaddr_alloc_page(vm);
-               memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
-               msr = addr_gva2hva(vm, msr_gva);
-
-               vcpu_args_set(vcpu, 1, msr_gva);
-               vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
-
-               if (!prev_cpuid) {
-                       vcpu_reset_hv_cpuid(vcpu);
-
-                       prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
-               } else {
-                       vcpu_init_cpuid(vcpu, prev_cpuid);
-               }
-
-               /* TODO: Make this entire test easier to maintain. */
-               if (stage >= 21)
-                       vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_SYNIC2, 0);
-
-               switch (stage) {
-               case 0:
-                       /*
-                        * Only available when Hyper-V identification is set
-                        */
-                       msr->idx = HV_X64_MSR_GUEST_OS_ID;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 1:
-                       msr->idx = HV_X64_MSR_HYPERCALL;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 2:
-                       vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
-                       /*
-                        * HV_X64_MSR_GUEST_OS_ID has to be written first to make
-                        * HV_X64_MSR_HYPERCALL available.
-                        */
-                       msr->idx = HV_X64_MSR_GUEST_OS_ID;
-                       msr->write = true;
-                       msr->write_val = HYPERV_LINUX_OS_ID;
-                       msr->fault_expected = false;
-                       break;
-               case 3:
-                       msr->idx = HV_X64_MSR_GUEST_OS_ID;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 4:
-                       msr->idx = HV_X64_MSR_HYPERCALL;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-
-               case 5:
-                       msr->idx = HV_X64_MSR_VP_RUNTIME;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 6:
-                       vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_RUNTIME_AVAILABLE);
-                       msr->idx = HV_X64_MSR_VP_RUNTIME;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 7:
-                       /* Read only */
-                       msr->idx = HV_X64_MSR_VP_RUNTIME;
-                       msr->write = true;
-                       msr->write_val = 1;
-                       msr->fault_expected = true;
-                       break;
-
-               case 8:
-                       msr->idx = HV_X64_MSR_TIME_REF_COUNT;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 9:
-                       vcpu_set_cpuid_feature(vcpu, HV_MSR_TIME_REF_COUNT_AVAILABLE);
-                       msr->idx = HV_X64_MSR_TIME_REF_COUNT;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 10:
-                       /* Read only */
-                       msr->idx = HV_X64_MSR_TIME_REF_COUNT;
-                       msr->write = true;
-                       msr->write_val = 1;
-                       msr->fault_expected = true;
-                       break;
-
-               case 11:
-                       msr->idx = HV_X64_MSR_VP_INDEX;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 12:
-                       vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_INDEX_AVAILABLE);
-                       msr->idx = HV_X64_MSR_VP_INDEX;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 13:
-                       /* Read only */
-                       msr->idx = HV_X64_MSR_VP_INDEX;
-                       msr->write = true;
-                       msr->write_val = 1;
-                       msr->fault_expected = true;
-                       break;
-
-               case 14:
-                       msr->idx = HV_X64_MSR_RESET;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 15:
-                       vcpu_set_cpuid_feature(vcpu, HV_MSR_RESET_AVAILABLE);
-                       msr->idx = HV_X64_MSR_RESET;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 16:
-                       msr->idx = HV_X64_MSR_RESET;
-                       msr->write = true;
-                       /*
-                        * TODO: the test only writes '0' to HV_X64_MSR_RESET
-                        * at the moment, writing some other value there will
-                        * trigger real vCPU reset and the code is not prepared
-                        * to handle it yet.
-                        */
-                       msr->write_val = 0;
-                       msr->fault_expected = false;
-                       break;
-
-               case 17:
-                       msr->idx = HV_X64_MSR_REFERENCE_TSC;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 18:
-                       vcpu_set_cpuid_feature(vcpu, HV_MSR_REFERENCE_TSC_AVAILABLE);
-                       msr->idx = HV_X64_MSR_REFERENCE_TSC;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 19:
-                       msr->idx = HV_X64_MSR_REFERENCE_TSC;
-                       msr->write = true;
-                       msr->write_val = 0;
-                       msr->fault_expected = false;
-                       break;
-
-               case 20:
-                       msr->idx = HV_X64_MSR_EOM;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 21:
-                       /*
-                        * Remains unavailable even with KVM_CAP_HYPERV_SYNIC2
-                        * capability enabled and guest visible CPUID bit unset.
-                        */
-                       msr->idx = HV_X64_MSR_EOM;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 22:
-                       vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNIC_AVAILABLE);
-                       msr->idx = HV_X64_MSR_EOM;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 23:
-                       msr->idx = HV_X64_MSR_EOM;
-                       msr->write = true;
-                       msr->write_val = 0;
-                       msr->fault_expected = false;
-                       break;
-
-               case 24:
-                       msr->idx = HV_X64_MSR_STIMER0_CONFIG;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 25:
-                       vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNTIMER_AVAILABLE);
-                       msr->idx = HV_X64_MSR_STIMER0_CONFIG;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 26:
-                       msr->idx = HV_X64_MSR_STIMER0_CONFIG;
-                       msr->write = true;
-                       msr->write_val = 0;
-                       msr->fault_expected = false;
-                       break;
-               case 27:
-                       /* Direct mode test */
-                       msr->idx = HV_X64_MSR_STIMER0_CONFIG;
-                       msr->write = true;
-                       msr->write_val = 1 << 12;
-                       msr->fault_expected = true;
-                       break;
-               case 28:
-                       vcpu_set_cpuid_feature(vcpu, HV_STIMER_DIRECT_MODE_AVAILABLE);
-                       msr->idx = HV_X64_MSR_STIMER0_CONFIG;
-                       msr->write = true;
-                       msr->write_val = 1 << 12;
-                       msr->fault_expected = false;
-                       break;
-
-               case 29:
-                       msr->idx = HV_X64_MSR_EOI;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 30:
-                       vcpu_set_cpuid_feature(vcpu, HV_MSR_APIC_ACCESS_AVAILABLE);
-                       msr->idx = HV_X64_MSR_EOI;
-                       msr->write = true;
-                       msr->write_val = 1;
-                       msr->fault_expected = false;
-                       break;
-
-               case 31:
-                       msr->idx = HV_X64_MSR_TSC_FREQUENCY;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 32:
-                       vcpu_set_cpuid_feature(vcpu, HV_ACCESS_FREQUENCY_MSRS);
-                       msr->idx = HV_X64_MSR_TSC_FREQUENCY;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 33:
-                       /* Read only */
-                       msr->idx = HV_X64_MSR_TSC_FREQUENCY;
-                       msr->write = true;
-                       msr->write_val = 1;
-                       msr->fault_expected = true;
-                       break;
-
-               case 34:
-                       msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 35:
-                       vcpu_set_cpuid_feature(vcpu, HV_ACCESS_REENLIGHTENMENT);
-                       msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 36:
-                       msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
-                       msr->write = true;
-                       msr->write_val = 1;
-                       msr->fault_expected = false;
-                       break;
-               case 37:
-                       /* Can only write '0' */
-                       msr->idx = HV_X64_MSR_TSC_EMULATION_STATUS;
-                       msr->write = true;
-                       msr->write_val = 1;
-                       msr->fault_expected = true;
-                       break;
-
-               case 38:
-                       msr->idx = HV_X64_MSR_CRASH_P0;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 39:
-                       vcpu_set_cpuid_feature(vcpu, HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE);
-                       msr->idx = HV_X64_MSR_CRASH_P0;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 40:
-                       msr->idx = HV_X64_MSR_CRASH_P0;
-                       msr->write = true;
-                       msr->write_val = 1;
-                       msr->fault_expected = false;
-                       break;
-
-               case 41:
-                       msr->idx = HV_X64_MSR_SYNDBG_STATUS;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 42:
-                       vcpu_set_cpuid_feature(vcpu, HV_FEATURE_DEBUG_MSRS_AVAILABLE);
-                       vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
-                       msr->idx = HV_X64_MSR_SYNDBG_STATUS;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 43:
-                       msr->idx = HV_X64_MSR_SYNDBG_STATUS;
-                       msr->write = true;
-                       msr->write_val = 0;
-                       msr->fault_expected = false;
-                       break;
-
-               case 44:
-                       /* MSR is not available when CPUID feature bit is unset */
-                       if (!has_invtsc)
-                               goto next_stage;
-                       msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 45:
-                       /* MSR is available when CPUID feature bit is set */
-                       if (!has_invtsc)
-                               goto next_stage;
-                       vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT);
-                       msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 46:
-                       /* Writing bits other than 0 is forbidden */
-                       if (!has_invtsc)
-                               goto next_stage;
-                       msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
-                       msr->write = true;
-                       msr->write_val = 0xdeadbeef;
-                       msr->fault_expected = true;
-                       break;
-               case 47:
-                       /* Setting bit 0 enables the feature */
-                       if (!has_invtsc)
-                               goto next_stage;
-                       msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
-                       msr->write = true;
-                       msr->write_val = 1;
-                       msr->fault_expected = false;
-                       break;
-
-               default:
-                       kvm_vm_free(vm);
-                       return;
-               }
-
-               vcpu_set_cpuid(vcpu);
-
-               memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
-
-               pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage,
-                        msr->idx, msr->write ? "write" : "read");
-
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       return;
-               case UCALL_DONE:
-                       break;
-               default:
-                       TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
-                       return;
-               }
-
-next_stage:
-               stage++;
-               kvm_vm_free(vm);
-       }
-}
-
-static void guest_test_hcalls_access(void)
-{
-       struct kvm_cpuid2 *prev_cpuid = NULL;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       int stage = 0;
-       vm_vaddr_t hcall_page, hcall_params;
-       struct hcall_data *hcall;
-
-       while (true) {
-               vm = vm_create_with_one_vcpu(&vcpu, guest_hcall);
-
-               /* Hypercall input/output */
-               hcall_page = vm_vaddr_alloc_pages(vm, 2);
-               memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
-
-               hcall_params = vm_vaddr_alloc_page(vm);
-               memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
-               hcall = addr_gva2hva(vm, hcall_params);
-
-               vcpu_args_set(vcpu, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
-               vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
-
-               if (!prev_cpuid) {
-                       vcpu_reset_hv_cpuid(vcpu);
-
-                       prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
-               } else {
-                       vcpu_init_cpuid(vcpu, prev_cpuid);
-               }
-
-               switch (stage) {
-               case 0:
-                       vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
-                       hcall->control = 0xbeef;
-                       hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
-                       break;
-
-               case 1:
-                       hcall->control = HVCALL_POST_MESSAGE;
-                       hcall->expect = HV_STATUS_ACCESS_DENIED;
-                       break;
-               case 2:
-                       vcpu_set_cpuid_feature(vcpu, HV_POST_MESSAGES);
-                       hcall->control = HVCALL_POST_MESSAGE;
-                       hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
-                       break;
-
-               case 3:
-                       hcall->control = HVCALL_SIGNAL_EVENT;
-                       hcall->expect = HV_STATUS_ACCESS_DENIED;
-                       break;
-               case 4:
-                       vcpu_set_cpuid_feature(vcpu, HV_SIGNAL_EVENTS);
-                       hcall->control = HVCALL_SIGNAL_EVENT;
-                       hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
-                       break;
-
-               case 5:
-                       hcall->control = HVCALL_RESET_DEBUG_SESSION;
-                       hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
-                       break;
-               case 6:
-                       vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
-                       hcall->control = HVCALL_RESET_DEBUG_SESSION;
-                       hcall->expect = HV_STATUS_ACCESS_DENIED;
-                       break;
-               case 7:
-                       vcpu_set_cpuid_feature(vcpu, HV_DEBUGGING);
-                       hcall->control = HVCALL_RESET_DEBUG_SESSION;
-                       hcall->expect = HV_STATUS_OPERATION_DENIED;
-                       break;
-
-               case 8:
-                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
-                       hcall->expect = HV_STATUS_ACCESS_DENIED;
-                       break;
-               case 9:
-                       vcpu_set_cpuid_feature(vcpu, HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED);
-                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
-                       hcall->expect = HV_STATUS_SUCCESS;
-                       break;
-               case 10:
-                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
-                       hcall->expect = HV_STATUS_ACCESS_DENIED;
-                       break;
-               case 11:
-                       vcpu_set_cpuid_feature(vcpu, HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED);
-                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
-                       hcall->expect = HV_STATUS_SUCCESS;
-                       break;
-
-               case 12:
-                       hcall->control = HVCALL_SEND_IPI;
-                       hcall->expect = HV_STATUS_ACCESS_DENIED;
-                       break;
-               case 13:
-                       vcpu_set_cpuid_feature(vcpu, HV_X64_CLUSTER_IPI_RECOMMENDED);
-                       hcall->control = HVCALL_SEND_IPI;
-                       hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
-                       break;
-               case 14:
-                       /* Nothing in 'sparse banks' -> success */
-                       hcall->control = HVCALL_SEND_IPI_EX;
-                       hcall->expect = HV_STATUS_SUCCESS;
-                       break;
-
-               case 15:
-                       hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
-                       hcall->expect = HV_STATUS_ACCESS_DENIED;
-                       break;
-               case 16:
-                       vcpu_set_cpuid_feature(vcpu, HV_PV_SPINLOCKS_TEST);
-                       hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
-                       hcall->expect = HV_STATUS_SUCCESS;
-                       break;
-               case 17:
-                       /* XMM fast hypercall */
-                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
-                       hcall->ud_expected = true;
-                       break;
-               case 18:
-                       vcpu_set_cpuid_feature(vcpu, HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE);
-                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
-                       hcall->ud_expected = false;
-                       hcall->expect = HV_STATUS_SUCCESS;
-                       break;
-               case 19:
-                       hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES;
-                       hcall->expect = HV_STATUS_ACCESS_DENIED;
-                       break;
-               case 20:
-                       vcpu_set_cpuid_feature(vcpu, HV_ENABLE_EXTENDED_HYPERCALLS);
-                       hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES | HV_HYPERCALL_FAST_BIT;
-                       hcall->expect = HV_STATUS_INVALID_PARAMETER;
-                       break;
-               case 21:
-                       kvm_vm_free(vm);
-                       return;
-               }
-
-               vcpu_set_cpuid(vcpu);
-
-               memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
-
-               pr_debug("Stage %d: testing hcall: 0x%lx\n", stage, hcall->control);
-
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       return;
-               case UCALL_DONE:
-                       break;
-               default:
-                       TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
-                       return;
-               }
-
-               stage++;
-               kvm_vm_free(vm);
-       }
-}
-
-int main(void)
-{
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENFORCE_CPUID));
-
-       pr_info("Testing access to Hyper-V specific MSRs\n");
-       guest_test_msrs_access();
-
-       pr_info("Testing access to Hyper-V hypercalls\n");
-       guest_test_hcalls_access();
-}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
deleted file mode 100644 (file)
index 22c0c12..0000000
+++ /dev/null
@@ -1,308 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Hyper-V HvCallSendSyntheticClusterIpi{,Ex} tests
- *
- * Copyright (C) 2022, Red Hat, Inc.
- *
- */
-#include <pthread.h>
-#include <inttypes.h>
-
-#include "kvm_util.h"
-#include "hyperv.h"
-#include "test_util.h"
-#include "vmx.h"
-
-#define RECEIVER_VCPU_ID_1 2
-#define RECEIVER_VCPU_ID_2 65
-
-#define IPI_VECTOR      0xfe
-
-static volatile uint64_t ipis_rcvd[RECEIVER_VCPU_ID_2 + 1];
-
-struct hv_vpset {
-       u64 format;
-       u64 valid_bank_mask;
-       u64 bank_contents[2];
-};
-
-enum HV_GENERIC_SET_FORMAT {
-       HV_GENERIC_SET_SPARSE_4K,
-       HV_GENERIC_SET_ALL,
-};
-
-/* HvCallSendSyntheticClusterIpi hypercall */
-struct hv_send_ipi {
-       u32 vector;
-       u32 reserved;
-       u64 cpu_mask;
-};
-
-/* HvCallSendSyntheticClusterIpiEx hypercall */
-struct hv_send_ipi_ex {
-       u32 vector;
-       u32 reserved;
-       struct hv_vpset vp_set;
-};
-
-static inline void hv_init(vm_vaddr_t pgs_gpa)
-{
-       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
-       wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
-}
-
-static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa)
-{
-       u32 vcpu_id;
-
-       x2apic_enable();
-       hv_init(pgs_gpa);
-
-       vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
-
-       /* Signal sender vCPU we're ready */
-       ipis_rcvd[vcpu_id] = (u64)-1;
-
-       for (;;)
-               asm volatile("sti; hlt; cli");
-}
-
-static void guest_ipi_handler(struct ex_regs *regs)
-{
-       u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
-
-       ipis_rcvd[vcpu_id]++;
-       wrmsr(HV_X64_MSR_EOI, 1);
-}
-
-static inline void nop_loop(void)
-{
-       int i;
-
-       for (i = 0; i < 100000000; i++)
-               asm volatile("nop");
-}
-
-static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
-{
-       struct hv_send_ipi *ipi = (struct hv_send_ipi *)hcall_page;
-       struct hv_send_ipi_ex *ipi_ex = (struct hv_send_ipi_ex *)hcall_page;
-       int stage = 1, ipis_expected[2] = {0};
-
-       hv_init(pgs_gpa);
-       GUEST_SYNC(stage++);
-
-       /* Wait for receiver vCPUs to come up */
-       while (!ipis_rcvd[RECEIVER_VCPU_ID_1] || !ipis_rcvd[RECEIVER_VCPU_ID_2])
-               nop_loop();
-       ipis_rcvd[RECEIVER_VCPU_ID_1] = ipis_rcvd[RECEIVER_VCPU_ID_2] = 0;
-
-       /* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
-       ipi->vector = IPI_VECTOR;
-       ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1;
-       hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096);
-       nop_loop();
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
-       GUEST_SYNC(stage++);
-       /* 'Fast' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
-       hyperv_hypercall(HVCALL_SEND_IPI | HV_HYPERCALL_FAST_BIT,
-                        IPI_VECTOR, 1 << RECEIVER_VCPU_ID_1);
-       nop_loop();
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
-       GUEST_SYNC(stage++);
-
-       /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
-       memset(hcall_page, 0, 4096);
-       ipi_ex->vector = IPI_VECTOR;
-       ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-       ipi_ex->vp_set.valid_bank_mask = 1 << 0;
-       ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
-       hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
-                        pgs_gpa, pgs_gpa + 4096);
-       nop_loop();
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
-       GUEST_SYNC(stage++);
-       /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
-       hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1);
-       hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
-                        (1 << HV_HYPERCALL_VARHEAD_OFFSET),
-                        IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
-       nop_loop();
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
-       GUEST_SYNC(stage++);
-
-       /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
-       memset(hcall_page, 0, 4096);
-       ipi_ex->vector = IPI_VECTOR;
-       ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-       ipi_ex->vp_set.valid_bank_mask = 1 << 1;
-       ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64);
-       hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
-                        pgs_gpa, pgs_gpa + 4096);
-       nop_loop();
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
-       GUEST_SYNC(stage++);
-       /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
-       hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1);
-       hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
-                        (1 << HV_HYPERCALL_VARHEAD_OFFSET),
-                        IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
-       nop_loop();
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
-       GUEST_SYNC(stage++);
-
-       /* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */
-       memset(hcall_page, 0, 4096);
-       ipi_ex->vector = IPI_VECTOR;
-       ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-       ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1;
-       ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
-       ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64);
-       hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET),
-                        pgs_gpa, pgs_gpa + 4096);
-       nop_loop();
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
-       GUEST_SYNC(stage++);
-       /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1, 2} */
-       hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
-       hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
-                        (2 << HV_HYPERCALL_VARHEAD_OFFSET),
-                        IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
-       nop_loop();
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
-       GUEST_SYNC(stage++);
-
-       /* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */
-       memset(hcall_page, 0, 4096);
-       ipi_ex->vector = IPI_VECTOR;
-       ipi_ex->vp_set.format = HV_GENERIC_SET_ALL;
-       hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096);
-       nop_loop();
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
-       GUEST_SYNC(stage++);
-       /*
-        * 'XMM Fast' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL.
-        */
-       ipi_ex->vp_set.valid_bank_mask = 0;
-       hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
-       hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT,
-                        IPI_VECTOR, HV_GENERIC_SET_ALL);
-       nop_loop();
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
-       GUEST_SYNC(stage++);
-
-       GUEST_DONE();
-}
-
-static void *vcpu_thread(void *arg)
-{
-       struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
-       int old, r;
-
-       r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
-       TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
-                   vcpu->id, r);
-
-       vcpu_run(vcpu);
-
-       TEST_FAIL("vCPU %u exited unexpectedly", vcpu->id);
-
-       return NULL;
-}
-
-static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
-{
-       void *retval;
-       int r;
-
-       r = pthread_cancel(thread);
-       TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d",
-                   vcpu->id, r);
-
-       r = pthread_join(thread, &retval);
-       TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d",
-                   vcpu->id, r);
-       TEST_ASSERT(retval == PTHREAD_CANCELED,
-                   "expected retval=%p, got %p", PTHREAD_CANCELED,
-                   retval);
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu[3];
-       vm_vaddr_t hcall_page;
-       pthread_t threads[2];
-       int stage = 1, r;
-       struct ucall uc;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_SEND_IPI));
-
-       vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
-
-       /* Hypercall input/output */
-       hcall_page = vm_vaddr_alloc_pages(vm, 2);
-       memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
-
-
-       vcpu[1] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_1, receiver_code);
-       vcpu_args_set(vcpu[1], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
-       vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_1);
-       vcpu_set_hv_cpuid(vcpu[1]);
-
-       vcpu[2] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_2, receiver_code);
-       vcpu_args_set(vcpu[2], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
-       vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_2);
-       vcpu_set_hv_cpuid(vcpu[2]);
-
-       vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
-
-       vcpu_args_set(vcpu[0], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
-       vcpu_set_hv_cpuid(vcpu[0]);
-
-       r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]);
-       TEST_ASSERT(!r, "pthread_create failed errno=%d", r);
-
-       r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]);
-       TEST_ASSERT(!r, "pthread_create failed errno=%d", r);
-
-       while (true) {
-               vcpu_run(vcpu[0]);
-
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu[0], &uc)) {
-               case UCALL_SYNC:
-                       TEST_ASSERT(uc.args[1] == stage,
-                                   "Unexpected stage: %ld (%d expected)",
-                                   uc.args[1], stage);
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-
-               stage++;
-       }
-
-done:
-       cancel_join_vcpu_thread(threads[0], vcpu[1]);
-       cancel_join_vcpu_thread(threads[1], vcpu[2]);
-       kvm_vm_free(vm);
-
-       return r;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
deleted file mode 100644 (file)
index 0ddb632..0000000
+++ /dev/null
@@ -1,199 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2022, Red Hat, Inc.
- *
- * Tests for Hyper-V extensions to SVM.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <linux/bitmap.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-#include "hyperv.h"
-
-#define L2_GUEST_STACK_SIZE 256
-
-/* Exit to L1 from L2 with RDMSR instruction */
-static inline void rdmsr_from_l2(uint32_t msr)
-{
-       /* Currently, L1 doesn't preserve GPRs during vmexits. */
-       __asm__ __volatile__ ("rdmsr" : : "c"(msr) :
-                             "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
-                             "r10", "r11", "r12", "r13", "r14", "r15");
-}
-
-void l2_guest_code(void)
-{
-       u64 unused;
-
-       GUEST_SYNC(3);
-       /* Exit to L1 */
-       vmmcall();
-
-       /* MSR-Bitmap tests */
-       rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
-       rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
-       rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
-       vmmcall();
-       rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
-
-       GUEST_SYNC(5);
-
-       /* L2 TLB flush tests */
-       hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
-                        HV_HYPERCALL_FAST_BIT, 0x0,
-                        HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
-                        HV_FLUSH_ALL_PROCESSORS);
-       rdmsr_from_l2(MSR_FS_BASE);
-       /*
-        * Note: hypercall status (RAX) is not preserved correctly by L1 after
-        * synthetic vmexit, use unchecked version.
-        */
-       __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
-                          HV_HYPERCALL_FAST_BIT, 0x0,
-                          HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
-                          HV_FLUSH_ALL_PROCESSORS, &unused);
-
-       /* Done, exit to L1 and never come back.  */
-       vmmcall();
-}
-
-static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm,
-                                                   struct hyperv_test_pages *hv_pages,
-                                                   vm_vaddr_t pgs_gpa)
-{
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       struct vmcb *vmcb = svm->vmcb;
-       struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
-
-       GUEST_SYNC(1);
-
-       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
-       wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
-       enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
-
-       GUEST_ASSERT(svm->vmcb_gpa);
-       /* Prepare for L2 execution. */
-       generic_svm_setup(svm, l2_guest_code,
-                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       /* L2 TLB flush setup */
-       hve->partition_assist_page = hv_pages->partition_assist_gpa;
-       hve->hv_enlightenments_control.nested_flush_hypercall = 1;
-       hve->hv_vm_id = 1;
-       hve->hv_vp_id = 1;
-       current_vp_assist->nested_control.features.directhypercall = 1;
-       *(u32 *)(hv_pages->partition_assist) = 0;
-
-       GUEST_SYNC(2);
-       run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
-       GUEST_SYNC(4);
-       vmcb->save.rip += 3;
-
-       /* Intercept RDMSR 0xc0000100 */
-       vmcb->control.intercept |= 1ULL << INTERCEPT_MSR_PROT;
-       __set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800);
-       run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
-       vmcb->save.rip += 2; /* rdmsr */
-
-       /* Enable enlightened MSR bitmap */
-       hve->hv_enlightenments_control.msr_bitmap = 1;
-       run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
-       vmcb->save.rip += 2; /* rdmsr */
-
-       /* Intercept RDMSR 0xc0000101 without telling KVM about it */
-       __set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800);
-       /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
-       vmcb->control.clean |= HV_VMCB_NESTED_ENLIGHTENMENTS;
-       run_guest(vmcb, svm->vmcb_gpa);
-       /* Make sure we don't see SVM_EXIT_MSR here so eMSR bitmap works */
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
-       vmcb->save.rip += 3; /* vmcall */
-
-       /* Now tell KVM we've changed MSR-Bitmap */
-       vmcb->control.clean &= ~HV_VMCB_NESTED_ENLIGHTENMENTS;
-       run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
-       vmcb->save.rip += 2; /* rdmsr */
-
-
-       /*
-        * L2 TLB flush test. First VMCALL should be handled directly by L0,
-        * no VMCALL exit expected.
-        */
-       run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
-       vmcb->save.rip += 2; /* rdmsr */
-       /* Enable synthetic vmexit */
-       *(u32 *)(hv_pages->partition_assist) = 1;
-       run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT(vmcb->control.exit_code == HV_SVM_EXITCODE_ENL);
-       GUEST_ASSERT(vmcb->control.exit_info_1 == HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH);
-
-       run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
-       GUEST_SYNC(6);
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       vm_vaddr_t nested_gva = 0, hv_pages_gva = 0;
-       vm_vaddr_t hcall_page;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       int stage;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
-       TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH));
-
-       /* Create VM */
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       vcpu_set_hv_cpuid(vcpu);
-       vcpu_alloc_svm(vm, &nested_gva);
-       vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
-
-       hcall_page = vm_vaddr_alloc_pages(vm, 1);
-       memset(addr_gva2hva(vm, hcall_page), 0x0,  getpagesize());
-
-       vcpu_args_set(vcpu, 3, nested_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
-       vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
-
-       for (stage = 1;; stage++) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-
-               /* UCALL_SYNC is handled here.  */
-               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
-                           uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
-                           stage, (ulong)uc.args[1]);
-
-       }
-
-done:
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
deleted file mode 100644 (file)
index 077cd0e..0000000
+++ /dev/null
@@ -1,680 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Hyper-V HvFlushVirtualAddress{List,Space}{,Ex} tests
- *
- * Copyright (C) 2022, Red Hat, Inc.
- *
- */
-#include <asm/barrier.h>
-#include <pthread.h>
-#include <inttypes.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "hyperv.h"
-#include "test_util.h"
-#include "vmx.h"
-
-#define WORKER_VCPU_ID_1 2
-#define WORKER_VCPU_ID_2 65
-
-#define NTRY 100
-#define NTEST_PAGES 2
-
-struct hv_vpset {
-       u64 format;
-       u64 valid_bank_mask;
-       u64 bank_contents[];
-};
-
-enum HV_GENERIC_SET_FORMAT {
-       HV_GENERIC_SET_SPARSE_4K,
-       HV_GENERIC_SET_ALL,
-};
-
-#define HV_FLUSH_ALL_PROCESSORS                        BIT(0)
-#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES    BIT(1)
-#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY      BIT(2)
-#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT     BIT(3)
-
-/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
-struct hv_tlb_flush {
-       u64 address_space;
-       u64 flags;
-       u64 processor_mask;
-       u64 gva_list[];
-} __packed;
-
-/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
-struct hv_tlb_flush_ex {
-       u64 address_space;
-       u64 flags;
-       struct hv_vpset hv_vp_set;
-       u64 gva_list[];
-} __packed;
-
-/*
- * Pass the following info to 'workers' and 'sender'
- * - Hypercall page's GVA
- * - Hypercall page's GPA
- * - Test pages GVA
- * - GVAs of the test pages' PTEs
- */
-struct test_data {
-       vm_vaddr_t hcall_gva;
-       vm_paddr_t hcall_gpa;
-       vm_vaddr_t test_pages;
-       vm_vaddr_t test_pages_pte[NTEST_PAGES];
-};
-
-/* 'Worker' vCPU code checking the contents of the test page */
-static void worker_guest_code(vm_vaddr_t test_data)
-{
-       struct test_data *data = (struct test_data *)test_data;
-       u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
-       void *exp_page = (void *)data->test_pages + PAGE_SIZE * NTEST_PAGES;
-       u64 *this_cpu = (u64 *)(exp_page + vcpu_id * sizeof(u64));
-       u64 expected, val;
-
-       x2apic_enable();
-       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
-
-       for (;;) {
-               cpu_relax();
-
-               expected = READ_ONCE(*this_cpu);
-
-               /*
-                * Make sure the value in the test page is read after reading
-                * the expectation for the first time. Pairs with wmb() in
-                * prepare_to_test().
-                */
-               rmb();
-
-               val = READ_ONCE(*(u64 *)data->test_pages);
-
-               /*
-                * Make sure the value in the test page is read before reading
-                * the expectation for the second time. Pairs with wmb() in
-                * post_test().
-                */
-               rmb();
-
-               /*
-                * '0' indicates the sender is between iterations, wait until
-                * the sender is ready for this vCPU to start checking again.
-                */
-               if (!expected)
-                       continue;
-
-               /*
-                * Re-read the per-vCPU byte to ensure the sender didn't move
-                * onto a new iteration.
-                */
-               if (expected != READ_ONCE(*this_cpu))
-                       continue;
-
-               GUEST_ASSERT(val == expected);
-       }
-}
-
-/*
- * Write per-CPU info indicating what each 'worker' CPU is supposed to see in
- * test page. '0' means don't check.
- */
-static void set_expected_val(void *addr, u64 val, int vcpu_id)
-{
-       void *exp_page = addr + PAGE_SIZE * NTEST_PAGES;
-
-       *(u64 *)(exp_page + vcpu_id * sizeof(u64)) = val;
-}
-
-/*
- * Update PTEs swapping two test pages.
- * TODO: use swap()/xchg() when these are provided.
- */
-static void swap_two_test_pages(vm_paddr_t pte_gva1, vm_paddr_t pte_gva2)
-{
-       uint64_t tmp = *(uint64_t *)pte_gva1;
-
-       *(uint64_t *)pte_gva1 = *(uint64_t *)pte_gva2;
-       *(uint64_t *)pte_gva2 = tmp;
-}
-
-/*
- * TODO: replace the silly NOP loop with a proper udelay() implementation.
- */
-static inline void do_delay(void)
-{
-       int i;
-
-       for (i = 0; i < 1000000; i++)
-               asm volatile("nop");
-}
-
-/*
- * Prepare to test: 'disable' workers by setting the expectation to '0',
- * clear hypercall input page and then swap two test pages.
- */
-static inline void prepare_to_test(struct test_data *data)
-{
-       /* Clear hypercall input page */
-       memset((void *)data->hcall_gva, 0, PAGE_SIZE);
-
-       /* 'Disable' workers */
-       set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_1);
-       set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_2);
-
-       /* Make sure workers are 'disabled' before we swap PTEs. */
-       wmb();
-
-       /* Make sure workers have enough time to notice */
-       do_delay();
-
-       /* Swap test page mappings */
-       swap_two_test_pages(data->test_pages_pte[0], data->test_pages_pte[1]);
-}
-
-/*
- * Finalize the test: check the hypercall result, set the expected val for
- * 'worker' CPUs and give them some time to test.
- */
-static inline void post_test(struct test_data *data, u64 exp1, u64 exp2)
-{
-       /* Make sure we change the expectation after swapping PTEs */
-       wmb();
-
-       /* Set the expectation for workers, '0' means don't test */
-       set_expected_val((void *)data->test_pages, exp1, WORKER_VCPU_ID_1);
-       set_expected_val((void *)data->test_pages, exp2, WORKER_VCPU_ID_2);
-
-       /* Make sure workers have enough time to test */
-       do_delay();
-}
-
-#define TESTVAL1 0x0101010101010101
-#define TESTVAL2 0x0202020202020202
-
-/* Main vCPU doing the test */
-static void sender_guest_code(vm_vaddr_t test_data)
-{
-       struct test_data *data = (struct test_data *)test_data;
-       struct hv_tlb_flush *flush = (struct hv_tlb_flush *)data->hcall_gva;
-       struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)data->hcall_gva;
-       vm_paddr_t hcall_gpa = data->hcall_gpa;
-       int i, stage = 1;
-
-       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
-       wrmsr(HV_X64_MSR_HYPERCALL, data->hcall_gpa);
-
-       /* "Slow" hypercalls */
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
-               flush->processor_mask = BIT(WORKER_VCPU_ID_1);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa,
-                                hcall_gpa + PAGE_SIZE);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
-               flush->processor_mask = BIT(WORKER_VCPU_ID_1);
-               flush->gva_list[0] = (u64)data->test_pages;
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
-                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
-                                hcall_gpa, hcall_gpa + PAGE_SIZE);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
-                       HV_FLUSH_ALL_PROCESSORS;
-               flush->processor_mask = 0;
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa,
-                                hcall_gpa + PAGE_SIZE);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
-                       HV_FLUSH_ALL_PROCESSORS;
-               flush->gva_list[0] = (u64)data->test_pages;
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
-                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
-                                hcall_gpa, hcall_gpa + PAGE_SIZE);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
-               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
-                                (1 << HV_HYPERCALL_VARHEAD_OFFSET),
-                                hcall_gpa, hcall_gpa + PAGE_SIZE);
-               post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
-               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
-               /* bank_contents and gva_list occupy the same space, thus [1] */
-               flush_ex->gva_list[1] = (u64)data->test_pages;
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
-                                (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
-                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
-                                hcall_gpa, hcall_gpa + PAGE_SIZE);
-               post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
-                       BIT_ULL(WORKER_VCPU_ID_1 / 64);
-               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
-               flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
-                                (2 << HV_HYPERCALL_VARHEAD_OFFSET),
-                                hcall_gpa, hcall_gpa + PAGE_SIZE);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
-                       BIT_ULL(WORKER_VCPU_ID_2 / 64);
-               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
-               flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
-               /* bank_contents and gva_list occupy the same space, thus [2] */
-               flush_ex->gva_list[2] = (u64)data->test_pages;
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
-                                (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
-                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
-                                hcall_gpa, hcall_gpa + PAGE_SIZE);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
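-               /*
-                * HV_GENERIC_SET_ALL targets all vCPUs, so no sparse banks
-                * (and thus no VARHEAD bits) are needed.
-                */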
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
-                                hcall_gpa, hcall_gpa + PAGE_SIZE);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
-               flush_ex->gva_list[0] = (u64)data->test_pages;
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
-                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
-                                hcall_gpa, hcall_gpa + PAGE_SIZE);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       /* "Fast" hypercalls */
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush->processor_mask = BIT(WORKER_VCPU_ID_1);
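-               /*
-                * Fast hypercalls pass input via registers rather than guest
-                * memory, so stage the payload in XMM.
-                */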
-               hyperv_write_xmm_input(&flush->processor_mask, 1);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
-                                HV_HYPERCALL_FAST_BIT, 0x0,
-                                HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush->processor_mask = BIT(WORKER_VCPU_ID_1);
-               flush->gva_list[0] = (u64)data->test_pages;
-               hyperv_write_xmm_input(&flush->processor_mask, 1);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
-                                HV_HYPERCALL_FAST_BIT |
-                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
-                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               hyperv_write_xmm_input(&flush->processor_mask, 1);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
-                                HV_HYPERCALL_FAST_BIT, 0x0,
-                                HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
-                                HV_FLUSH_ALL_PROCESSORS);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush->gva_list[0] = (u64)data->test_pages;
-               hyperv_write_xmm_input(&flush->processor_mask, 1);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
-                                HV_HYPERCALL_FAST_BIT |
-                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET), 0x0,
-                                HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
-                                HV_FLUSH_ALL_PROCESSORS);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
-               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
-               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
-                                HV_HYPERCALL_FAST_BIT |
-                                (1 << HV_HYPERCALL_VARHEAD_OFFSET),
-                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
-               post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
-               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
-               /* bank_contents and gva_list occupy the same space, thus [1] */
-               flush_ex->gva_list[1] = (u64)data->test_pages;
-               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
-                                HV_HYPERCALL_FAST_BIT |
-                                (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
-                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
-                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
-               post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
-                       BIT_ULL(WORKER_VCPU_ID_1 / 64);
-               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
-               flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
-               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
-                                HV_HYPERCALL_FAST_BIT |
-                                (2 << HV_HYPERCALL_VARHEAD_OFFSET),
-                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
-                       BIT_ULL(WORKER_VCPU_ID_2 / 64);
-               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
-               flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
-               /* bank_contents and gva_list occupy the same space, thus [2] */
-               flush_ex->gva_list[2] = (u64)data->test_pages;
-               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 3);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
-                                HV_HYPERCALL_FAST_BIT |
-                                (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
-                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
-                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
-               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
-                                HV_HYPERCALL_FAST_BIT,
-                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
-               flush_ex->gva_list[0] = (u64)data->test_pages;
-               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
-                                HV_HYPERCALL_FAST_BIT |
-                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
-                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_DONE();
-}
-
-static void *vcpu_thread(void *arg)
-{
-       struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
-       struct ucall uc;
-       int old;
-       int r;
-
-       r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
-       TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
-                   vcpu->id, r);
-
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               /* NOT REACHED */
-       default:
-               TEST_FAIL("Unexpected ucall %lu, vCPU %d", uc.cmd, vcpu->id);
-       }
-
-       return NULL;
-}
-
-static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
-{
-       void *retval;
-       int r;
-
-       r = pthread_cancel(thread);
-       TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d",
-                   vcpu->id, r);
-
-       r = pthread_join(thread, &retval);
-       TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d",
-                   vcpu->id, r);
-       TEST_ASSERT(retval == PTHREAD_CANCELED,
-                   "expected retval=%p, got %p", PTHREAD_CANCELED,
-                   retval);
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu[3];
-       pthread_t threads[2];
-       vm_vaddr_t test_data_page, gva;
-       vm_paddr_t gpa;
-       uint64_t *pte;
-       struct test_data *data;
-       struct ucall uc;
-       int stage = 1, r, i;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TLBFLUSH));
-
-       vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
-
-       /* Test data page */
-       test_data_page = vm_vaddr_alloc_page(vm);
-       data = (struct test_data *)addr_gva2hva(vm, test_data_page);
-
-       /* Hypercall input/output */
-       data->hcall_gva = vm_vaddr_alloc_pages(vm, 2);
-       data->hcall_gpa = addr_gva2gpa(vm, data->hcall_gva);
-       memset(addr_gva2hva(vm, data->hcall_gva), 0x0, 2 * PAGE_SIZE);
-
-       /*
-        * Test pages: the first one is filled with '0x01's, the second with '0x02's
-        * and the test will swap their mappings. The third page keeps the indication
-        * about the current state of mappings.
-        */
-       data->test_pages = vm_vaddr_alloc_pages(vm, NTEST_PAGES + 1);
-       for (i = 0; i < NTEST_PAGES; i++)
-               memset(addr_gva2hva(vm, data->test_pages + PAGE_SIZE * i),
-                      (u8)(i + 1), PAGE_SIZE);
-       set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_1);
-       set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_2);
-
-       /*
-        * Get PTE pointers for test pages and map them inside the guest.
-        * Use separate page for each PTE for simplicity.
-        */
-       gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR);
-       for (i = 0; i < NTEST_PAGES; i++) {
-               pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE);
-               gpa = addr_hva2gpa(vm, pte);
-               __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K);
-               data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK);
-       }
-
-       /*
-        * Sender vCPU which performs the test: swaps test pages, sets expectation
-        * for 'workers' and issues TLB flush hypercalls.
-        */
-       vcpu_args_set(vcpu[0], 1, test_data_page);
-       vcpu_set_hv_cpuid(vcpu[0]);
-
-       /* Create worker vCPUs which check the contents of the test pages */
-       vcpu[1] = vm_vcpu_add(vm, WORKER_VCPU_ID_1, worker_guest_code);
-       vcpu_args_set(vcpu[1], 1, test_data_page);
-       vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_1);
-       vcpu_set_hv_cpuid(vcpu[1]);
-
-       vcpu[2] = vm_vcpu_add(vm, WORKER_VCPU_ID_2, worker_guest_code);
-       vcpu_args_set(vcpu[2], 1, test_data_page);
-       vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_2);
-       vcpu_set_hv_cpuid(vcpu[2]);
-
-       r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]);
-       TEST_ASSERT(!r, "pthread_create() failed");
-
-       r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]);
-       TEST_ASSERT(!r, "pthread_create() failed");
-
-       while (true) {
-               vcpu_run(vcpu[0]);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu[0], &uc)) {
-               case UCALL_SYNC:
-                       TEST_ASSERT(uc.args[1] == stage,
-                                   "Unexpected stage: %ld (%d expected)",
-                                   uc.args[1], stage);
-                       break;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-
-               stage++;
-       }
-
-done:
-       cancel_join_vcpu_thread(threads[0], vcpu[1]);
-       cancel_join_vcpu_thread(threads[1], vcpu[2]);
-       kvm_vm_free(vm);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
deleted file mode 100644 (file)
index 5bc1222..0000000
+++ /dev/null
@@ -1,156 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2021, Google LLC.
- *
- * Tests for adjusting the KVM clock from userspace
- */
-#include <asm/kvm_para.h>
-#include <asm/pvclock.h>
-#include <asm/pvclock-abi.h>
-#include <stdint.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <time.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-struct test_case {
-       uint64_t kvmclock_base;
-       int64_t realtime_offset;
-};
-
-static struct test_case test_cases[] = {
-       { .kvmclock_base = 0 },
-       { .kvmclock_base = 180 * NSEC_PER_SEC },
-       { .kvmclock_base = 0, .realtime_offset = -180 * NSEC_PER_SEC },
-       { .kvmclock_base = 0, .realtime_offset = 180 * NSEC_PER_SEC },
-};
-
-#define GUEST_SYNC_CLOCK(__stage, __val)                       \
-               GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0)
-
-static void guest_main(vm_paddr_t pvti_pa, struct pvclock_vcpu_time_info *pvti)
-{
-       int i;
-
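-       /*
-        * Point kvmclock at the guest physical address of the pvclock
-        * structure; bit 0 (KVM_MSR_ENABLED) turns the clock on.
-        */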
-       wrmsr(MSR_KVM_SYSTEM_TIME_NEW, pvti_pa | KVM_MSR_ENABLED);
-       for (i = 0; i < ARRAY_SIZE(test_cases); i++)
-               GUEST_SYNC_CLOCK(i, __pvclock_read_cycles(pvti, rdtsc()));
-}
-
-#define EXPECTED_FLAGS (KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)
-
-static inline void assert_flags(struct kvm_clock_data *data)
-{
-       TEST_ASSERT((data->flags & EXPECTED_FLAGS) == EXPECTED_FLAGS,
-                   "unexpected clock data flags: %x (want set: %x)",
-                   data->flags, EXPECTED_FLAGS);
-}
-
-static void handle_sync(struct ucall *uc, struct kvm_clock_data *start,
-                       struct kvm_clock_data *end)
-{
-       uint64_t obs, exp_lo, exp_hi;
-
-       obs = uc->args[2];
-       exp_lo = start->clock;
-       exp_hi = end->clock;
-
-       assert_flags(start);
-       assert_flags(end);
-
-       TEST_ASSERT(exp_lo <= obs && obs <= exp_hi,
-                   "unexpected kvm-clock value: %"PRIu64" expected range: [%"PRIu64", %"PRIu64"]",
-                   obs, exp_lo, exp_hi);
-
-       pr_info("kvm-clock value: %"PRIu64" expected range [%"PRIu64", %"PRIu64"]\n",
-               obs, exp_lo, exp_hi);
-}
-
-static void handle_abort(struct ucall *uc)
-{
-       REPORT_GUEST_ASSERT(*uc);
-}
-
-static void setup_clock(struct kvm_vm *vm, struct test_case *test_case)
-{
-       struct kvm_clock_data data;
-
-       memset(&data, 0, sizeof(data));
-
-       data.clock = test_case->kvmclock_base;
-       if (test_case->realtime_offset) {
-               struct timespec ts;
-               int r;
-
-               data.flags |= KVM_CLOCK_REALTIME;
-               do {
-                       r = clock_gettime(CLOCK_REALTIME, &ts);
-                       if (!r)
-                               break;
-               } while (errno == EINTR);
-
-               TEST_ASSERT(!r, "clock_gettime() failed: %d", r);
-
-               data.realtime = ts.tv_sec * NSEC_PER_SEC;
-               data.realtime += ts.tv_nsec;
-               data.realtime += test_case->realtime_offset;
-       }
-
-       vm_ioctl(vm, KVM_SET_CLOCK, &data);
-}
-
-static void enter_guest(struct kvm_vcpu *vcpu)
-{
-       struct kvm_clock_data start, end;
-       struct kvm_vm *vm = vcpu->vm;
-       struct ucall uc;
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
-               setup_clock(vm, &test_cases[i]);
-
-               vm_ioctl(vm, KVM_GET_CLOCK, &start);
-
-               vcpu_run(vcpu);
-               vm_ioctl(vm, KVM_GET_CLOCK, &end);
-
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       handle_sync(&uc, &start, &end);
-                       break;
-               case UCALL_ABORT:
-                       handle_abort(&uc);
-                       return;
-               default:
-                       TEST_ASSERT(0, "unhandled ucall: %ld", uc.cmd);
-               }
-       }
-}
-
-int main(void)
-{
-       struct kvm_vcpu *vcpu;
-       vm_vaddr_t pvti_gva;
-       vm_paddr_t pvti_gpa;
-       struct kvm_vm *vm;
-       int flags;
-
-       flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK);
-       TEST_REQUIRE(flags & KVM_CLOCK_REALTIME);
-
-       TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
-
-       pvti_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000);
-       pvti_gpa = addr_gva2gpa(vm, pvti_gva);
-       vcpu_args_set(vcpu, 2, pvti_gpa, pvti_gva);
-
-       enter_guest(vcpu);
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
deleted file mode 100644 (file)
index 78878b3..0000000
+++ /dev/null
@@ -1,190 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2020, Google LLC.
- *
- * Tests for KVM paravirtual feature disablement
- */
-#include <asm/kvm_para.h>
-#include <linux/kvm_para.h>
-#include <stdint.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-struct msr_data {
-       uint32_t idx;
-       const char *name;
-};
-
-#define TEST_MSR(msr) { .idx = msr, .name = #msr }
-#define UCALL_PR_MSR 0xdeadbeef
-#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr)
-
-/*
- * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or
- * written, as the KVM_CPUID_FEATURES leaf is cleared.
- */
-static struct msr_data msrs_to_test[] = {
-       TEST_MSR(MSR_KVM_SYSTEM_TIME),
-       TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW),
-       TEST_MSR(MSR_KVM_WALL_CLOCK),
-       TEST_MSR(MSR_KVM_WALL_CLOCK_NEW),
-       TEST_MSR(MSR_KVM_ASYNC_PF_EN),
-       TEST_MSR(MSR_KVM_STEAL_TIME),
-       TEST_MSR(MSR_KVM_PV_EOI_EN),
-       TEST_MSR(MSR_KVM_POLL_CONTROL),
-       TEST_MSR(MSR_KVM_ASYNC_PF_INT),
-       TEST_MSR(MSR_KVM_ASYNC_PF_ACK),
-};
-
-static void test_msr(struct msr_data *msr)
-{
-       uint64_t ignored;
-       uint8_t vector;
-
-       PR_MSR(msr);
-
-       vector = rdmsr_safe(msr->idx, &ignored);
-       GUEST_ASSERT_EQ(vector, GP_VECTOR);
-
-       vector = wrmsr_safe(msr->idx, 0);
-       GUEST_ASSERT_EQ(vector, GP_VECTOR);
-}
-
-struct hcall_data {
-       uint64_t nr;
-       const char *name;
-};
-
-#define TEST_HCALL(hc) { .nr = hc, .name = #hc }
-#define UCALL_PR_HCALL 0xdeadc0de
-#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc)
-
-/*
- * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding
- * features have been cleared in KVM_CPUID_FEATURES.
- */
-static struct hcall_data hcalls_to_test[] = {
-       TEST_HCALL(KVM_HC_KICK_CPU),
-       TEST_HCALL(KVM_HC_SEND_IPI),
-       TEST_HCALL(KVM_HC_SCHED_YIELD),
-};
-
-static void test_hcall(struct hcall_data *hc)
-{
-       uint64_t r;
-
-       PR_HCALL(hc);
-       r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
-       GUEST_ASSERT_EQ(r, -KVM_ENOSYS);
-}
-
-static void guest_main(void)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) {
-               test_msr(&msrs_to_test[i]);
-       }
-
-       for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) {
-               test_hcall(&hcalls_to_test[i]);
-       }
-
-       GUEST_DONE();
-}
-
-static void pr_msr(struct ucall *uc)
-{
-       struct msr_data *msr = (struct msr_data *)uc->args[0];
-
-       pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx);
-}
-
-static void pr_hcall(struct ucall *uc)
-{
-       struct hcall_data *hc = (struct hcall_data *)uc->args[0];
-
-       pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr);
-}
-
-static void enter_guest(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       while (true) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_PR_MSR:
-                       pr_msr(&uc);
-                       break;
-               case UCALL_PR_HCALL:
-                       pr_hcall(&uc);
-                       break;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       return;
-               case UCALL_DONE:
-                       return;
-               }
-       }
-}
-
-static void test_pv_unhalt(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct kvm_cpuid_entry2 *ent;
-       u32 kvm_sig_old;
-
-       pr_info("testing KVM_FEATURE_PV_UNHALT\n");
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_DISABLE_EXITS));
-
-       /* KVM_PV_UNHALT test */
-       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
-       vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
-
-       TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
-                   "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect");
-
-       /* Make sure KVM clears vcpu->arch.kvm_cpuid */
-       ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
-       kvm_sig_old = ent->ebx;
-       ent->ebx = 0xdeadbeef;
-       vcpu_set_cpuid(vcpu);
-
-       vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
-       ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
-       ent->ebx = kvm_sig_old;
-       vcpu_set_cpuid(vcpu);
-
-       TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
-                   "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS");
-
-       /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */
-
-       kvm_vm_free(vm);
-}
-
-int main(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
-
-       vcpu_enable_cap(vcpu, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 1);
-
-       vcpu_clear_cpuid_entry(vcpu, KVM_CPUID_FEATURES);
-
-       enter_guest(vcpu);
-       kvm_vm_free(vm);
-
-       test_pv_unhalt();
-}
diff --git a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c
deleted file mode 100644 (file)
index 7e2bfb3..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * maximum APIC ID capability tests
- *
- * Copyright (C) 2022, Intel, Inc.
- *
- * Tests for getting/setting maximum APIC ID capability
- */
-
-#include "kvm_util.h"
-
-#define MAX_VCPU_ID    2
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vm *vm;
-       int ret;
-
-       vm = vm_create_barebones();
-
-       /* Get KVM_CAP_MAX_VCPU_ID cap supported in KVM */
-       ret = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
-
-       /* Try to set KVM_CAP_MAX_VCPU_ID beyond KVM cap */
-       ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, ret + 1);
-       TEST_ASSERT(ret < 0,
-                   "Setting KVM_CAP_MAX_VCPU_ID beyond KVM cap should fail");
-
-       /* Test BOOT_CPU_ID interaction (MAX_VCPU_ID cannot be lower) */
-       if (kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID)) {
-               vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)MAX_VCPU_ID);
-
-               /* Try setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID */
-               ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID - 1);
-               TEST_ASSERT(ret < 0,
-                           "Setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID should fail");
-       }
-
-       /* Set KVM_CAP_MAX_VCPU_ID */
-       vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID);
-
-       /* Try to set KVM_CAP_MAX_VCPU_ID again */
-       ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID + 1);
-       TEST_ASSERT(ret < 0,
-                   "Setting KVM_CAP_MAX_VCPU_ID multiple times should fail");
-
-       /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap */
-       ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)MAX_VCPU_ID);
-       TEST_ASSERT(ret < 0, "Creating vCPU with ID > MAX_VCPU_ID should fail");
-
-       /* Create vCPU with bits 63:32 != 0, but an otherwise valid id */
-       ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(1L << 32));
-       TEST_ASSERT(ret < 0, "Creating vCPU with ID[63:32] != 0 should fail");
-
-       /* Create vCPU with id within bounds */
-       ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)0);
-       TEST_ASSERT(ret >= 0, "Creating vCPU with ID 0 should succeed");
-
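-       /* On success, KVM_CREATE_VCPU returns a new vCPU fd; close it. */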
-       close(ret);
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
deleted file mode 100644 (file)
index 2b550ef..0000000
+++ /dev/null
@@ -1,129 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-
-#define CPUID_MWAIT (1u << 3)
-
-enum monitor_mwait_testcases {
-       MWAIT_QUIRK_DISABLED = BIT(0),
-       MISC_ENABLES_QUIRK_DISABLED = BIT(1),
-       MWAIT_DISABLED = BIT(2),
-};
-
-/*
- * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD, in all
- * other scenarios KVM should emulate them as nops.
- */
-#define GUEST_ASSERT_MONITOR_MWAIT(insn, testcase, vector)             \
-do {                                                                   \
-       bool fault_wanted = ((testcase) & MWAIT_QUIRK_DISABLED) &&      \
-                           ((testcase) & MWAIT_DISABLED);              \
-                                                                       \
-       if (fault_wanted)                                               \
-               __GUEST_ASSERT((vector) == UD_VECTOR,                   \
-                              "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \
-                              testcase, vector);                       \
-       else                                                            \
-               __GUEST_ASSERT(!(vector),                               \
-                              "Expected success on " insn " for testcase '0x%x', got '0x%x'", \
-                              testcase, vector);                       \
-} while (0)
-
-static void guest_monitor_wait(int testcase)
-{
-       u8 vector;
-
-       GUEST_SYNC(testcase);
-
-       /*
-        * Arbitrarily MONITOR this function, SVM performs fault checks before
-        * intercept checks, so the inputs for MONITOR and MWAIT must be valid.
-        */
-       vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0));
-       GUEST_ASSERT_MONITOR_MWAIT("MONITOR", testcase, vector);
-
-       vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
-       GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
-}
-
-static void guest_code(void)
-{
-       guest_monitor_wait(MWAIT_DISABLED);
-
-       guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
-
-       guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED);
-       guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED);
-
-       guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
-       guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED);
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       uint64_t disabled_quirks;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       int testcase;
-
-       TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT));
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
-
-       while (1) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       testcase = uc.args[1];
-                       break;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       goto done;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-                       goto done;
-               }
-
-               disabled_quirks = 0;
-               if (testcase & MWAIT_QUIRK_DISABLED)
-                       disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
-               if (testcase & MISC_ENABLES_QUIRK_DISABLED)
-                       disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
-               vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
-
-               /*
-                * If the MISC_ENABLES quirk (KVM neglects to update CPUID to
-                * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT
-                * bit in MISC_ENABLES accordingly.  If the quirk is enabled,
-                * the only valid configuration is MWAIT disabled, as CPUID
-                * can't be manually changed after running the vCPU.
-                */
-               if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) {
-                       TEST_ASSERT(testcase & MWAIT_DISABLED,
-                                   "Can't toggle CPUID features after running vCPU");
-                       continue;
-               }
-
-               vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE,
-                            (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT);
-       }
-
-done:
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
deleted file mode 100644 (file)
index 3eb0313..0000000
+++ /dev/null
@@ -1,288 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-#include "svm_util.h"
-
-#define L2_GUEST_STACK_SIZE 256
-
-/*
- * Arbitrary, never shoved into KVM/hardware, just need to avoid conflict with
- * the "real" exceptions used, #SS/#GP/#DF (12/13/8).
- */
-#define FAKE_TRIPLE_FAULT_VECTOR       0xaa
-
-/* Arbitrary 32-bit error code injected by this test. */
-#define SS_ERROR_CODE 0xdeadbeef
-
-/*
- * Bit '0' is set on Intel if the exception occurs while delivering a previous
- * event/exception.  AMD's wording is ambiguous, but presumably the bit is set
- * if the exception occurs while delivering an external event, e.g. NMI or INTR,
- * but not for exceptions that occur when delivering other exceptions or
- * software interrupts.
- *
- * Note, Intel's name for it, "External event", is misleading and much more
- * aligned with AMD's behavior, but the SDM is quite clear on its behavior.
- */
-#define ERROR_CODE_EXT_FLAG    BIT(0)
-
-/*
- * Bit '1' is set if the fault occurred when looking up a descriptor in the
- * IDT, which is the case here as the IDT is empty/NULL.
- */
-#define ERROR_CODE_IDT_FLAG    BIT(1)
-
-/*
- * The #GP that occurs when vectoring #SS should show the index into the IDT
- * for #SS, plus have the "IDT flag" set.
- */
-#define GP_ERROR_CODE_AMD ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG)
-#define GP_ERROR_CODE_INTEL ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG | ERROR_CODE_EXT_FLAG)
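-/* With SS_VECTOR == 12, the values above are 0x62 (AMD) and 0x63 (Intel). */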
-
-/*
- * Intel and AMD both shove '0' into the error code on #DF, regardless of what
- * led to the double fault.
- */
-#define DF_ERROR_CODE 0
-
-#define INTERCEPT_SS           (BIT_ULL(SS_VECTOR))
-#define INTERCEPT_SS_DF                (INTERCEPT_SS | BIT_ULL(DF_VECTOR))
-#define INTERCEPT_SS_GP_DF     (INTERCEPT_SS_DF | BIT_ULL(GP_VECTOR))
-
-static void l2_ss_pending_test(void)
-{
-       GUEST_SYNC(SS_VECTOR);
-}
-
-static void l2_ss_injected_gp_test(void)
-{
-       GUEST_SYNC(GP_VECTOR);
-}
-
-static void l2_ss_injected_df_test(void)
-{
-       GUEST_SYNC(DF_VECTOR);
-}
-
-static void l2_ss_injected_tf_test(void)
-{
-       GUEST_SYNC(FAKE_TRIPLE_FAULT_VECTOR);
-}
-
-static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector,
-                      uint32_t error_code)
-{
-       struct vmcb *vmcb = svm->vmcb;
-       struct vmcb_control_area *ctrl = &vmcb->control;
-
-       vmcb->save.rip = (u64)l2_code;
-       run_guest(vmcb, svm->vmcb_gpa);
-
-       if (vector == FAKE_TRIPLE_FAULT_VECTOR)
-               return;
-
-       GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector));
-       GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code);
-}
-
-static void l1_svm_code(struct svm_test_data *svm)
-{
-       struct vmcb_control_area *ctrl = &svm->vmcb->control;
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
-       generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-       svm->vmcb->save.idtr.limit = 0;
-       ctrl->intercept |= BIT_ULL(INTERCEPT_SHUTDOWN);
-
-       ctrl->intercept_exceptions = INTERCEPT_SS_GP_DF;
-       svm_run_l2(svm, l2_ss_pending_test, SS_VECTOR, SS_ERROR_CODE);
-       svm_run_l2(svm, l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_AMD);
-
-       ctrl->intercept_exceptions = INTERCEPT_SS_DF;
-       svm_run_l2(svm, l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
-
-       ctrl->intercept_exceptions = INTERCEPT_SS;
-       svm_run_l2(svm, l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
-       GUEST_ASSERT_EQ(ctrl->exit_code, SVM_EXIT_SHUTDOWN);
-
-       GUEST_DONE();
-}
-
-static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code)
-{
-       GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_code));
-
-       GUEST_ASSERT_EQ(vector == SS_VECTOR ? vmlaunch() : vmresume(), 0);
-
-       if (vector == FAKE_TRIPLE_FAULT_VECTOR)
-               return;
-
-       GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
-       GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector);
-       GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code);
-}
-
-static void l1_vmx_code(struct vmx_pages *vmx)
-{
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
-       GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
-
-       GUEST_ASSERT_EQ(load_vmcs(vmx), true);
-
-       prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-       GUEST_ASSERT_EQ(vmwrite(GUEST_IDTR_LIMIT, 0), 0);
-
-       /*
-        * VMX disallows injecting an exception with error_code[31:16] != 0,
-        * and hardware will never generate a VM-Exit with bits 31:16 set.
-        * KVM should likewise truncate the "bad" userspace value.
-        */
-       GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_GP_DF), 0);
-       vmx_run_l2(l2_ss_pending_test, SS_VECTOR, (u16)SS_ERROR_CODE);
-       vmx_run_l2(l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_INTEL);
-
-       GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_DF), 0);
-       vmx_run_l2(l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
-
-       GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS), 0);
-       vmx_run_l2(l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
-       GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_TRIPLE_FAULT);
-
-       GUEST_DONE();
-}
-
-static void __attribute__((__flatten__)) l1_guest_code(void *test_data)
-{
-       if (this_cpu_has(X86_FEATURE_SVM))
-               l1_svm_code(test_data);
-       else
-               l1_vmx_code(test_data);
-}
-
-static void assert_ucall_vector(struct kvm_vcpu *vcpu, int vector)
-{
-       struct ucall uc;
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_SYNC:
-               TEST_ASSERT(vector == uc.args[1],
-                           "Expected L2 to ask for %d, got %ld", vector, uc.args[1]);
-               break;
-       case UCALL_DONE:
-               TEST_ASSERT(vector == -1,
-                           "Expected L2 to ask for %d, L2 says it's done", vector);
-               break;
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-       default:
-               TEST_FAIL("Expected L2 to ask for %d, got unexpected ucall %lu", vector, uc.cmd);
-       }
-}
-
-static void queue_ss_exception(struct kvm_vcpu *vcpu, bool inject)
-{
-       struct kvm_vcpu_events events;
-
-       vcpu_events_get(vcpu, &events);
-
-       TEST_ASSERT(!events.exception.pending,
-                   "Vector %d unexpectedly pending", events.exception.nr);
-       TEST_ASSERT(!events.exception.injected,
-                   "Vector %d unexpectedly injected", events.exception.nr);
-
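-       /* Queue #SS as still pending (!inject) or as already injected. */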
-       events.flags = KVM_VCPUEVENT_VALID_PAYLOAD;
-       events.exception.pending = !inject;
-       events.exception.injected = inject;
-       events.exception.nr = SS_VECTOR;
-       events.exception.has_error_code = true;
-       events.exception.error_code = SS_ERROR_CODE;
-       vcpu_events_set(vcpu, &events);
-}
-
-/*
- * Verify KVM_{G,S}ET_EVENTS play nice with pending vs. injected exceptions
- * when an exception is being queued for L2.  Specifically, verify that KVM
- * honors L1 exception intercept controls when a #SS is pending/injected,
- * triggers a #GP on vectoring the #SS, morphs to #DF if #GP isn't intercepted
- * by L1, and finally causes (nested) SHUTDOWN if #DF isn't intercepted by L1.
- */
-int main(int argc, char *argv[])
-{
-       vm_vaddr_t nested_test_data_gva;
-       struct kvm_vcpu_events events;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXCEPTION_PAYLOAD));
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX));
-
-       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-       vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul);
-
-       if (kvm_cpu_has(X86_FEATURE_SVM))
-               vcpu_alloc_svm(vm, &nested_test_data_gva);
-       else
-               vcpu_alloc_vmx(vm, &nested_test_data_gva);
-
-       vcpu_args_set(vcpu, 1, nested_test_data_gva);
-
-       /* Run L1 => L2.  L2 should sync and request #SS. */
-       vcpu_run(vcpu);
-       assert_ucall_vector(vcpu, SS_VECTOR);
-
-       /* Pend #SS and request immediate exit.  #SS should still be pending. */
-       queue_ss_exception(vcpu, false);
-       vcpu->run->immediate_exit = true;
-       vcpu_run_complete_io(vcpu);
-
-       /* Verify the pending event comes back out the same as it went in. */
-       vcpu_events_get(vcpu, &events);
-       TEST_ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD,
-                       KVM_VCPUEVENT_VALID_PAYLOAD);
-       TEST_ASSERT_EQ(events.exception.pending, true);
-       TEST_ASSERT_EQ(events.exception.nr, SS_VECTOR);
-       TEST_ASSERT_EQ(events.exception.has_error_code, true);
-       TEST_ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE);
-
-       /*
-        * Run for real with the pending #SS, L1 should get a VM-Exit due to
-        * #SS interception and re-enter L2 to request #GP (via injected #SS).
-        */
-       vcpu->run->immediate_exit = false;
-       vcpu_run(vcpu);
-       assert_ucall_vector(vcpu, GP_VECTOR);
-
-       /*
-        * Inject #SS, the #SS should bypass interception and cause #GP, which
-        * L1 should intercept before KVM morphs it to #DF.  L1 should then
-        * disable #GP interception and run L2 to request #DF (via #SS => #GP).
-        */
-       queue_ss_exception(vcpu, true);
-       vcpu_run(vcpu);
-       assert_ucall_vector(vcpu, DF_VECTOR);
-
-       /*
-        * Inject #SS, the #SS should bypass interception and cause #GP, which
-        * L1 is no longer intercepting, and so should see a #DF VM-Exit.  L1
-        * should then signal that it is done.
-        */
-       queue_ss_exception(vcpu, true);
-       vcpu_run(vcpu);
-       assert_ucall_vector(vcpu, FAKE_TRIPLE_FAULT_VECTOR);
-
-       /*
-        * Inject #SS yet again.  L1 is not intercepting #GP or #DF, and so
-        * should see nested TRIPLE_FAULT / SHUTDOWN.
-        */
-       queue_ss_exception(vcpu, true);
-       vcpu_run(vcpu);
-       assert_ucall_vector(vcpu, -1);
-
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
deleted file mode 100644 (file)
index e7efb2b..0000000
+++ /dev/null
@@ -1,266 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Usage: to be run via nx_huge_pages_test.sh, which does the necessary
- * environment setup and teardown
- *
- * Copyright (C) 2022, Google LLC.
- */
-#include <fcntl.h>
-#include <stdint.h>
-#include <time.h>
-
-#include <test_util.h>
-#include "kvm_util.h"
-#include "processor.h"
-
-#define HPAGE_SLOT             10
-#define HPAGE_GPA              (4UL << 30) /* 4G prevents collision w/ slot 0 */
-#define HPAGE_GVA              HPAGE_GPA /* GVA is arbitrary, so use GPA. */
-#define PAGES_PER_2MB_HUGE_PAGE 512
-#define HPAGE_SLOT_NPAGES      (3 * PAGES_PER_2MB_HUGE_PAGE)
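-/* i.e. enough 4KiB pages to back the slot with three 2MiB huge pages. */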
-
-/*
- * Passed by nx_huge_pages_test.sh to provide an easy warning if this test is
- * being run without it.
- */
-#define MAGIC_TOKEN 887563923
-
-/*
- * x86 opcode for the return instruction. Used to call into, and then
- * immediately return from, memory backed with hugepages.
- */
-#define RETURN_OPCODE 0xC3
-
-/* Call the specified memory address. */
-static void guest_do_CALL(uint64_t target)
-{
-       ((void (*)(void)) target)();
-}
-
-/*
- * Exit the VM after each memory access so that the userspace component of the
- * test can make assertions about the pages backing the VM.
- *
- * See below for an explanation of how each access should affect the
- * backing mappings.
- */
-void guest_code(void)
-{
-       uint64_t hpage_1 = HPAGE_GVA;
-       uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512);
-       uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512);
-
-       READ_ONCE(*(uint64_t *)hpage_1);
-       GUEST_SYNC(1);
-
-       READ_ONCE(*(uint64_t *)hpage_2);
-       GUEST_SYNC(2);
-
-       guest_do_CALL(hpage_1);
-       GUEST_SYNC(3);
-
-       guest_do_CALL(hpage_3);
-       GUEST_SYNC(4);
-
-       READ_ONCE(*(uint64_t *)hpage_1);
-       GUEST_SYNC(5);
-
-       READ_ONCE(*(uint64_t *)hpage_3);
-       GUEST_SYNC(6);
-}
-
-static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
-{
-       int actual_pages_2m;
-
-       actual_pages_2m = vm_get_stat(vm, "pages_2m");
-
-       TEST_ASSERT(actual_pages_2m == expected_pages_2m,
-                   "Unexpected 2m page count. Expected %d, got %d",
-                   expected_pages_2m, actual_pages_2m);
-}
-
-static void check_split_count(struct kvm_vm *vm, int expected_splits)
-{
-       int actual_splits;
-
-       actual_splits = vm_get_stat(vm, "nx_lpage_splits");
-
-       TEST_ASSERT(actual_splits == expected_splits,
-                   "Unexpected NX huge page split count. Expected %d, got %d",
-                   expected_splits, actual_splits);
-}
-
-static void wait_for_reclaim(int reclaim_period_ms)
-{
-       long reclaim_wait_ms;
-       struct timespec ts;
-
-       reclaim_wait_ms = reclaim_period_ms * 5;
-       ts.tv_sec = reclaim_wait_ms / 1000;
-       ts.tv_nsec = (reclaim_wait_ms - (ts.tv_sec * 1000)) * 1000000;
-       nanosleep(&ts, NULL);
-}
-
-void run_test(int reclaim_period_ms, bool disable_nx_huge_pages,
-             bool reboot_permissions)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       uint64_t nr_bytes;
-       void *hva;
-       int r;
-
-       vm = vm_create(1);
-
-       if (disable_nx_huge_pages) {
-               r = __vm_disable_nx_huge_pages(vm);
-               if (reboot_permissions) {
-                       TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions");
-               } else {
-                       TEST_ASSERT(r == -1 && errno == EPERM,
-                                   "This process should not have permission to disable NX huge pages");
-                       return;
-               }
-       }
-
-       vcpu = vm_vcpu_add(vm, 0, guest_code);
-
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB,
-                                   HPAGE_GPA, HPAGE_SLOT,
-                                   HPAGE_SLOT_NPAGES, 0);
-
-       nr_bytes = HPAGE_SLOT_NPAGES * vm->page_size;
-
-       /*
-        * Ensure that KVM can map HPAGE_SLOT with huge pages by mapping the
-        * region into the guest with 2MiB pages whenever TDP is disabled (i.e.
-        * whenever KVM is shadowing the guest page tables).
-        *
-        * When TDP is enabled, KVM should be able to map HPAGE_SLOT with huge
-        * pages irrespective of the guest page size, so map with 4KiB pages
-        * to test that that is the case.
-        */
-       if (kvm_is_tdp_enabled())
-               virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_4K);
-       else
-               virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_2M);
-
-       hva = addr_gpa2hva(vm, HPAGE_GPA);
-       memset(hva, RETURN_OPCODE, nr_bytes);
-
-       check_2m_page_count(vm, 0);
-       check_split_count(vm, 0);
-
-       /*
-        * The guest code will first read from the first hugepage, resulting
-        * in a huge page mapping being created.
-        */
-       vcpu_run(vcpu);
-       check_2m_page_count(vm, 1);
-       check_split_count(vm, 0);
-
-       /*
-        * Then the guest code will read from the second hugepage, resulting
-        * in another huge page mapping being created.
-        */
-       vcpu_run(vcpu);
-       check_2m_page_count(vm, 2);
-       check_split_count(vm, 0);
-
-       /*
-        * Next, the guest will execute from the first huge page, causing it
-        * to be remapped at 4k.
-        *
-        * If NX huge pages are disabled, this should have no effect.
-        */
-       vcpu_run(vcpu);
-       check_2m_page_count(vm, disable_nx_huge_pages ? 2 : 1);
-       check_split_count(vm, disable_nx_huge_pages ? 0 : 1);
-
-       /*
-        * Executing from the third huge page (previously unaccessed) will
-        * cause part of it to be mapped at 4k.
-        *
-        * If NX huge pages are disabled, it should be mapped at 2M.
-        */
-       vcpu_run(vcpu);
-       check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
-       check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
-
-       /* Reading from the first huge page again should have no effect. */
-       vcpu_run(vcpu);
-       check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
-       check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
-
-       /* Give recovery thread time to run. */
-       wait_for_reclaim(reclaim_period_ms);
-
-       /*
-        * Now that the reclaimer has run, all the split pages should be gone.
-        *
-        * If NX huge pages are disabled, the reclaimer will not run, so
-        * nothing should change from here on.
-        */
-       check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
-       check_split_count(vm, 0);
-
-       /*
-        * The 4k mapping on hpage 3 should have been removed, so check that
-        * reading from it causes a huge page mapping to be installed.
-        */
-       vcpu_run(vcpu);
-       check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 2);
-       check_split_count(vm, 0);
-
-       kvm_vm_free(vm);
-}
-
-static void help(char *name)
-{
-       puts("");
-       printf("usage: %s [-h] [-p period_ms] [-t token] [-r]\n", name);
-       puts("");
-       printf(" -p: The NX reclaim period in milliseconds.\n");
-       printf(" -t: The magic token to indicate environment setup is done.\n");
-       printf(" -r: The test has reboot permissions and can disable NX huge pages.\n");
-       puts("");
-       exit(0);
-}
-
-int main(int argc, char **argv)
-{
-       int reclaim_period_ms = 0, token = 0, opt;
-       bool reboot_permissions = false;
-
-       while ((opt = getopt(argc, argv, "hp:t:r")) != -1) {
-               switch (opt) {
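-               /*
-                * KVM_CLOCK_REALTIME pairs the clock value with a host
-                * realtime reading; the injected offset mimics realtime
-                * moving between that reading and KVM_SET_CLOCK.
-                */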
-               case 'p':
-                       reclaim_period_ms = atoi_positive("Reclaim period", optarg);
-                       break;
-               case 't':
-                       token = atoi_paranoid(optarg);
-                       break;
-               case 'r':
-                       reboot_permissions = true;
-                       break;
-               case 'h':
-               default:
-                       help(argv[0]);
-                       break;
-               }
-       }
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES));
-
-       __TEST_REQUIRE(token == MAGIC_TOKEN,
-                      "This test must be run with the magic token via '-t %d'.\n"
-                      "Running via nx_huge_pages_test.sh, which also handles "
-                      "environment setup, is strongly recommended.", MAGIC_TOKEN);
-
-       run_test(reclaim_period_ms, false, reboot_permissions);
-       run_test(reclaim_period_ms, true, reboot_permissions);
-
-       return 0;
-}
-
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
deleted file mode 100755 (executable)
index caad084..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Wrapper script which performs setup and cleanup for nx_huge_pages_test.
-# Makes use of root privileges to set up huge pages and KVM module parameters.
-#
-# Copyright (C) 2022, Google LLC.
-
-set -e
-
-NX_HUGE_PAGES=$(cat /sys/module/kvm/parameters/nx_huge_pages)
-NX_HUGE_PAGES_RECOVERY_RATIO=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio)
-NX_HUGE_PAGES_RECOVERY_PERIOD=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms)
-HUGE_PAGES=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages)
-
-# If we're already root, the host might not have sudo.
-if [ $(whoami) == "root" ]; then
-       function do_sudo () {
-               "$@"
-       }
-else
-       function do_sudo () {
-               sudo "$@"
-       }
-fi
-
-set +e
-
-function sudo_echo () {
-       echo "$1" | do_sudo tee -a "$2" > /dev/null
-}
-
-NXECUTABLE="$(dirname $0)/nx_huge_pages_test"
-
-sudo_echo test /dev/null || exit 4 # KSFT_SKIP=4
-
-(
-       set -e
-
-       sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages
-       sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
-       sudo_echo 100 /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
-       sudo_echo "$(( $HUGE_PAGES + 3 ))" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
-
-       # Test with reboot permissions
-       if [ $(whoami) == "root" ] || sudo setcap cap_sys_boot+ep $NXECUTABLE 2> /dev/null; then
-               echo Running test with CAP_SYS_BOOT enabled
-               $NXECUTABLE -t 887563923 -p 100 -r
-               test $(whoami) == "root" || sudo setcap cap_sys_boot-ep $NXECUTABLE
-       else
-               echo setcap failed, skipping nx_huge_pages_test with CAP_SYS_BOOT enabled
-       fi
-
-       # Test without reboot permissions
-       if [ $(whoami) != "root" ] ; then
-               echo Running test with CAP_SYS_BOOT disabled
-               $NXECUTABLE -t 887563923 -p 100
-       else
-               echo Running as root, skipping nx_huge_pages_test with CAP_SYS_BOOT disabled
-       fi
-)
-RET=$?
-
-sudo_echo "$NX_HUGE_PAGES" /sys/module/kvm/parameters/nx_huge_pages
-sudo_echo "$NX_HUGE_PAGES_RECOVERY_RATIO" /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
-sudo_echo "$NX_HUGE_PAGES_RECOVERY_PERIOD" /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
-sudo_echo "$HUGE_PAGES" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
-
-exit $RET
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
deleted file mode 100644 (file)
index 9cbf283..0000000
+++ /dev/null
@@ -1,78 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Test for x86 KVM_CAP_MSR_PLATFORM_INFO
- *
- * Copyright (C) 2018, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Verifies expected behavior of controlling guest access to
- * MSR_PLATFORM_INFO.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
-
-static void guest_code(void)
-{
-       uint64_t msr_platform_info;
-       uint8_t vector;
-
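-       /* The GUEST_SYNC arg tells the host whether to allow the next read. */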
-       GUEST_SYNC(true);
-       msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
-       GUEST_ASSERT_EQ(msr_platform_info & MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
-                       MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
-
-       GUEST_SYNC(false);
-       vector = rdmsr_safe(MSR_PLATFORM_INFO, &msr_platform_info);
-       GUEST_ASSERT_EQ(vector, GP_VECTOR);
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       uint64_t msr_platform_info;
-       struct ucall uc;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       msr_platform_info = vcpu_get_msr(vcpu, MSR_PLATFORM_INFO);
-       vcpu_set_msr(vcpu, MSR_PLATFORM_INFO,
-                    msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
-
-       for (;;) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       vm_enable_cap(vm, KVM_CAP_MSR_PLATFORM_INFO, uc.args[1]);
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-               default:
-                       TEST_FAIL("Unexpected ucall %lu", uc.cmd);
-                       break;
-               }
-       }
-
-done:
-       kvm_vm_free(vm);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
deleted file mode 100644 (file)
index 698cb36..0000000
+++ /dev/null
@@ -1,644 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2023, Tencent, Inc.
- */
-#include <x86intrin.h>
-
-#include "pmu.h"
-#include "processor.h"
-
-/* Number of iterations of the loop for the guest measurement payload. */
-#define NUM_LOOPS                      10
-
-/* Each iteration of the loop retires one branch instruction. */
-#define NUM_BRANCH_INSNS_RETIRED       (NUM_LOOPS)
-
-/*
- * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
- * 1 LOOP.
- */
-#define NUM_INSNS_PER_LOOP             3
-
-/*
- * Number of "extra" instructions that will be counted, i.e. the number of
- * instructions that are needed to set up the loop and then disable the
- * counter.  2 MOV, 2 XOR, 1 WRMSR.
- */
-#define NUM_EXTRA_INSNS                        5
-
-/* Total number of instructions retired within the measured section. */
-#define NUM_INSNS_RETIRED              (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
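-/* With the values above, NUM_INSNS_RETIRED = 10 * 3 + 5 = 35. */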
-
-
-static uint8_t kvm_pmu_version;
-static bool kvm_has_perf_caps;
-
-static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
-                                                 void *guest_code,
-                                                 uint8_t pmu_version,
-                                                 uint64_t perf_capabilities)
-{
-       struct kvm_vm *vm;
-
-       vm = vm_create_with_one_vcpu(vcpu, guest_code);
-       sync_global_to_guest(vm, kvm_pmu_version);
-
-       /*
-        * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
-        * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
-        */
-       if (kvm_has_perf_caps)
-               vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
-
-       vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
-       return vm;
-}
-
-static void run_vcpu(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       do {
-               vcpu_run(vcpu);
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       break;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       break;
-               case UCALL_PRINTF:
-                       pr_info("%s", uc.buffer);
-                       break;
-               case UCALL_DONE:
-                       break;
-               default:
-                       TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
-               }
-       } while (uc.cmd != UCALL_DONE);
-}
-
-static uint8_t guest_get_pmu_version(void)
-{
-       /*
-        * Return the effective PMU version, i.e. the minimum between what KVM
-        * supports and what is enumerated to the guest.  The host deliberately
-        * advertises a PMU version to the guest beyond what is actually
-        * supported by KVM to verify KVM doesn't freak out and do something
-        * bizarre with an architecturally valid, but unsupported, version.
-        */
-       return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
-}
-
-/*
- * If an architectural event is supported and guaranteed to generate at least
- * one "hit, assert that its count is non-zero.  If an event isn't supported or
- * the test can't guarantee the associated action will occur, then all bets are
- * off regarding the count, i.e. no checks can be done.
- *
- * Sanity check that in all cases, the event doesn't count when it's disabled,
- * and that KVM correctly emulates the write of an arbitrary value.
- */
-static void guest_assert_event_count(uint8_t idx,
-                                    struct kvm_x86_pmu_feature event,
-                                    uint32_t pmc, uint32_t pmc_msr)
-{
-       uint64_t count;
-
-       count = _rdpmc(pmc);
-       if (!this_pmu_has(event))
-               goto sanity_checks;
-
-       switch (idx) {
-       case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
-               GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
-               break;
-       case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
-               GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
-               break;
-       case INTEL_ARCH_LLC_REFERENCES_INDEX:
-       case INTEL_ARCH_LLC_MISSES_INDEX:
-               if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
-                   !this_cpu_has(X86_FEATURE_CLFLUSH))
-                       break;
-               fallthrough;
-       case INTEL_ARCH_CPU_CYCLES_INDEX:
-       case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
-               GUEST_ASSERT_NE(count, 0);
-               break;
-       case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
-               GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
-               break;
-       default:
-               break;
-       }
-
-sanity_checks:
-       __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
-       GUEST_ASSERT_EQ(_rdpmc(pmc), count);
-
-       wrmsr(pmc_msr, 0xdead);
-       GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
-}
-
-/*
- * Enable and disable the PMC in a monolithic asm blob to ensure that the
- * compiler can't insert _any_ code into the measured sequence.  Note, ECX
- * doesn't need to be clobbered as the input value, @pmc_msr, is restored
- * before the end of the sequence.
- *
- * If CLFLUSH{,OPT} is supported, flush the cacheline containing (at least) the
- * CLFLUSH{,OPT} instruction on each loop iteration to force LLC references and
- * misses, i.e. to allow testing that those events actually count.
- *
- * If forced emulation is enabled (and specified), force emulation on a subset
- * of the measured code to verify that KVM correctly emulates instructions and
- * branches retired events in conjunction with hardware also counting said
- * events.
- */
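-/*
- * Note, @_msr is passed in both ECX and EDI: the first WRMSR programs the
- * event via the control MSR in ECX, the loop then reuses ECX as its counter,
- * and the trailing "mov %edi, %ecx" restores the MSR index so that the final
- * WRMSR (with EAX=EDX=0) stops the counter.
- */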
-#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP)                                \
-do {                                                                           \
-       __asm__ __volatile__("wrmsr\n\t"                                        \
-                            " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t"      \
-                            "1:\n\t"                                           \
-                            clflush "\n\t"                                     \
-                            "mfence\n\t"                                       \
-                            FEP "loop 1b\n\t"                                  \
-                            FEP "mov %%edi, %%ecx\n\t"                         \
-                            FEP "xor %%eax, %%eax\n\t"                         \
-                            FEP "xor %%edx, %%edx\n\t"                         \
-                            "wrmsr\n\t"                                        \
-                            :: "a"((uint32_t)_value), "d"(_value >> 32),       \
-                               "c"(_msr), "D"(_msr)                            \
-       );                                                                      \
-} while (0)
-
-#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
-do {                                                                           \
-       wrmsr(_pmc_msr, 0);                                                     \
-                                                                               \
-       if (this_cpu_has(X86_FEATURE_CLFLUSHOPT))                               \
-               GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP);    \
-       else if (this_cpu_has(X86_FEATURE_CLFLUSH))                             \
-               GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP);       \
-       else                                                                    \
-               GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP);             \
-                                                                               \
-       guest_assert_event_count(_idx, _event, _pmc, _pmc_msr);                 \
-} while (0)
-
-static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
-                                   uint32_t pmc, uint32_t pmc_msr,
-                                   uint32_t ctrl_msr, uint64_t ctrl_msr_value)
-{
-       GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
-
-       if (is_forced_emulation_enabled)
-               GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
-}
-
-#define X86_PMU_FEATURE_NULL                                           \
-({                                                                     \
-       struct kvm_x86_pmu_feature feature = {};                        \
-                                                                       \
-       feature;                                                        \
-})
-
-static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
-{
-       return !(*(u64 *)&event);
-}
-
-static void guest_test_arch_event(uint8_t idx)
-{
-       const struct {
-               struct kvm_x86_pmu_feature gp_event;
-               struct kvm_x86_pmu_feature fixed_event;
-       } intel_event_to_feature[] = {
-               [INTEL_ARCH_CPU_CYCLES_INDEX]            = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
-               [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX]  = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
-               /*
-                * Note, the fixed counter for reference cycles is NOT the same
-                * as the general purpose architectural event.  The fixed counter
-                * explicitly counts at the same frequency as the TSC, whereas
-                * the GP event counts at a fixed, but uarch specific, frequency.
-                * Bundle them here for simplicity.
-                */
-               [INTEL_ARCH_REFERENCE_CYCLES_INDEX]      = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
-               [INTEL_ARCH_LLC_REFERENCES_INDEX]        = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
-               [INTEL_ARCH_LLC_MISSES_INDEX]            = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
-               [INTEL_ARCH_BRANCHES_RETIRED_INDEX]      = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
-               [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
-               [INTEL_ARCH_TOPDOWN_SLOTS_INDEX]         = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
-       };
-
-       uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
-       uint32_t pmu_version = guest_get_pmu_version();
-       /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
-       bool guest_has_perf_global_ctrl = pmu_version >= 2;
-       struct kvm_x86_pmu_feature gp_event, fixed_event;
-       uint32_t base_pmc_msr;
-       unsigned int i;
-
-       /* The host side shouldn't invoke this without a guest PMU. */
-       GUEST_ASSERT(pmu_version);
-
-       if (this_cpu_has(X86_FEATURE_PDCM) &&
-           rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
-               base_pmc_msr = MSR_IA32_PMC0;
-       else
-               base_pmc_msr = MSR_IA32_PERFCTR0;
-
-       gp_event = intel_event_to_feature[idx].gp_event;
-       GUEST_ASSERT_EQ(idx, gp_event.f.bit);
-
-       GUEST_ASSERT(nr_gp_counters);
-
-       for (i = 0; i < nr_gp_counters; i++) {
-               uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
-                                   ARCH_PERFMON_EVENTSEL_ENABLE |
-                                   intel_pmu_arch_events[idx];
-
-               wrmsr(MSR_P6_EVNTSEL0 + i, 0);
-               if (guest_has_perf_global_ctrl)
-                       wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
-
-               __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
-                                       MSR_P6_EVNTSEL0 + i, eventsel);
-       }
-
-       if (!guest_has_perf_global_ctrl)
-               return;
-
-       fixed_event = intel_event_to_feature[idx].fixed_event;
-       if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
-               return;
-
-       i = fixed_event.f.bit;
-
-       wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
-
-       __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED,
-                               MSR_CORE_PERF_FIXED_CTR0 + i,
-                               MSR_CORE_PERF_GLOBAL_CTRL,
-                               FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
-}
-
-static void guest_test_arch_events(void)
-{
-       uint8_t i;
-
-       for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
-               guest_test_arch_event(i);
-
-       GUEST_DONE();
-}
-
-static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
-                            uint8_t length, uint8_t unavailable_mask)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       /* Testing arch events requires a vPMU (there are no negative tests). */
-       if (!pmu_version)
-               return;
-
-       vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
-                                        pmu_version, perf_capabilities);
-
-       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
-                               length);
-       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
-                               unavailable_mask);
-
-       run_vcpu(vcpu);
-
-       kvm_vm_free(vm);
-}
-
-/*
- * Limit testing to MSRs that are actually defined by Intel (in the SDM).  MSRs
- * that aren't defined as counter MSRs *probably* don't exist, but there's no
- * guarantee that currently undefined MSR indices won't be used for something
- * other than PMCs in the future.
- */
-#define MAX_NR_GP_COUNTERS     8
-#define MAX_NR_FIXED_COUNTERS  3
-
-#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector)              \
-__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector,                      \
-              "Expected %s on " #insn "(0x%x), got vector %u",                 \
-              expect_gp ? "#GP" : "no fault", msr, vector)                     \
-
-#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected_val)                   \
-       __GUEST_ASSERT(val == expected_val,                                     \
-                      "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx",    \
-                      msr, expected_val, val);
-
-static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
-                            uint64_t expected_val)
-{
-       uint8_t vector;
-       uint64_t val;
-
-       vector = rdpmc_safe(rdpmc_idx, &val);
-       GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
-       if (expect_success)
-               GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
-
-       if (!is_forced_emulation_enabled)
-               return;
-
-       vector = rdpmc_safe_fep(rdpmc_idx, &val);
-       GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
-       if (expect_success)
-               GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
-}
-
-static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
-                                uint8_t nr_counters, uint32_t or_mask)
-{
-       const bool pmu_has_fast_mode = !guest_get_pmu_version();
-       uint8_t i;
-
-       for (i = 0; i < nr_possible_counters; i++) {
-               /*
-                * TODO: Test a value that validates full-width writes and the
-                * width of the counters.
-                */
-               const uint64_t test_val = 0xffff;
-               const uint32_t msr = base_msr + i;
-
-               /*
-                * Fixed counters are supported if the counter is less than the
-                * number of enumerated contiguous counters *or* the counter is
-                * explicitly enumerated in the supported counters mask.
-                */
-               const bool expect_success = i < nr_counters || (or_mask & BIT(i));
-
-               /*
-                * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
-                * unsupported, i.e. doesn't #GP and reads back '0'.
-                */
-               const uint64_t expected_val = expect_success ? test_val : 0;
-               const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
-                                      msr != MSR_P6_PERFCTR1;
-               uint32_t rdpmc_idx;
-               uint8_t vector;
-               uint64_t val;
-
-               vector = wrmsr_safe(msr, test_val);
-               GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
-
-               vector = rdmsr_safe(msr, &val);
-               GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);
-
-               /* On #GP, the result of RDMSR is undefined. */
-               if (!expect_gp)
-                       GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);
-
-               /*
-                * Redo the read tests with RDPMC, which has different indexing
-                * semantics and additional capabilities.
-                */
-               rdpmc_idx = i;
-               if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
-                       rdpmc_idx |= INTEL_RDPMC_FIXED;
-
-               guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);
-
-               /*
-                * KVM doesn't support non-architectural PMUs, i.e. it should be
-                * impossible to have fast mode RDPMC.  Verify that attempting
-                * to use fast RDPMC always #GPs.
-                */
-               GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
-               rdpmc_idx |= INTEL_RDPMC_FAST;
-               guest_test_rdpmc(rdpmc_idx, false, -1ull);
-
-               vector = wrmsr_safe(msr, 0);
-               GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
-       }
-}
-
-static void guest_test_gp_counters(void)
-{
-       uint8_t pmu_version = guest_get_pmu_version();
-       uint8_t nr_gp_counters = 0;
-       uint32_t base_msr;
-
-       if (pmu_version)
-               nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
-
-       /*
-        * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is
-        * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number
-        * of GP counters.  If there are no GP counters, require KVM to leave
-        * PERF_GLOBAL_CTRL '0'.  This edge case isn't covered by the SDM, but
-        * follow the spirit of the architecture and only globally enable GP
-        * counters, of which there are none.
-        */
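-       /* E.g. with 8 GP counters, the expected post-RESET value is 0xff. */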
-       if (pmu_version > 1) {
-               uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);
-
-               if (nr_gp_counters)
-                       GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
-               else
-                       GUEST_ASSERT_EQ(global_ctrl, 0);
-       }
-
-       if (this_cpu_has(X86_FEATURE_PDCM) &&
-           rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
-               base_msr = MSR_IA32_PMC0;
-       else
-               base_msr = MSR_IA32_PERFCTR0;
-
-       guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
-       GUEST_DONE();
-}
-
-static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
-                            uint8_t nr_gp_counters)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
-                                        pmu_version, perf_capabilities);
-
-       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
-                               nr_gp_counters);
-
-       run_vcpu(vcpu);
-
-       kvm_vm_free(vm);
-}
-
-static void guest_test_fixed_counters(void)
-{
-       uint64_t supported_bitmask = 0;
-       uint8_t nr_fixed_counters = 0;
-       uint8_t i;
-
-       /* Fixed counters require Architectural vPMU Version 2+. */
-       if (guest_get_pmu_version() >= 2)
-               nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
-
-       /*
-        * The supported bitmask for fixed counters was introduced in PMU
-        * version 5.
-        */
-       if (guest_get_pmu_version() >= 5)
-               supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);
-
-       guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
-                            nr_fixed_counters, supported_bitmask);
-
-       for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
-               uint8_t vector;
-               uint64_t val;
-
-               if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
-                       vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
-                                           FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
-                       __GUEST_ASSERT(vector == GP_VECTOR,
-                                      "Expected #GP for counter %u in FIXED_CTR_CTRL", i);
-
-                       vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
-                                           FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
-                       __GUEST_ASSERT(vector == GP_VECTOR,
-                                      "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
-                       continue;
-               }
-
-               wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
-               wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
-               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
-               __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
-               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-               val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);
-
-               GUEST_ASSERT_NE(val, 0);
-       }
-       GUEST_DONE();
-}
-
-static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
-                               uint8_t nr_fixed_counters,
-                               uint32_t supported_bitmask)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
-                                        pmu_version, perf_capabilities);
-
-       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
-                               supported_bitmask);
-       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
-                               nr_fixed_counters);
-
-       run_vcpu(vcpu);
-
-       kvm_vm_free(vm);
-}
-
-static void test_intel_counters(void)
-{
-       uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
-       uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
-       uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
-       uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
-       unsigned int i;
-       uint8_t v, j;
-       uint32_t k;
-
-       const uint64_t perf_caps[] = {
-               0,
-               PMU_CAP_FW_WRITES,
-       };
-
-       /*
-        * Test up to PMU v5, which is the current maximum version defined by
-        * Intel, i.e. the last version that is guaranteed to be backwards
-        * compatible with KVM's existing behavior.
-        */
-       uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
-
-       /*
-        * Detect the existence of events that aren't supported by selftests.
-        * This will (obviously) fail any time the kernel adds support for a
-        * new event, but it's worth paying that price to keep the test fresh.
-        */
-       TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
-                   "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
-                   nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
-
-       /*
-        * Force iterating over known arch events regardless of whether or not
-        * KVM/hardware supports a given event.
-        */
-       nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
-
-       for (v = 0; v <= max_pmu_version; v++) {
-               for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
-                       if (!kvm_has_perf_caps && perf_caps[i])
-                               continue;
-
-                       pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
-                               v, perf_caps[i]);
-                       /*
-                        * To keep the total runtime reasonable, test every
-                        * possible non-zero, non-reserved bitmap combination
-                        * only with the native PMU version and the full bit
-                        * vector length.
-                        */
-                       if (v == pmu_version) {
-                               for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
-                                       test_arch_events(v, perf_caps[i], nr_arch_events, k);
-                       }
-                       /*
-                        * Test single bits for all PMU versions and lengths up
-                        * to the number of events + 1 (to verify KVM doesn't do
-                        * weird things if the guest length is greater than the
-                        * host length).  Explicitly test a mask of '0' and all
-                        * ones, i.e. all events being available and unavailable.
-                        */
-                       for (j = 0; j <= nr_arch_events + 1; j++) {
-                               test_arch_events(v, perf_caps[i], j, 0);
-                               test_arch_events(v, perf_caps[i], j, 0xff);
-
-                               for (k = 0; k < nr_arch_events; k++)
-                                       test_arch_events(v, perf_caps[i], j, BIT(k));
-                       }
-
-                       pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
-                               v, perf_caps[i]);
-                       for (j = 0; j <= nr_gp_counters; j++)
-                               test_gp_counters(v, perf_caps[i], j);
-
-                       pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
-                               v, perf_caps[i]);
-                       for (j = 0; j <= nr_fixed_counters; j++) {
-                               for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
-                                       test_fixed_counters(v, perf_caps[i], j, k);
-                       }
-               }
-       }
-}
-
-int main(int argc, char *argv[])
-{
-       TEST_REQUIRE(kvm_is_pmu_enabled());
-
-       TEST_REQUIRE(host_cpu_is_intel);
-       TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
-       TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
-
-       kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
-       kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);
-
-       test_intel_counters();
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
deleted file mode 100644 (file)
index c15513c..0000000
+++ /dev/null
@@ -1,876 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Test for x86 KVM_SET_PMU_EVENT_FILTER.
- *
- * Copyright (C) 2022, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Verifies the expected behavior of allow lists and deny lists for
- * virtual PMU events.
- */
-#include "kvm_util.h"
-#include "pmu.h"
-#include "processor.h"
-#include "test_util.h"
-
-#define NUM_BRANCHES 42
-#define MAX_TEST_EVENTS                10
-
-#define PMU_EVENT_FILTER_INVALID_ACTION                (KVM_PMU_EVENT_DENY + 1)
-#define PMU_EVENT_FILTER_INVALID_FLAGS                 (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1)
-#define PMU_EVENT_FILTER_INVALID_NEVENTS               (KVM_PMU_EVENT_FILTER_MAX_EVENTS + 1)
-
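-/*
- * Local mirror of the UAPI struct kvm_pmu_event_filter, with the flexible
- * events[] array given a fixed size so that filters can be built on the stack
- * and passed directly to KVM_SET_PMU_EVENT_FILTER.
- */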
-struct __kvm_pmu_event_filter {
-       __u32 action;
-       __u32 nevents;
-       __u32 fixed_counter_bitmap;
-       __u32 flags;
-       __u32 pad[4];
-       __u64 events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
-};
-
-/*
- * This event list comprises Intel's known architectural events, plus AMD's
- * Branch Instructions Retired for Zen CPUs.  Note, AMD and Intel use the
- * same encoding for Instructions Retired.
- */
-kvm_static_assert(INTEL_ARCH_INSTRUCTIONS_RETIRED == AMD_ZEN_INSTRUCTIONS_RETIRED);
-
-static const struct __kvm_pmu_event_filter base_event_filter = {
-       .nevents = ARRAY_SIZE(base_event_filter.events),
-       .events = {
-               INTEL_ARCH_CPU_CYCLES,
-               INTEL_ARCH_INSTRUCTIONS_RETIRED,
-               INTEL_ARCH_REFERENCE_CYCLES,
-               INTEL_ARCH_LLC_REFERENCES,
-               INTEL_ARCH_LLC_MISSES,
-               INTEL_ARCH_BRANCHES_RETIRED,
-               INTEL_ARCH_BRANCHES_MISPREDICTED,
-               INTEL_ARCH_TOPDOWN_SLOTS,
-               AMD_ZEN_BRANCHES_RETIRED,
-       },
-};
-
-struct {
-       uint64_t loads;
-       uint64_t stores;
-       uint64_t loads_stores;
-       uint64_t branches_retired;
-       uint64_t instructions_retired;
-} pmc_results;
-
-/*
- * If we encounter a #GP during the guest PMU sanity check, then the guest
- * PMU is not functional. Inform the hypervisor via GUEST_SYNC(-EFAULT).
- */
-static void guest_gp_handler(struct ex_regs *regs)
-{
-       GUEST_SYNC(-EFAULT);
-}
-
-/*
- * Check that we can write a new value to the given MSR and read it back.
- * The caller should provide a non-empty set of bits that are safe to flip.
- *
- * Return on success. GUEST_SYNC(-EIO) on error.
- */
-static void check_msr(uint32_t msr, uint64_t bits_to_flip)
-{
-       uint64_t v = rdmsr(msr) ^ bits_to_flip;
-
-       wrmsr(msr, v);
-       if (rdmsr(msr) != v)
-               GUEST_SYNC(-EIO);
-
-       v ^= bits_to_flip;
-       wrmsr(msr, v);
-       if (rdmsr(msr) != v)
-               GUEST_SYNC(-EIO);
-}
-
-static void run_and_measure_loop(uint32_t msr_base)
-{
-       const uint64_t branches_retired = rdmsr(msr_base + 0);
-       const uint64_t insn_retired = rdmsr(msr_base + 1);
-
-       __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
-
-       pmc_results.branches_retired = rdmsr(msr_base + 0) - branches_retired;
-       pmc_results.instructions_retired = rdmsr(msr_base + 1) - insn_retired;
-}
-
-static void intel_guest_code(void)
-{
-       check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
-       check_msr(MSR_P6_EVNTSEL0, 0xffff);
-       check_msr(MSR_IA32_PMC0, 0xffff);
-       GUEST_SYNC(0);
-
-       for (;;) {
-               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-               wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_BRANCHES_RETIRED);
-               wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_INSTRUCTIONS_RETIRED);
-               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
-
-               run_and_measure_loop(MSR_IA32_PMC0);
-               GUEST_SYNC(0);
-       }
-}
-
-/*
- * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
- * this code uses the always-available, legacy K7 PMU MSRs, which alias to
- * the first four of the six extended core PMU MSRs.
- */
-static void amd_guest_code(void)
-{
-       check_msr(MSR_K7_EVNTSEL0, 0xffff);
-       check_msr(MSR_K7_PERFCTR0, 0xffff);
-       GUEST_SYNC(0);
-
-       for (;;) {
-               wrmsr(MSR_K7_EVNTSEL0, 0);
-               wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BRANCHES_RETIRED);
-               wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_INSTRUCTIONS_RETIRED);
-
-               run_and_measure_loop(MSR_K7_PERFCTR0);
-               GUEST_SYNC(0);
-       }
-}
-
-/*
- * Run the VM to the next GUEST_SYNC(value), and return the value passed
- * to the sync. Any other exit from the guest is fatal.
- */
-static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-       get_ucall(vcpu, &uc);
-       TEST_ASSERT(uc.cmd == UCALL_SYNC,
-                   "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
-       return uc.args[1];
-}
-
-static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu)
-{
-       uint64_t r;
-
-       memset(&pmc_results, 0, sizeof(pmc_results));
-       sync_global_to_guest(vcpu->vm, pmc_results);
-
-       r = run_vcpu_to_sync(vcpu);
-       TEST_ASSERT(!r, "Unexpected sync value: 0x%lx", r);
-
-       sync_global_from_guest(vcpu->vm, pmc_results);
-}
-
-/*
- * In a nested environment or if the vPMU is disabled, the guest PMU
- * might not work as architected (accessing the PMU MSRs may raise
- * #GP, or writes could simply be discarded). In those situations,
- * there is no point in running these tests. The guest code will perform
- * a sanity check and then GUEST_SYNC(success). In the case of failure,
- * the behavior of the guest on resumption is undefined.
- */
-static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
-{
-       uint64_t r;
-
-       vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
-       r = run_vcpu_to_sync(vcpu);
-       vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL);
-
-       return !r;
-}
-
-/*
- * Remove the first occurrence of 'event' (if any) from the filter's
- * event list.
- */
-static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
-{
-       bool found = false;
-       int i;
-
-       for (i = 0; i < f->nevents; i++) {
-               if (found)
-                       f->events[i - 1] = f->events[i];
-               else
-                       found = f->events[i] == event;
-       }
-       if (found)
-               f->nevents--;
-}
-
-#define ASSERT_PMC_COUNTING_INSTRUCTIONS()                                             \
-do {                                                                                   \
-       uint64_t br = pmc_results.branches_retired;                                     \
-       uint64_t ir = pmc_results.instructions_retired;                                 \
-                                                                                       \
-       if (br && br != NUM_BRANCHES)                                                   \
-               pr_info("%s: Branch instructions retired = %lu (expected %u)\n",        \
-                       __func__, br, NUM_BRANCHES);                                    \
-       TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)",         \
-                   __func__, br);                                                      \
-       TEST_ASSERT(ir, "%s: Instructions retired = %lu (expected > 0)",                \
-                   __func__, ir);                                                      \
-} while (0)
-
-#define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS()                                         \
-do {                                                                                   \
-       uint64_t br = pmc_results.branches_retired;                                     \
-       uint64_t ir = pmc_results.instructions_retired;                                 \
-                                                                                       \
-       TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)",          \
-                   __func__, br);                                                      \
-       TEST_ASSERT(!ir, "%s: Instructions retired = %lu (expected 0)",                 \
-                   __func__, ir);                                                      \
-} while (0)
-
-static void test_without_filter(struct kvm_vcpu *vcpu)
-{
-       run_vcpu_and_sync_pmc_results(vcpu);
-
-       ASSERT_PMC_COUNTING_INSTRUCTIONS();
-}
-
-static void test_with_filter(struct kvm_vcpu *vcpu,
-                            struct __kvm_pmu_event_filter *__f)
-{
-       struct kvm_pmu_event_filter *f = (void *)__f;
-
-       vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
-       run_vcpu_and_sync_pmc_results(vcpu);
-}
-
-static void test_amd_deny_list(struct kvm_vcpu *vcpu)
-{
-       struct __kvm_pmu_event_filter f = {
-               .action = KVM_PMU_EVENT_DENY,
-               .nevents = 1,
-               .events = {
-                       RAW_EVENT(0x1C2, 0),
-               },
-       };
-
-       test_with_filter(vcpu, &f);
-
-       ASSERT_PMC_COUNTING_INSTRUCTIONS();
-}
-
-static void test_member_deny_list(struct kvm_vcpu *vcpu)
-{
-       struct __kvm_pmu_event_filter f = base_event_filter;
-
-       f.action = KVM_PMU_EVENT_DENY;
-       test_with_filter(vcpu, &f);
-
-       ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
-}
-
-static void test_member_allow_list(struct kvm_vcpu *vcpu)
-{
-       struct __kvm_pmu_event_filter f = base_event_filter;
-
-       f.action = KVM_PMU_EVENT_ALLOW;
-       test_with_filter(vcpu, &f);
-
-       ASSERT_PMC_COUNTING_INSTRUCTIONS();
-}
-
-static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
-{
-       struct __kvm_pmu_event_filter f = base_event_filter;
-
-       f.action = KVM_PMU_EVENT_DENY;
-
-       remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
-       remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
-       remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
-       test_with_filter(vcpu, &f);
-
-       ASSERT_PMC_COUNTING_INSTRUCTIONS();
-}
-
-static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
-{
-       struct __kvm_pmu_event_filter f = base_event_filter;
-
-       f.action = KVM_PMU_EVENT_ALLOW;
-
-       remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
-       remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
-       remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
-       test_with_filter(vcpu, &f);
-
-       ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
-}
-
-/*
- * Verify that setting KVM_PMU_CAP_DISABLE prevents the use of the PMU.
- *
- * Note that KVM_CAP_PMU_CAPABILITY must be enabled prior to creating any vCPUs.
- */
-static void test_pmu_config_disable(void (*guest_code)(void))
-{
-       struct kvm_vcpu *vcpu;
-       int r;
-       struct kvm_vm *vm;
-
-       r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY);
-       if (!(r & KVM_PMU_CAP_DISABLE))
-               return;
-
-       vm = vm_create(1);
-
-       vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE);
-
-       vcpu = vm_vcpu_add(vm, 0, guest_code);
-       TEST_ASSERT(!sanity_check_pmu(vcpu),
-                   "Guest should not be able to use disabled PMU.");
-
-       kvm_vm_free(vm);
-}
-
-/*
- * On Intel, check for a non-zero PMU version, at least one general-purpose
- * counter per logical processor, and support for counting the number of branch
- * instructions retired.
- */
-static bool use_intel_pmu(void)
-{
-       return host_cpu_is_intel &&
-              kvm_cpu_property(X86_PROPERTY_PMU_VERSION) &&
-              kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) &&
-              kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED);
-}
-
-/*
- * On AMD, all Family 17h+ CPUs (Zen and its successors) use event encoding
- * 0xc2,0 for Branch Instructions Retired.
- */
-static bool use_amd_pmu(void)
-{
-       return host_cpu_is_amd && kvm_cpu_family() >= 0x17;
-}
-
-/*
- * "MEM_INST_RETIRED.ALL_LOADS", "MEM_INST_RETIRED.ALL_STORES", and
- * "MEM_INST_RETIRED.ANY" from https://perfmon-events.intel.com/
- * supported on Intel Xeon processors:
- *  - Sapphire Rapids, Ice Lake, Cascade Lake, Skylake.
- */
-#define MEM_INST_RETIRED               0xD0
-#define MEM_INST_RETIRED_LOAD          RAW_EVENT(MEM_INST_RETIRED, 0x81)
-#define MEM_INST_RETIRED_STORE         RAW_EVENT(MEM_INST_RETIRED, 0x82)
-#define MEM_INST_RETIRED_LOAD_STORE    RAW_EVENT(MEM_INST_RETIRED, 0x83)
-
-static bool supports_event_mem_inst_retired(void)
-{
-       uint32_t eax, ebx, ecx, edx;
-
-       cpuid(1, &eax, &ebx, &ecx, &edx);
-       if (x86_family(eax) == 0x6) {
-               switch (x86_model(eax)) {
-               /* Sapphire Rapids */
-               case 0x8F:
-               /* Ice Lake */
-               case 0x6A:
-               /* Skylake */
-               /* Cascade Lake */
-               case 0x55:
-                       return true;
-               }
-       }
-
-       return false;
-}
-
-/*
- * "LS Dispatch", from Processor Programming Reference
- * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
- * Preliminary Processor Programming Reference (PPR) for AMD Family
- * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
- * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
- * B1 Processors Volume 1 of 2.
- */
-#define LS_DISPATCH            0x29
-#define LS_DISPATCH_LOAD       RAW_EVENT(LS_DISPATCH, BIT(0))
-#define LS_DISPATCH_STORE      RAW_EVENT(LS_DISPATCH, BIT(1))
-#define LS_DISPATCH_LOAD_STORE RAW_EVENT(LS_DISPATCH, BIT(2))
-
-#define INCLUDE_MASKED_ENTRY(event_select, mask, match) \
-       KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, false)
-#define EXCLUDE_MASKED_ENTRY(event_select, mask, match) \
-       KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, true)
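-
-/*
- * A masked entry matches a programmed event when the event select matches and
- * (unit mask & mask) == match; entries built with EXCLUDE_MASKED_ENTRY() carve
- * matching unit masks back out of what the INCLUDE entries allow (see
- * test_cases[] below).
- */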
-
-static void masked_events_guest_test(uint32_t msr_base)
-{
-       /*
-        * The actual values of the counters don't determine the outcome of
-        * the test, only whether they are zero or non-zero.
-        */
-       const uint64_t loads = rdmsr(msr_base + 0);
-       const uint64_t stores = rdmsr(msr_base + 1);
-       const uint64_t loads_stores = rdmsr(msr_base + 2);
-       int val;
-
-
-       __asm__ __volatile__("movl $0, %[v];"
-                            "movl %[v], %%eax;"
-                            "incl %[v];"
-                            : [v]"+m"(val) :: "eax");
-
-       pmc_results.loads = rdmsr(msr_base + 0) - loads;
-       pmc_results.stores = rdmsr(msr_base + 1) - stores;
-       pmc_results.loads_stores = rdmsr(msr_base + 2) - loads_stores;
-}
-
-static void intel_masked_events_guest_code(void)
-{
-       for (;;) {
-               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-
-               wrmsr(MSR_P6_EVNTSEL0 + 0, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD);
-               wrmsr(MSR_P6_EVNTSEL0 + 1, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_STORE);
-               wrmsr(MSR_P6_EVNTSEL0 + 2, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD_STORE);
-
-               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x7);
-
-               masked_events_guest_test(MSR_IA32_PMC0);
-               GUEST_SYNC(0);
-       }
-}
-
-static void amd_masked_events_guest_code(void)
-{
-       for (;;) {
-               wrmsr(MSR_K7_EVNTSEL0, 0);
-               wrmsr(MSR_K7_EVNTSEL1, 0);
-               wrmsr(MSR_K7_EVNTSEL2, 0);
-
-               wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD);
-               wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_STORE);
-               wrmsr(MSR_K7_EVNTSEL2, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD_STORE);
-
-               masked_events_guest_test(MSR_K7_PERFCTR0);
-               GUEST_SYNC(0);
-       }
-}
-
-static void run_masked_events_test(struct kvm_vcpu *vcpu,
-                                  const uint64_t masked_events[],
-                                  const int nmasked_events)
-{
-       struct __kvm_pmu_event_filter f = {
-               .nevents = nmasked_events,
-               .action = KVM_PMU_EVENT_ALLOW,
-               .flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
-       };
-
-       memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events);
-       test_with_filter(vcpu, &f);
-}
-
-#define ALLOW_LOADS            BIT(0)
-#define ALLOW_STORES           BIT(1)
-#define ALLOW_LOADS_STORES     BIT(2)
-
-struct masked_events_test {
-       uint64_t intel_events[MAX_TEST_EVENTS];
-       uint64_t intel_event_end;
-       uint64_t amd_events[MAX_TEST_EVENTS];
-       uint64_t amd_event_end;
-       const char *msg;
-       uint32_t flags;
-};
-
-/*
- * These are the test cases for the masked events tests.
- *
- * For each test, the guest enables 3 PMU counters (loads, stores,
- * loads + stores).  The filter is then set in KVM with the masked events
- * provided.  The test then verifies that the counters agree with which
- * ones should be counting and which ones should be filtered.
- */
-const struct masked_events_test test_cases[] = {
-       {
-               .intel_events = {
-                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x81),
-               },
-               .amd_events = {
-                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)),
-               },
-               .msg = "Only allow loads.",
-               .flags = ALLOW_LOADS,
-       }, {
-               .intel_events = {
-                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82),
-               },
-               .amd_events = {
-                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)),
-               },
-               .msg = "Only allow stores.",
-               .flags = ALLOW_STORES,
-       }, {
-               .intel_events = {
-                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83),
-               },
-               .amd_events = {
-                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(2)),
-               },
-               .msg = "Only allow loads + stores.",
-               .flags = ALLOW_LOADS_STORES,
-       }, {
-               .intel_events = {
-                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
-                       EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83),
-               },
-               .amd_events = {
-                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, ~(BIT(0) | BIT(1)), 0),
-               },
-               .msg = "Only allow loads and stores.",
-               .flags = ALLOW_LOADS | ALLOW_STORES,
-       }, {
-               .intel_events = {
-                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
-                       EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82),
-               },
-               .amd_events = {
-                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
-                       EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)),
-               },
-               .msg = "Only allow loads and loads + stores.",
-               .flags = ALLOW_LOADS | ALLOW_LOADS_STORES
-       }, {
-               .intel_events = {
-                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFE, 0x82),
-               },
-               .amd_events = {
-                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
-                       EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)),
-               },
-               .msg = "Only allow stores and loads + stores.",
-               .flags = ALLOW_STORES | ALLOW_LOADS_STORES
-       }, {
-               .intel_events = {
-                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
-               },
-               .amd_events = {
-                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
-               },
-               .msg = "Only allow loads, stores, and loads + stores.",
-               .flags = ALLOW_LOADS | ALLOW_STORES | ALLOW_LOADS_STORES
-       },
-};
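-
-/*
- * Example: "Only allow loads and stores" uses the Intel entries
- * INCLUDE(MEM_INST_RETIRED, 0x7C, 0) and EXCLUDE(MEM_INST_RETIRED, 0xFF, 0x83).
- * All three unit masks (0x81, 0x82, 0x83) satisfy (umask & 0x7C) == 0, so the
- * INCLUDE entry admits them all, and the EXCLUDE entry then removes 0x83
- * (loads + stores), leaving only loads and stores counting.
- */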
-
-static int append_test_events(const struct masked_events_test *test,
-                             uint64_t *events, int nevents)
-{
-       const uint64_t *evts;
-       int i;
-
-       evts = use_intel_pmu() ? test->intel_events : test->amd_events;
-       for (i = 0; i < MAX_TEST_EVENTS; i++) {
-               if (evts[i] == 0)
-                       break;
-
-               events[nevents + i] = evts[i];
-       }
-
-       return nevents + i;
-}
-
-static bool bool_eq(bool a, bool b)
-{
-       return a == b;
-}
-
-static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events,
-                                   int nevents)
-{
-       int ntests = ARRAY_SIZE(test_cases);
-       int i, n;
-
-       for (i = 0; i < ntests; i++) {
-               const struct masked_events_test *test = &test_cases[i];
-
-               /* Do any test case events overflow MAX_TEST_EVENTS? */
-               assert(test->intel_event_end == 0);
-               assert(test->amd_event_end == 0);
-
-               n = append_test_events(test, events, nevents);
-
-               run_masked_events_test(vcpu, events, n);
-
-               TEST_ASSERT(bool_eq(pmc_results.loads, test->flags & ALLOW_LOADS) &&
-                           bool_eq(pmc_results.stores, test->flags & ALLOW_STORES) &&
-                           bool_eq(pmc_results.loads_stores,
-                                   test->flags & ALLOW_LOADS_STORES),
-                           "%s  loads: %lu, stores: %lu, loads + stores: %lu",
-                           test->msg, pmc_results.loads, pmc_results.stores,
-                           pmc_results.loads_stores);
-       }
-}
-
-static void add_dummy_events(uint64_t *events, int nevents)
-{
-       int i;
-
-       for (i = 0; i < nevents; i++) {
-               int event_select = i % 0xFF;
-               bool exclude = ((i % 4) == 0);
-
-               if (event_select == MEM_INST_RETIRED ||
-                   event_select == LS_DISPATCH)
-                       event_select++;
-
-               events[i] = KVM_PMU_ENCODE_MASKED_ENTRY(event_select, 0,
-                                                       0, exclude);
-       }
-}
-
-static void test_masked_events(struct kvm_vcpu *vcpu)
-{
-       int nevents = KVM_PMU_EVENT_FILTER_MAX_EVENTS - MAX_TEST_EVENTS;
-       uint64_t events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
-
-       /* Run the test cases against a sparse PMU event filter. */
-       run_masked_events_tests(vcpu, events, 0);
-
-       /* Run the test cases against a dense PMU event filter. */
-       add_dummy_events(events, KVM_PMU_EVENT_FILTER_MAX_EVENTS);
-       run_masked_events_tests(vcpu, events, nevents);
-}
-
-static int set_pmu_event_filter(struct kvm_vcpu *vcpu,
-                               struct __kvm_pmu_event_filter *__f)
-{
-       struct kvm_pmu_event_filter *f = (void *)__f;
-
-       return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
-}
-
-static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event,
-                                      uint32_t flags, uint32_t action)
-{
-       struct __kvm_pmu_event_filter f = {
-               .nevents = 1,
-               .flags = flags,
-               .action = action,
-               .events = {
-                       event,
-               },
-       };
-
-       return set_pmu_event_filter(vcpu, &f);
-}
-
-static void test_filter_ioctl(struct kvm_vcpu *vcpu)
-{
-       uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
-       struct __kvm_pmu_event_filter f;
-       uint64_t e = ~0ul;
-       int r;
-
-       /*
-        * Unfortunately, event data with invalid bits set (i.e. bits other
-        * than eventsel+umask) is expected to be accepted when flags == 0.
-        */
-       r = set_pmu_single_event_filter(vcpu, e, 0, KVM_PMU_EVENT_ALLOW);
-       TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
-
-       r = set_pmu_single_event_filter(vcpu, e,
-                                       KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
-                                       KVM_PMU_EVENT_ALLOW);
-       TEST_ASSERT(r != 0, "Invalid PMU Event Filter is expected to fail");
-
-       e = KVM_PMU_ENCODE_MASKED_ENTRY(0xff, 0xff, 0xff, 0xf);
-       r = set_pmu_single_event_filter(vcpu, e,
-                                       KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
-                                       KVM_PMU_EVENT_ALLOW);
-       TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
-
-       f = base_event_filter;
-       f.action = PMU_EVENT_FILTER_INVALID_ACTION;
-       r = set_pmu_event_filter(vcpu, &f);
-       TEST_ASSERT(r, "Set invalid action is expected to fail");
-
-       f = base_event_filter;
-       f.flags = PMU_EVENT_FILTER_INVALID_FLAGS;
-       r = set_pmu_event_filter(vcpu, &f);
-       TEST_ASSERT(r, "Set invalid flags is expected to fail");
-
-       f = base_event_filter;
-       f.nevents = PMU_EVENT_FILTER_INVALID_NEVENTS;
-       r = set_pmu_event_filter(vcpu, &f);
-       TEST_ASSERT(r, "Exceeding the max number of filter events should fail");
-
-       f = base_event_filter;
-       f.fixed_counter_bitmap = ~GENMASK_ULL(nr_fixed_counters, 0);
-       r = set_pmu_event_filter(vcpu, &f);
-       TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed");
-}
-
-static void intel_run_fixed_counter_guest_code(uint8_t idx)
-{
-       for (;;) {
-               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-               wrmsr(MSR_CORE_PERF_FIXED_CTR0 + idx, 0);
-
-               /* Only OS_EN bit is enabled for fixed counter[idx]. */
-               wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(idx, FIXED_PMC_KERNEL));
-               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(idx));
-               __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
-               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-
-               GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + idx));
-       }
-}
-
-static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu,
-                                              uint32_t action, uint32_t bitmap)
-{
-       struct __kvm_pmu_event_filter f = {
-               .action = action,
-               .fixed_counter_bitmap = bitmap,
-       };
-       set_pmu_event_filter(vcpu, &f);
-
-       return run_vcpu_to_sync(vcpu);
-}
-
-static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu,
-                                                  uint32_t action,
-                                                  uint32_t bitmap)
-{
-       struct __kvm_pmu_event_filter f = base_event_filter;
-
-       f.action = action;
-       f.fixed_counter_bitmap = bitmap;
-       set_pmu_event_filter(vcpu, &f);
-
-       return run_vcpu_to_sync(vcpu);
-}
-
-static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx,
-                                       uint8_t nr_fixed_counters)
-{
-       unsigned int i;
-       uint32_t bitmap;
-       uint64_t count;
-
-       TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8,
-                   "Invalid nr_fixed_counters");
-
-       /*
-        * Check that the fixed performance counter counts normally when KVM
-        * userspace doesn't set any PMU filter.
-        */
-       count = run_vcpu_to_sync(vcpu);
-       TEST_ASSERT(count, "Unexpected count value: %ld", count);
-
-       for (i = 0; i < BIT(nr_fixed_counters); i++) {
-               bitmap = i;
-               count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_ALLOW,
-                                                      bitmap);
-               TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
-
-               count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_DENY,
-                                                      bitmap);
-               TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
-
-               /*
-                * Check that fixed_counter_bitmap has higher priority than
-                * events[] when both are set.
-                */
-               count = test_set_gp_and_fixed_event_filter(vcpu,
-                                                          KVM_PMU_EVENT_ALLOW,
-                                                          bitmap);
-               TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
-
-               count = test_set_gp_and_fixed_event_filter(vcpu,
-                                                          KVM_PMU_EVENT_DENY,
-                                                          bitmap);
-               TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
-       }
-}
-
-static void test_fixed_counter_bitmap(void)
-{
-       uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu;
-       uint8_t idx;
-
-       /*
-        * Check that pmu_event_filter works as expected when it's applied to
-        * fixed performance counters.
-        */
-       for (idx = 0; idx < nr_fixed_counters; idx++) {
-               vm = vm_create_with_one_vcpu(&vcpu,
-                                            intel_run_fixed_counter_guest_code);
-               vcpu_args_set(vcpu, 1, idx);
-               __test_fixed_counter_bitmap(vcpu, idx, nr_fixed_counters);
-               kvm_vm_free(vm);
-       }
-}
-
-int main(int argc, char *argv[])
-{
-       void (*guest_code)(void);
-       struct kvm_vcpu *vcpu, *vcpu2 = NULL;
-       struct kvm_vm *vm;
-
-       TEST_REQUIRE(kvm_is_pmu_enabled());
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER));
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_MASKED_EVENTS));
-
-       TEST_REQUIRE(use_intel_pmu() || use_amd_pmu());
-       guest_code = use_intel_pmu() ? intel_guest_code : amd_guest_code;
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       TEST_REQUIRE(sanity_check_pmu(vcpu));
-
-       if (use_amd_pmu())
-               test_amd_deny_list(vcpu);
-
-       test_without_filter(vcpu);
-       test_member_deny_list(vcpu);
-       test_member_allow_list(vcpu);
-       test_not_member_deny_list(vcpu);
-       test_not_member_allow_list(vcpu);
-
-       if (use_intel_pmu() &&
-           supports_event_mem_inst_retired() &&
-           kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) >= 3)
-               vcpu2 = vm_vcpu_add(vm, 2, intel_masked_events_guest_code);
-       else if (use_amd_pmu())
-               vcpu2 = vm_vcpu_add(vm, 2, amd_masked_events_guest_code);
-
-       if (vcpu2)
-               test_masked_events(vcpu2);
-       test_filter_ioctl(vcpu);
-
-       kvm_vm_free(vm);
-
-       test_pmu_config_disable(guest_code);
-       test_fixed_counter_bitmap();
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c
deleted file mode 100644 (file)
index 82a8d88..0000000
+++ /dev/null
@@ -1,483 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2022, Google LLC.
- */
-#include <fcntl.h>
-#include <limits.h>
-#include <pthread.h>
-#include <sched.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/kvm_para.h>
-#include <linux/memfd.h>
-#include <linux/sizes.h>
-
-#include <test_util.h>
-#include <kvm_util.h>
-#include <processor.h>
-
-#define BASE_DATA_SLOT         10
-#define BASE_DATA_GPA          ((uint64_t)(1ull << 32))
-#define PER_CPU_DATA_SIZE      ((uint64_t)(SZ_2M + PAGE_SIZE))
-
-/* Horrific macro so that the line info is captured accurately :-( */
-#define memcmp_g(gpa, pattern,  size)                                                          \
-do {                                                                                           \
-       uint8_t *mem = (uint8_t *)gpa;                                                          \
-       size_t i;                                                                               \
-                                                                                               \
-       for (i = 0; i < size; i++)                                                              \
-               __GUEST_ASSERT(mem[i] == pattern,                                               \
-                              "Guest expected 0x%x at offset %lu (gpa 0x%lx), got 0x%x",       \
-                              pattern, i, gpa + i, mem[i]);                                    \
-} while (0)
-
-static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size)
-{
-       size_t i;
-
-       for (i = 0; i < size; i++)
-               TEST_ASSERT(mem[i] == pattern,
-                           "Host expected 0x%x at gpa 0x%lx, got 0x%x",
-                           pattern, gpa + i, mem[i]);
-}
-
-/*
- * Run memory conversion tests with explicit conversion:
- * Execute a KVM hypercall to map/unmap a gpa range, which will cause a
- * userspace exit to back/unback private memory. Subsequent accesses by the
- * guest to the gpa range will not cause an exit to userspace.
- *
- * Test memory conversion scenarios with the following steps:
- * 1) Access private memory using private access and verify that memory contents
- *   are not visible to userspace.
- * 2) Convert memory to shared using explicit conversions and ensure that
- *   userspace is able to access the shared regions.
- * 3) Convert memory back to private using explicit conversions and ensure that
- *   userspace is again not able to access converted private regions.
- */
-
-#define GUEST_STAGE(o, s) { .offset = o, .size = s }
-
-enum ucall_syncs {
-       SYNC_SHARED,
-       SYNC_PRIVATE,
-};
-
-static void guest_sync_shared(uint64_t gpa, uint64_t size,
-                             uint8_t current_pattern, uint8_t new_pattern)
-{
-       GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern);
-}
-
-static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern)
-{
-       GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern);
-}
-
-/* Arbitrary values, KVM doesn't care about the attribute flags. */
-#define MAP_GPA_SET_ATTRIBUTES BIT(0)
-#define MAP_GPA_SHARED         BIT(1)
-#define MAP_GPA_DO_FALLOCATE   BIT(2)
-
-static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared,
-                         bool do_fallocate)
-{
-       uint64_t flags = MAP_GPA_SET_ATTRIBUTES;
-
-       if (map_shared)
-               flags |= MAP_GPA_SHARED;
-       if (do_fallocate)
-               flags |= MAP_GPA_DO_FALLOCATE;
-       kvm_hypercall_map_gpa_range(gpa, size, flags);
-}
-
-static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate)
-{
-       guest_map_mem(gpa, size, true, do_fallocate);
-}
-
-static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate)
-{
-       guest_map_mem(gpa, size, false, do_fallocate);
-}
-
-struct {
-       uint64_t offset;
-       uint64_t size;
-} static const test_ranges[] = {
-       GUEST_STAGE(0, PAGE_SIZE),
-       GUEST_STAGE(0, SZ_2M),
-       GUEST_STAGE(PAGE_SIZE, PAGE_SIZE),
-       GUEST_STAGE(PAGE_SIZE, SZ_2M),
-       GUEST_STAGE(SZ_2M, PAGE_SIZE),
-};
-
-static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate)
-{
-       const uint8_t def_p = 0xaa;
-       const uint8_t init_p = 0xcc;
-       uint64_t j;
-       int i;
-
-       /* Memory should be shared by default. */
-       memset((void *)base_gpa, def_p, PER_CPU_DATA_SIZE);
-       memcmp_g(base_gpa, def_p, PER_CPU_DATA_SIZE);
-       guest_sync_shared(base_gpa, PER_CPU_DATA_SIZE, def_p, init_p);
-
-       memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
-
-       for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
-               uint64_t gpa = base_gpa + test_ranges[i].offset;
-               uint64_t size = test_ranges[i].size;
-               uint8_t p1 = 0x11;
-               uint8_t p2 = 0x22;
-               uint8_t p3 = 0x33;
-               uint8_t p4 = 0x44;
-
-               /*
-                * Set the test region to pattern one to differentiate it from
-                * the data range as a whole (contains the initial pattern).
-                */
-               memset((void *)gpa, p1, size);
-
-               /*
-                * Convert to private, set and verify the private data, and
-                * then verify that the rest of the data (map shared) still
-                * holds the initial pattern, and that the host always sees the
-                * shared memory (initial pattern).  Unlike shared memory,
-                * punching a hole in private memory is destructive, i.e.
-                * previous values aren't guaranteed to be preserved.
-                */
-               guest_map_private(gpa, size, do_fallocate);
-
-               memset((void *)gpa, p2, size);
-               guest_sync_private(gpa, size, p1);
-
-               /*
-                * Verify that the private memory was set to pattern two, and
-                * that shared memory still holds the initial pattern.
-                */
-               memcmp_g(gpa, p2, size);
-               if (gpa > base_gpa)
-                       memcmp_g(base_gpa, init_p, gpa - base_gpa);
-               if (gpa + size < base_gpa + PER_CPU_DATA_SIZE)
-                       memcmp_g(gpa + size, init_p,
-                                (base_gpa + PER_CPU_DATA_SIZE) - (gpa + size));
-
-               /*
-                * Convert odd-number page frames back to shared to verify KVM
-                * also correctly handles holes in private ranges.
-                */
-               for (j = 0; j < size; j += PAGE_SIZE) {
-                       if ((j >> PAGE_SHIFT) & 1) {
-                               guest_map_shared(gpa + j, PAGE_SIZE, do_fallocate);
-                               guest_sync_shared(gpa + j, PAGE_SIZE, p1, p3);
-
-                               memcmp_g(gpa + j, p3, PAGE_SIZE);
-                       } else {
-                               guest_sync_private(gpa + j, PAGE_SIZE, p1);
-                       }
-               }
-
-               /*
-                * Convert the entire region back to shared, explicitly write
-                * pattern three to fill in the even-number frames before
-                * asking the host to verify (and write pattern four).
-                */
-               guest_map_shared(gpa, size, do_fallocate);
-               memset((void *)gpa, p3, size);
-               guest_sync_shared(gpa, size, p3, p4);
-               memcmp_g(gpa, p4, size);
-
-               /* Reset the shared memory back to the initial pattern. */
-               memset((void *)gpa, init_p, size);
-
-               /*
-                * Free (via PUNCH_HOLE) *all* private memory so that the next
-                * iteration starts from a clean slate, e.g. with respect to
-                * whether or not there are pages/folios in guest_mem.
-                */
-               guest_map_shared(base_gpa, PER_CPU_DATA_SIZE, true);
-       }
-}
-
-static void guest_punch_hole(uint64_t gpa, uint64_t size)
-{
-       /* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */
-       uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE;
-
-       kvm_hypercall_map_gpa_range(gpa, size, flags);
-}
-
-/*
- * Test that PUNCH_HOLE actually frees memory by punching holes without doing a
- * proper conversion.  Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating
- * (subsequent fault) should zero memory.
- */
-static void guest_test_punch_hole(uint64_t base_gpa, bool precise)
-{
-       const uint8_t init_p = 0xcc;
-       int i;
-
-       /*
-        * Convert the entire range to private, this testcase is all about
-        * punching holes in guest_memfd, i.e. shared mappings aren't needed.
-        */
-       guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false);
-
-       for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
-               uint64_t gpa = base_gpa + test_ranges[i].offset;
-               uint64_t size = test_ranges[i].size;
-
-               /*
-                * Free all memory before each iteration, even for the !precise
-                * case where the memory will be faulted back in.  Freeing and
-                * reallocating should obviously work, and freeing all memory
-                * minimizes the probability of cross-testcase influence.
-                */
-               guest_punch_hole(base_gpa, PER_CPU_DATA_SIZE);
-
-               /* Fault-in and initialize memory, and verify the pattern. */
-               if (precise) {
-                       memset((void *)gpa, init_p, size);
-                       memcmp_g(gpa, init_p, size);
-               } else {
-                       memset((void *)base_gpa, init_p, PER_CPU_DATA_SIZE);
-                       memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
-               }
-
-               /*
-                * Punch a hole at the target range and verify that reads from
-                * the guest succeed and return zeroes.
-                */
-               guest_punch_hole(gpa, size);
-               memcmp_g(gpa, 0, size);
-       }
-}
-
-static void guest_code(uint64_t base_gpa)
-{
-       /*
-        * Run the conversion test twice, with and without doing fallocate() on
-        * the guest_memfd backing when converting between shared and private.
-        */
-       guest_test_explicit_conversion(base_gpa, false);
-       guest_test_explicit_conversion(base_gpa, true);
-
-       /*
-        * Run the PUNCH_HOLE test twice too, once with the entire guest_memfd
-        * faulted in, once with only the target range faulted in.
-        */
-       guest_test_punch_hole(base_gpa, false);
-       guest_test_punch_hole(base_gpa, true);
-       GUEST_DONE();
-}
-
-static void handle_exit_hypercall(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-       uint64_t gpa = run->hypercall.args[0];
-       uint64_t size = run->hypercall.args[1] * PAGE_SIZE;
-       bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES;
-       bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED;
-       bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE;
-       struct kvm_vm *vm = vcpu->vm;
-
-       TEST_ASSERT(run->hypercall.nr == KVM_HC_MAP_GPA_RANGE,
-                   "Wanted MAP_GPA_RANGE (%u), got '%llu'",
-                   KVM_HC_MAP_GPA_RANGE, run->hypercall.nr);
-
-       if (do_fallocate)
-               vm_guest_mem_fallocate(vm, gpa, size, map_shared);
-
-       if (set_attributes)
-               vm_set_memory_attributes(vm, gpa, size,
-                                        map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE);
-       run->hypercall.ret = 0;
-}
-
-static bool run_vcpus;
-
-static void *__test_mem_conversions(void *__vcpu)
-{
-       struct kvm_vcpu *vcpu = __vcpu;
-       struct kvm_run *run = vcpu->run;
-       struct kvm_vm *vm = vcpu->vm;
-       struct ucall uc;
-
-       while (!READ_ONCE(run_vcpus))
-               ;
-
-       for ( ;; ) {
-               vcpu_run(vcpu);
-
-               if (run->exit_reason == KVM_EXIT_HYPERCALL) {
-                       handle_exit_hypercall(vcpu);
-                       continue;
-               }
-
-               TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-                           "Wanted KVM_EXIT_IO, got exit reason: %u (%s)",
-                           run->exit_reason, exit_reason_str(run->exit_reason));
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-               case UCALL_SYNC: {
-                       uint64_t gpa = uc.args[1];
-                       size_t size = uc.args[2];
-                       size_t i;
-
-                       TEST_ASSERT(uc.args[0] == SYNC_SHARED ||
-                                   uc.args[0] == SYNC_PRIVATE,
-                                   "Unknown sync command '%ld'", uc.args[0]);
-
-                       for (i = 0; i < size; i += vm->page_size) {
-                               size_t nr_bytes = min_t(size_t, vm->page_size, size - i);
-                               uint8_t *hva = addr_gpa2hva(vm, gpa + i);
-
-                               /* In all cases, the host should observe the shared data. */
-                               memcmp_h(hva, gpa + i, uc.args[3], nr_bytes);
-
-                               /* For shared, write the new pattern to guest memory. */
-                               if (uc.args[0] == SYNC_SHARED)
-                                       memset(hva, uc.args[4], nr_bytes);
-                       }
-                       break;
-               }
-               case UCALL_DONE:
-                       return NULL;
-               default:
-                       TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
-               }
-       }
-}
-
-static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus,
-                                uint32_t nr_memslots)
-{
-       /*
-        * Allocate enough memory so that each vCPU's chunk of memory can be
-        * naturally aligned with respect to the size of the backing store.
-        */
-       const size_t alignment = max_t(size_t, SZ_2M, get_backing_src_pagesz(src_type));
-       const size_t per_cpu_size = align_up(PER_CPU_DATA_SIZE, alignment);
-       const size_t memfd_size = per_cpu_size * nr_vcpus;
-       const size_t slot_size = memfd_size / nr_memslots;
-       struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
-       pthread_t threads[KVM_MAX_VCPUS];
-       struct kvm_vm *vm;
-       int memfd, i, r;
-
-       const struct vm_shape shape = {
-               .mode = VM_MODE_DEFAULT,
-               .type = KVM_X86_SW_PROTECTED_VM,
-       };
-
-       TEST_ASSERT(slot_size * nr_memslots == memfd_size,
-                   "The memfd size (0x%lx) needs to be cleanly divisible by the number of memslots (%u)",
-                   memfd_size, nr_memslots);
-       vm = __vm_create_with_vcpus(shape, nr_vcpus, 0, guest_code, vcpus);
-
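-       /*
-        * Exit to userspace on KVM_HC_MAP_GPA_RANGE hypercalls so that the
-        * host can handle shared<=>private conversions.
-        */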
-       vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE));
-
-       memfd = vm_create_guest_memfd(vm, memfd_size, 0);
-
-       for (i = 0; i < nr_memslots; i++)
-               vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i,
-                          BASE_DATA_SLOT + i, slot_size / vm->page_size,
-                          KVM_MEM_GUEST_MEMFD, memfd, slot_size * i);
-
-       for (i = 0; i < nr_vcpus; i++) {
-               uint64_t gpa = BASE_DATA_GPA + i * per_cpu_size;
-
-               vcpu_args_set(vcpus[i], 1, gpa);
-
-               /*
-                * Map only what is needed so that an out-of-bounds access
-                * results in a #PF => SHUTDOWN instead of data corruption.
-                */
-               virt_map(vm, gpa, gpa, PER_CPU_DATA_SIZE / vm->page_size);
-
-               pthread_create(&threads[i], NULL, __test_mem_conversions, vcpus[i]);
-       }
-
-       WRITE_ONCE(run_vcpus, true);
-
-       for (i = 0; i < nr_vcpus; i++)
-               pthread_join(threads[i], NULL);
-
-       kvm_vm_free(vm);
-
-       /*
-        * Allocate and free memory from the guest_memfd after closing the VM
-        * fd.  The guest_memfd is gifted a reference to its owning VM, i.e.
-        * should prevent the VM from being fully destroyed until the last
-        * reference to the guest_memfd is also put.
-        */
-       r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
-       TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
-
-       r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
-       TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
-
-       close(memfd);
-}
-
-static void usage(const char *cmd)
-{
-       puts("");
-       printf("usage: %s [-h] [-m nr_memslots] [-s mem_type] [-n nr_vcpus]\n", cmd);
-       puts("");
-       backing_src_help("-s");
-       puts("");
-       puts(" -n: specify the number of vcpus (default: 1)");
-       puts("");
-       puts(" -m: specify the number of memslots (default: 1)");
-       puts("");
-}
-
-int main(int argc, char *argv[])
-{
-       enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC;
-       uint32_t nr_memslots = 1;
-       uint32_t nr_vcpus = 1;
-       int opt;
-
-       TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
-
-       while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) {
-               switch (opt) {
-               case 's':
-                       src_type = parse_backing_src_type(optarg);
-                       break;
-               case 'n':
-                       nr_vcpus = atoi_positive("nr_vcpus", optarg);
-                       break;
-               case 'm':
-                       nr_memslots = atoi_positive("nr_memslots", optarg);
-                       break;
-               case 'h':
-               default:
-                       usage(argv[0]);
-                       exit(0);
-               }
-       }
-
-       test_mem_conversions(src_type, nr_vcpus, nr_memslots);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c
deleted file mode 100644 (file)
index 13e72fc..0000000
+++ /dev/null
@@ -1,120 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2023, Google LLC.
- */
-#include <linux/kvm.h>
-#include <pthread.h>
-#include <stdint.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "test_util.h"
-
-/* Arbitrarily selected to avoid overlaps with anything else */
-#define EXITS_TEST_GVA 0xc0000000
-#define EXITS_TEST_GPA EXITS_TEST_GVA
-#define EXITS_TEST_NPAGES 1
-#define EXITS_TEST_SIZE (EXITS_TEST_NPAGES * PAGE_SIZE)
-#define EXITS_TEST_SLOT 10
-
-static uint64_t guest_repeatedly_read(void)
-{
-       volatile uint64_t value;
-
-       while (true)
-               value = *((uint64_t *) EXITS_TEST_GVA);
-
-       return value;
-}
-
-static uint32_t run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu)
-{
-       int r;
-
-       r = _vcpu_run(vcpu);
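-       /* KVM_RUN is expected to fail with -EFAULT on KVM_EXIT_MEMORY_FAULT. */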
-       if (r) {
-               TEST_ASSERT(errno == EFAULT, KVM_IOCTL_ERROR(KVM_RUN, r));
-               TEST_ASSERT_EQ(vcpu->run->exit_reason, KVM_EXIT_MEMORY_FAULT);
-       }
-       return vcpu->run->exit_reason;
-}
-
-const struct vm_shape protected_vm_shape = {
-       .mode = VM_MODE_DEFAULT,
-       .type = KVM_X86_SW_PROTECTED_VM,
-};
-
-static void test_private_access_memslot_deleted(void)
-{
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu;
-       pthread_t vm_thread;
-       void *thread_return;
-       uint32_t exit_reason;
-
-       vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
-                                          guest_repeatedly_read);
-
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-                                   EXITS_TEST_GPA, EXITS_TEST_SLOT,
-                                   EXITS_TEST_NPAGES,
-                                   KVM_MEM_GUEST_MEMFD);
-
-       virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
-
-       /* Request to access page privately */
-       vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
-
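-       /*
-        * Run the vCPU in a separate thread so that the memslot can be
-        * deleted while the guest is actively accessing it.
-        */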
-       pthread_create(&vm_thread, NULL,
-                      (void *(*)(void *))run_vcpu_get_exit_reason,
-                      (void *)vcpu);
-
-       vm_mem_region_delete(vm, EXITS_TEST_SLOT);
-
-       pthread_join(vm_thread, &thread_return);
-       exit_reason = (uint32_t)(uint64_t)thread_return;
-
-       TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
-       TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
-       TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
-       TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
-
-       kvm_vm_free(vm);
-}
-
-static void test_private_access_memslot_not_private(void)
-{
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu;
-       uint32_t exit_reason;
-
-       vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
-                                          guest_repeatedly_read);
-
-       /* Add a non-private memslot (flags = 0) */
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-                                   EXITS_TEST_GPA, EXITS_TEST_SLOT,
-                                   EXITS_TEST_NPAGES, 0);
-
-       virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
-
-       /* Request to access page privately */
-       vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
-
-       exit_reason = run_vcpu_get_exit_reason(vcpu);
-
-       TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
-       TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
-       TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
-       TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
-
-       kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
-       TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
-
-       test_private_access_memslot_deleted();
-       test_private_access_memslot_not_private();
-}
diff --git a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c b/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c
deleted file mode 100644 (file)
index cbc92a8..0000000
+++ /dev/null
@@ -1,74 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Test edge cases and race conditions in kvm_recalculate_apic_map().
- */
-
-#include <sys/ioctl.h>
-#include <pthread.h>
-#include <time.h>
-
-#include "processor.h"
-#include "test_util.h"
-#include "kvm_util.h"
-#include "apic.h"
-
-#define TIMEOUT                5       /* seconds */
-
-#define LAPIC_DISABLED 0
-#define LAPIC_X2APIC   (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)
-#define MAX_XAPIC_ID   0xff
-
-static void *race(void *arg)
-{
-       struct kvm_lapic_state lapic = {};
-       struct kvm_vcpu *vcpu = arg;
-
-       while (1) {
-               /* Trigger kvm_recalculate_apic_map(). */
-               vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
-               pthread_testcancel();
-       }
-
-       return NULL;
-}
-
-int main(void)
-{
-       struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
-       struct kvm_vcpu *vcpuN;
-       struct kvm_vm *vm;
-       pthread_t thread;
-       time_t t;
-       int i;
-
-       kvm_static_assert(KVM_MAX_VCPUS > MAX_XAPIC_ID);
-
-       /*
-        * Create the max number of vCPUs supported by selftests so that KVM
-        * has a decent amount of work to do when recalculating the map, i.e. to
-        * make the problematic window large enough to hit.
-        */
-       vm = vm_create_with_vcpus(KVM_MAX_VCPUS, NULL, vcpus);
-
-       /*
-        * Enable x2APIC on all vCPUs so that KVM doesn't bail from the recalc
-        * due to vCPUs having aliased xAPIC IDs (truncated to 8 bits).
-        */
-       for (i = 0; i < KVM_MAX_VCPUS; i++)
-               vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC);
-
-       TEST_ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0);
-
-       vcpuN = vcpus[KVM_MAX_VCPUS - 1];
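-       /*
-        * Toggle x2APIC on and off on the last vCPU to repeatedly trigger
-        * APIC map recalculations that race with the KVM_SET_LAPIC thread.
-        */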
-       for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
-               vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_X2APIC);
-               vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED);
-       }
-
-       TEST_ASSERT_EQ(pthread_cancel(thread), 0);
-       TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
-
-       kvm_vm_free(vm);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
deleted file mode 100644 (file)
index 4991378..0000000
+++ /dev/null
@@ -1,146 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Test that KVM_SET_BOOT_CPU_ID works as intended
- *
- * Copyright (C) 2020, Red Hat, Inc.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "apic.h"
-
-static void guest_bsp_vcpu(void *arg)
-{
-       GUEST_SYNC(1);
-
-       GUEST_ASSERT_NE(get_bsp_flag(), 0);
-
-       GUEST_DONE();
-}
-
-static void guest_not_bsp_vcpu(void *arg)
-{
-       GUEST_SYNC(1);
-
-       GUEST_ASSERT_EQ(get_bsp_flag(), 0);
-
-       GUEST_DONE();
-}
-
-static void test_set_invalid_bsp(struct kvm_vm *vm)
-{
-       unsigned long max_vcpu_id = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
-       int r;
-
-       if (max_vcpu_id) {
-               r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(max_vcpu_id + 1));
-               TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID > MAX should fail");
-       }
-
-       r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(1L << 32));
-       TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID[63:32]!=0 should fail");
-}
-
-static void test_set_bsp_busy(struct kvm_vcpu *vcpu, const char *msg)
-{
-       int r = __vm_ioctl(vcpu->vm, KVM_SET_BOOT_CPU_ID,
-                          (void *)(unsigned long)vcpu->id);
-
-       TEST_ASSERT(r == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set %s", msg);
-}
-
-static void run_vcpu(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-       int stage;
-
-       for (stage = 0; stage < 2; stage++) {
-
-               vcpu_run(vcpu);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
-                                       uc.args[1] == stage + 1,
-                                       "Stage %d: Unexpected register values vmexit, got %lx",
-                                       stage + 1, (ulong)uc.args[1]);
-                       test_set_bsp_busy(vcpu, "while running vm");
-                       break;
-               case UCALL_DONE:
-                       TEST_ASSERT(stage == 1,
-                                       "Expected GUEST_DONE in stage 2, got stage %d",
-                                       stage);
-                       break;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-               default:
-                       TEST_ASSERT(false, "Unexpected exit: %s",
-                                   exit_reason_str(vcpu->run->exit_reason));
-               }
-       }
-}
-
-static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id,
-                               struct kvm_vcpu *vcpus[])
-{
-       struct kvm_vm *vm;
-       uint32_t i;
-
-       vm = vm_create(nr_vcpus);
-
-       test_set_invalid_bsp(vm);
-
-       vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(unsigned long)bsp_vcpu_id);
-
-       for (i = 0; i < nr_vcpus; i++)
-               vcpus[i] = vm_vcpu_add(vm, i, i == bsp_vcpu_id ? guest_bsp_vcpu :
-                                                                guest_not_bsp_vcpu);
-       return vm;
-}
-
-static void run_vm_bsp(uint32_t bsp_vcpu_id)
-{
-       struct kvm_vcpu *vcpus[2];
-       struct kvm_vm *vm;
-
-       vm = create_vm(ARRAY_SIZE(vcpus), bsp_vcpu_id, vcpus);
-
-       run_vcpu(vcpus[0]);
-       run_vcpu(vcpus[1]);
-
-       kvm_vm_free(vm);
-}
-
-static void check_set_bsp_busy(void)
-{
-       struct kvm_vcpu *vcpus[2];
-       struct kvm_vm *vm;
-
-       vm = create_vm(ARRAY_SIZE(vcpus), 0, vcpus);
-
-       test_set_bsp_busy(vcpus[1], "after adding vcpu");
-
-       run_vcpu(vcpus[0]);
-       run_vcpu(vcpus[1]);
-
-       test_set_bsp_busy(vcpus[1], "to a terminated vcpu");
-
-       kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID));
-
-       run_vm_bsp(0);
-       run_vm_bsp(1);
-       run_vm_bsp(0);
-
-       check_set_bsp_busy();
-}
diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
deleted file mode 100644 (file)
index c021c07..0000000
+++ /dev/null
@@ -1,141 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * KVM_SET_SREGS tests
- *
- * Copyright (C) 2018, Google LLC.
- *
- * This is a regression test for the bug fixed by the following commit:
- * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values")
- *
- * That bug allowed a user-mode program that called the KVM_SET_SREGS
- * ioctl to put a VCPU's local APIC into an invalid state.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-
-#define TEST_INVALID_CR_BIT(vcpu, cr, orig, bit)                               \
-do {                                                                           \
-       struct kvm_sregs new;                                                   \
-       int rc;                                                                 \
-                                                                               \
-       /* Skip the sub-test if the feature/bit is supported. */                \
-       if (orig.cr & bit)                                                      \
-               break;                                                          \
-                                                                               \
-       memcpy(&new, &orig, sizeof(sregs));                                     \
-       new.cr |= bit;                                                          \
-                                                                               \
-       rc = _vcpu_sregs_set(vcpu, &new);                                       \
-       TEST_ASSERT(rc, "KVM allowed invalid " #cr " bit (0x%lx)", bit);        \
-                                                                               \
-       /* Sanity check that KVM didn't change anything. */                     \
-       vcpu_sregs_get(vcpu, &new);                                             \
-       TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs");   \
-} while (0)
-
-static uint64_t calc_supported_cr4_feature_bits(void)
-{
-       uint64_t cr4;
-
-       cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE |
-             X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE |
-             X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT;
-       if (kvm_cpu_has(X86_FEATURE_UMIP))
-               cr4 |= X86_CR4_UMIP;
-       if (kvm_cpu_has(X86_FEATURE_LA57))
-               cr4 |= X86_CR4_LA57;
-       if (kvm_cpu_has(X86_FEATURE_VMX))
-               cr4 |= X86_CR4_VMXE;
-       if (kvm_cpu_has(X86_FEATURE_SMX))
-               cr4 |= X86_CR4_SMXE;
-       if (kvm_cpu_has(X86_FEATURE_FSGSBASE))
-               cr4 |= X86_CR4_FSGSBASE;
-       if (kvm_cpu_has(X86_FEATURE_PCID))
-               cr4 |= X86_CR4_PCIDE;
-       if (kvm_cpu_has(X86_FEATURE_XSAVE))
-               cr4 |= X86_CR4_OSXSAVE;
-       if (kvm_cpu_has(X86_FEATURE_SMEP))
-               cr4 |= X86_CR4_SMEP;
-       if (kvm_cpu_has(X86_FEATURE_SMAP))
-               cr4 |= X86_CR4_SMAP;
-       if (kvm_cpu_has(X86_FEATURE_PKU))
-               cr4 |= X86_CR4_PKE;
-
-       return cr4;
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_sregs sregs;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       uint64_t cr4;
-       int rc, i;
-
-       /*
-        * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
-        * use it to verify all supported CR4 bits can be set prior to defining
-        * the vCPU model, i.e. without doing KVM_SET_CPUID2.
-        */
-       vm = vm_create_barebones();
-       vcpu = __vm_vcpu_add(vm, 0);
-
-       vcpu_sregs_get(vcpu, &sregs);
-
-       sregs.cr0 = 0;
-       sregs.cr4 |= calc_supported_cr4_feature_bits();
-       cr4 = sregs.cr4;
-
-       rc = _vcpu_sregs_set(vcpu, &sregs);
-       TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4);
-
-       vcpu_sregs_get(vcpu, &sregs);
-       TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)",
-                   sregs.cr4, cr4);
-
-       /* Verify all unsupported features are rejected by KVM. */
-       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP);
-       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57);
-       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE);
-       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMXE);
-       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_FSGSBASE);
-       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PCIDE);
-       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_OSXSAVE);
-       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMEP);
-       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMAP);
-       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PKE);
-
-       for (i = 32; i < 64; i++)
-               TEST_INVALID_CR_BIT(vcpu, cr0, sregs, BIT(i));
-
-       /* NW without CD is illegal, as is PG without PE. */
-       TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW);
-       TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG);
-
-       kvm_vm_free(vm);
-
-       /* Create a "real" VM and verify APIC_BASE can be set. */
-       vm = vm_create_with_one_vcpu(&vcpu, NULL);
-
-       vcpu_sregs_get(vcpu, &sregs);
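-       /*
-        * Enabling x2APIC (bit 10) without the global APIC enable (bit 11) is
-        * an invalid combination, while the global enable alone is legal.
-        */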
-       sregs.apic_base = 1 << 10;
-       rc = _vcpu_sregs_set(vcpu, &sregs);
-       TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
-                   sregs.apic_base);
-       sregs.apic_base = 1 << 11;
-       rc = _vcpu_sregs_set(vcpu, &sregs);
-       TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
-                   sregs.apic_base);
-
-       kvm_vm_free(vm);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c
deleted file mode 100644 (file)
index 3fb967f..0000000
+++ /dev/null
@@ -1,152 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/kvm.h>
-#include <linux/psp-sev.h>
-#include <stdio.h>
-#include <sys/ioctl.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <pthread.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-#include "kselftest.h"
-
-#define SVM_SEV_FEAT_DEBUG_SWAP 32u
-
-/*
- * Some features may have hidden dependencies, or may only work
- * for certain VM types.  Err on the side of safety and don't
- * expect that all supported features can be passed one by one
- * to KVM_SEV_INIT2.
- *
- * (Well, right now there's only one...)
- */
-#define KNOWN_FEATURES SVM_SEV_FEAT_DEBUG_SWAP
-
-int kvm_fd;
-u64 supported_vmsa_features;
-bool have_sev_es;
-
-static int __sev_ioctl(int vm_fd, int cmd_id, void *data)
-{
-       struct kvm_sev_cmd cmd = {
-               .id = cmd_id,
-               .data = (uint64_t)data,
-               .sev_fd = open_sev_dev_path_or_exit(),
-       };
-       int ret;
-
-       ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
-       TEST_ASSERT(ret < 0 || cmd.error == SEV_RET_SUCCESS,
-                   "%d failed: fw error: %d\n",
-                   cmd_id, cmd.error);
-
-       return ret;
-}
-
-static void test_init2(unsigned long vm_type, struct kvm_sev_init *init)
-{
-       struct kvm_vm *vm;
-       int ret;
-
-       vm = vm_create_barebones_type(vm_type);
-       ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init);
-       TEST_ASSERT(ret == 0,
-                   "KVM_SEV_INIT2 return code is %d (expected 0), errno: %d",
-                   ret, errno);
-       kvm_vm_free(vm);
-}
-
-static void test_init2_invalid(unsigned long vm_type, struct kvm_sev_init *init, const char *msg)
-{
-       struct kvm_vm *vm;
-       int ret;
-
-       vm = vm_create_barebones_type(vm_type);
-       ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init);
-       TEST_ASSERT(ret == -1 && errno == EINVAL,
-                   "KVM_SEV_INIT2 should fail, %s.",
-                   msg);
-       kvm_vm_free(vm);
-}
-
-void test_vm_types(void)
-{
-       test_init2(KVM_X86_SEV_VM, &(struct kvm_sev_init){});
-
-       /*
-        * TODO: check that unsupported types cannot be created.  Probably
-        * a separate selftest.
-        */
-       if (have_sev_es)
-               test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){});
-
-       test_init2_invalid(0, &(struct kvm_sev_init){},
-                          "VM type is KVM_X86_DEFAULT_VM");
-       if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))
-               test_init2_invalid(KVM_X86_SW_PROTECTED_VM, &(struct kvm_sev_init){},
-                                  "VM type is KVM_X86_SW_PROTECTED_VM");
-}
-
-void test_flags(uint32_t vm_type)
-{
-       int i;
-
-       for (i = 0; i < 32; i++)
-               test_init2_invalid(vm_type,
-                       &(struct kvm_sev_init){ .flags = BIT(i) },
-                       "invalid flag");
-}
-
-void test_features(uint32_t vm_type, uint64_t supported_features)
-{
-       int i;
-
-       for (i = 0; i < 64; i++) {
-               if (!(supported_features & BIT_ULL(i)))
-                       test_init2_invalid(vm_type,
-                               &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) },
-                               "unknown feature");
-               else if (KNOWN_FEATURES & BIT_ULL(i))
-                       test_init2(vm_type,
-                               &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) });
-       }
-}
-
-int main(int argc, char *argv[])
-{
-       int kvm_fd = open_kvm_dev_path_or_exit();
-       bool have_sev;
-
-       TEST_REQUIRE(__kvm_has_device_attr(kvm_fd, KVM_X86_GRP_SEV,
-                                          KVM_X86_SEV_VMSA_FEATURES) == 0);
-       kvm_device_attr_get(kvm_fd, KVM_X86_GRP_SEV,
-                           KVM_X86_SEV_VMSA_FEATURES,
-                           &supported_vmsa_features);
-
-       have_sev = kvm_cpu_has(X86_FEATURE_SEV);
-       TEST_ASSERT(have_sev == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)),
-                   "sev: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
-                   kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_VM);
-
-       TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM));
-       have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
-
-       TEST_ASSERT(have_sev_es == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_ES_VM)),
-                   "sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
-                   kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM);
-
-       test_vm_types();
-
-       test_flags(KVM_X86_SEV_VM);
-       if (have_sev_es)
-               test_flags(KVM_X86_SEV_ES_VM);
-
-       test_features(KVM_X86_SEV_VM, 0);
-       if (have_sev_es)
-               test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
deleted file mode 100644 (file)
index 0a6dfba..0000000
+++ /dev/null
@@ -1,397 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/kvm.h>
-#include <linux/psp-sev.h>
-#include <stdio.h>
-#include <sys/ioctl.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <pthread.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "sev.h"
-#include "kselftest.h"
-
-#define NR_MIGRATE_TEST_VCPUS 4
-#define NR_MIGRATE_TEST_VMS 3
-#define NR_LOCK_TESTING_THREADS 3
-#define NR_LOCK_TESTING_ITERATIONS 10000
-
-bool have_sev_es;
-
-static struct kvm_vm *sev_vm_create(bool es)
-{
-       struct kvm_vm *vm;
-       int i;
-
-       vm = vm_create_barebones();
-       if (!es)
-               sev_vm_init(vm);
-       else
-               sev_es_vm_init(vm);
-
-       for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
-               __vm_vcpu_add(vm, i);
-
-       sev_vm_launch(vm, es ? SEV_POLICY_ES : 0);
-
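-       /*
-        * SEV-ES requires LAUNCH_UPDATE_VMSA to encrypt the VMSA; migration
-        * is rejected without it (see test_sev_migrate_parameters()).
-        */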
-       if (es)
-               vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
-       return vm;
-}
-
-static struct kvm_vm *aux_vm_create(bool with_vcpus)
-{
-       struct kvm_vm *vm;
-       int i;
-
-       vm = vm_create_barebones();
-       if (!with_vcpus)
-               return vm;
-
-       for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
-               __vm_vcpu_add(vm, i);
-
-       return vm;
-}
-
-static int __sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
-{
-       return __vm_enable_cap(dst, KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, src->fd);
-}
-
-
-static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
-{
-       int ret;
-
-       ret = __sev_migrate_from(dst, src);
-       TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d", ret, errno);
-}
-
-static void test_sev_migrate_from(bool es)
-{
-       struct kvm_vm *src_vm;
-       struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS];
-       int i, ret;
-
-       src_vm = sev_vm_create(es);
-       for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
-               dst_vms[i] = aux_vm_create(true);
-
-       /* Initial migration from the src to the first dst. */
-       sev_migrate_from(dst_vms[0], src_vm);
-
-       for (i = 1; i < NR_MIGRATE_TEST_VMS; i++)
-               sev_migrate_from(dst_vms[i], dst_vms[i - 1]);
-
-       /* Migrate the guest back to the original VM. */
-       ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]);
-       TEST_ASSERT(ret == -1 && errno == EIO,
-                   "VM that was migrated from should be dead. ret %d, errno: %d", ret,
-                   errno);
-
-       kvm_vm_free(src_vm);
-       for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
-               kvm_vm_free(dst_vms[i]);
-}
-
-struct locking_thread_input {
-       struct kvm_vm *vm;
-       struct kvm_vm *source_vms[NR_LOCK_TESTING_THREADS];
-};
-
-static void *locking_test_thread(void *arg)
-{
-       int i, j;
-       struct locking_thread_input *input = (struct locking_thread_input *)arg;
-
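-       /*
-        * Hammer migrations between the VMs; the return value is ignored, as
-        * most attempts are expected to fail, the goal is purely to stress
-        * KVM's locking.
-        */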
-       for (i = 0; i < NR_LOCK_TESTING_ITERATIONS; ++i) {
-               j = i % NR_LOCK_TESTING_THREADS;
-               __sev_migrate_from(input->vm, input->source_vms[j]);
-       }
-
-       return NULL;
-}
-
-static void test_sev_migrate_locking(void)
-{
-       struct locking_thread_input input[NR_LOCK_TESTING_THREADS];
-       pthread_t pt[NR_LOCK_TESTING_THREADS];
-       int i;
-
-       for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) {
-               input[i].vm = sev_vm_create(/* es= */ false);
-               input[0].source_vms[i] = input[i].vm;
-       }
-       for (i = 1; i < NR_LOCK_TESTING_THREADS; ++i)
-               memcpy(input[i].source_vms, input[0].source_vms,
-                      sizeof(input[i].source_vms));
-
-       for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
-               pthread_create(&pt[i], NULL, locking_test_thread, &input[i]);
-
-       for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
-               pthread_join(pt[i], NULL);
-       for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
-               kvm_vm_free(input[i].vm);
-}
-
-static void test_sev_migrate_parameters(void)
-{
-       struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_no_sev,
-               *sev_es_vm_no_vmsa;
-       int ret;
-
-       vm_no_vcpu = vm_create_barebones();
-       vm_no_sev = aux_vm_create(true);
-       ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev);
-       TEST_ASSERT(ret == -1 && errno == EINVAL,
-                   "Migrations require SEV enabled. ret %d, errno: %d", ret,
-                   errno);
-
-       if (!have_sev_es)
-               goto out;
-
-       sev_vm = sev_vm_create(/* es= */ false);
-       sev_es_vm = sev_vm_create(/* es= */ true);
-       sev_es_vm_no_vmsa = vm_create_barebones();
-       sev_es_vm_init(sev_es_vm_no_vmsa);
-       __vm_vcpu_add(sev_es_vm_no_vmsa, 1);
-
-       ret = __sev_migrate_from(sev_vm, sev_es_vm);
-       TEST_ASSERT(
-               ret == -1 && errno == EINVAL,
-               "Should not be able migrate to SEV enabled VM. ret: %d, errno: %d",
-               ret, errno);
-
-       ret = __sev_migrate_from(sev_es_vm, sev_vm);
-       TEST_ASSERT(
-               ret == -1 && errno == EINVAL,
-               "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d",
-               ret, errno);
-
-       ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm);
-       TEST_ASSERT(
-               ret == -1 && errno == EINVAL,
-               "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d",
-               ret, errno);
-
-       ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa);
-       TEST_ASSERT(
-               ret == -1 && errno == EINVAL,
-               "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d",
-               ret, errno);
-
-       kvm_vm_free(sev_vm);
-       kvm_vm_free(sev_es_vm);
-       kvm_vm_free(sev_es_vm_no_vmsa);
-out:
-       kvm_vm_free(vm_no_vcpu);
-       kvm_vm_free(vm_no_sev);
-}
-
-static int __sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
-{
-       return __vm_enable_cap(dst, KVM_CAP_VM_COPY_ENC_CONTEXT_FROM, src->fd);
-}
-
-
-static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
-{
-       int ret;
-
-       ret = __sev_mirror_create(dst, src);
-       TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d", ret, errno);
-}
-
-static void verify_mirror_allowed_cmds(struct kvm_vm *vm)
-{
-       struct kvm_sev_guest_status status;
-       int cmd_id;
-
-       for (cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) {
-               int ret;
-
-               /*
-                * These commands are allowed for mirror VMs, all others are
-                * not.
-                */
-               switch (cmd_id) {
-               case KVM_SEV_LAUNCH_UPDATE_VMSA:
-               case KVM_SEV_GUEST_STATUS:
-               case KVM_SEV_DBG_DECRYPT:
-               case KVM_SEV_DBG_ENCRYPT:
-                       continue;
-               default:
-                       break;
-               }
-
-               /*
-                * These commands should be disallowed before the data
-                * parameter is examined so NULL is OK here.
-                */
-               ret = __vm_sev_ioctl(vm, cmd_id, NULL);
-               TEST_ASSERT(
-                       ret == -1 && errno == EINVAL,
-                       "Should not be able call command: %d. ret: %d, errno: %d",
-                       cmd_id, ret, errno);
-       }
-
-       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
-}
-
-static void test_sev_mirror(bool es)
-{
-       struct kvm_vm *src_vm, *dst_vm;
-       int i;
-
-       src_vm = sev_vm_create(es);
-       dst_vm = aux_vm_create(false);
-
-       sev_mirror_create(dst_vm, src_vm);
-
-       /* Check that we can complete creation of the mirror VM.  */
-       for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
-               __vm_vcpu_add(dst_vm, i);
-
-       if (es)
-               vm_sev_ioctl(dst_vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
-
-       verify_mirror_allowed_cmds(dst_vm);
-
-       kvm_vm_free(src_vm);
-       kvm_vm_free(dst_vm);
-}
-
-static void test_sev_mirror_parameters(void)
-{
-       struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_with_vcpu;
-       int ret;
-
-       sev_vm = sev_vm_create(/* es= */ false);
-       vm_with_vcpu = aux_vm_create(true);
-       vm_no_vcpu = aux_vm_create(false);
-
-       ret = __sev_mirror_create(sev_vm, sev_vm);
-       TEST_ASSERT(
-               ret == -1 && errno == EINVAL,
-               "Should not be able copy context to self. ret: %d, errno: %d",
-               ret, errno);
-
-       ret = __sev_mirror_create(vm_no_vcpu, vm_with_vcpu);
-       TEST_ASSERT(ret == -1 && errno == EINVAL,
-                   "Copy context requires SEV enabled. ret %d, errno: %d", ret,
-                   errno);
-
-       ret = __sev_mirror_create(vm_with_vcpu, sev_vm);
-       TEST_ASSERT(
-               ret == -1 && errno == EINVAL,
-               "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d",
-               ret, errno);
-
-       if (!have_sev_es)
-               goto out;
-
-       sev_es_vm = sev_vm_create(/* es= */ true);
-       ret = __sev_mirror_create(sev_vm, sev_es_vm);
-       TEST_ASSERT(
-               ret == -1 && errno == EINVAL,
-               "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d",
-               ret, errno);
-
-       ret = __sev_mirror_create(sev_es_vm, sev_vm);
-       TEST_ASSERT(
-               ret == -1 && errno == EINVAL,
-               "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d",
-               ret, errno);
-
-       kvm_vm_free(sev_es_vm);
-
-out:
-       kvm_vm_free(sev_vm);
-       kvm_vm_free(vm_with_vcpu);
-       kvm_vm_free(vm_no_vcpu);
-}
-
-static void test_sev_move_copy(void)
-{
-       struct kvm_vm *dst_vm, *dst2_vm, *dst3_vm, *sev_vm, *mirror_vm,
-                     *dst_mirror_vm, *dst2_mirror_vm, *dst3_mirror_vm;
-
-       sev_vm = sev_vm_create(/* es= */ false);
-       dst_vm = aux_vm_create(true);
-       dst2_vm = aux_vm_create(true);
-       dst3_vm = aux_vm_create(true);
-       mirror_vm = aux_vm_create(false);
-       dst_mirror_vm = aux_vm_create(false);
-       dst2_mirror_vm = aux_vm_create(false);
-       dst3_mirror_vm = aux_vm_create(false);
-
-       sev_mirror_create(mirror_vm, sev_vm);
-
-       sev_migrate_from(dst_mirror_vm, mirror_vm);
-       sev_migrate_from(dst_vm, sev_vm);
-
-       sev_migrate_from(dst2_vm, dst_vm);
-       sev_migrate_from(dst2_mirror_vm, dst_mirror_vm);
-
-       sev_migrate_from(dst3_mirror_vm, dst2_mirror_vm);
-       sev_migrate_from(dst3_vm, dst2_vm);
-
-       kvm_vm_free(dst_vm);
-       kvm_vm_free(sev_vm);
-       kvm_vm_free(dst2_vm);
-       kvm_vm_free(dst3_vm);
-       kvm_vm_free(mirror_vm);
-       kvm_vm_free(dst_mirror_vm);
-       kvm_vm_free(dst2_mirror_vm);
-       kvm_vm_free(dst3_mirror_vm);
-
-       /*
-        * Run a similar test, but destroy the mirrors before the mirrored VMs
-        * to ensure destruction is done safely.
-        */
-       sev_vm = sev_vm_create(/* es= */ false);
-       dst_vm = aux_vm_create(true);
-       mirror_vm = aux_vm_create(false);
-       dst_mirror_vm = aux_vm_create(false);
-
-       sev_mirror_create(mirror_vm, sev_vm);
-
-       sev_migrate_from(dst_mirror_vm, mirror_vm);
-       sev_migrate_from(dst_vm, sev_vm);
-
-       kvm_vm_free(mirror_vm);
-       kvm_vm_free(dst_mirror_vm);
-       kvm_vm_free(dst_vm);
-       kvm_vm_free(sev_vm);
-}
-
-int main(int argc, char *argv[])
-{
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM));
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM));
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
-
-       have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
-
-       if (kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) {
-               test_sev_migrate_from(/* es= */ false);
-               if (have_sev_es)
-                       test_sev_migrate_from(/* es= */ true);
-               test_sev_migrate_locking();
-               test_sev_migrate_parameters();
-               if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM))
-                       test_sev_move_copy();
-       }
-       if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) {
-               test_sev_mirror(/* es= */ false);
-               if (have_sev_es)
-                       test_sev_mirror(/* es= */ true);
-               test_sev_mirror_parameters();
-       }
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c
deleted file mode 100644 (file)
index ae77698..0000000
+++ /dev/null
@@ -1,205 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <math.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-#include "linux/psp-sev.h"
-#include "sev.h"
-
-
-#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)
-
-static void guest_sev_es_code(void)
-{
-       /* TODO: Check CPUID after GHCB-based hypercall support is added. */
-       GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
-       GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED);
-
-       /*
-        * TODO: Add GHCB and ucall support for SEV-ES guests.  For now, simply
-        * force "termination" to signal "done" via the GHCB MSR protocol.
-        */
-       wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
-       __asm__ __volatile__("rep; vmmcall");
-}
-
-static void guest_sev_code(void)
-{
-       GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV));
-       GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
-
-       GUEST_DONE();
-}
-
-/* Stash state passed via VMSA before any compiled code runs.  */
-extern void guest_code_xsave(void);
-asm("guest_code_xsave:\n"
-    "mov $" __stringify(XFEATURE_MASK_X87_AVX) ", %eax\n"
-    "xor %edx, %edx\n"
-    "xsave (%rdi)\n"
-    "jmp guest_sev_es_code");
-
-static void compare_xsave(u8 *from_host, u8 *from_guest)
-{
-       int i;
-       bool bad = false;
-       for (i = 0; i < 4095; i++) {
-               if (from_host[i] != from_guest[i]) {
-                       printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]);
-                       bad = true;
-               }
-       }
-
-       if (bad)
-               abort();
-}
-
-static void test_sync_vmsa(uint32_t policy)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       vm_vaddr_t gva;
-       void *hva;
-
-       double x87val = M_PI;
-       struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 };
-
-       vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu);
-       gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR,
-                                   MEM_REGION_TEST_DATA);
-       hva = addr_gva2hva(vm, gva);
-
-       vcpu_args_set(vcpu, 1, gva);
-
-       asm("fninit\n"
-           "vpcmpeqb %%ymm4, %%ymm4, %%ymm4\n"
-           "fldl %3\n"
-           "xsave (%2)\n"
-           "fstp %%st\n"
-           : "=m"(xsave)
-           : "A"(XFEATURE_MASK_X87_AVX), "r"(&xsave), "m" (x87val)
-           : "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");
-       vcpu_xsave_set(vcpu, &xsave);
-
-       vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL);
-
-       /* This page is shared, so make it decrypted.  */
-       memset(hva, 0, 4096);
-
-       vcpu_run(vcpu);
-
-       TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
-                   "Wanted SYSTEM_EVENT, got %s",
-                   exit_reason_str(vcpu->run->exit_reason));
-       TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM);
-       TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1);
-       TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ);
-
-       compare_xsave((u8 *)&xsave, (u8 *)hva);
-
-       kvm_vm_free(vm);
-}
-
-static void test_sev(void *guest_code, uint64_t policy)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-
-       uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
-
-       vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu);
-
-       /* TODO: Validate the measurement is as expected. */
-       vm_sev_launch(vm, policy, NULL);
-
-       for (;;) {
-               vcpu_run(vcpu);
-
-               if (policy & SEV_POLICY_ES) {
-                       TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
-                                   "Wanted SYSTEM_EVENT, got %s",
-                                   exit_reason_str(vcpu->run->exit_reason));
-                       TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM);
-                       TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1);
-                       TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ);
-                       break;
-               }
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       continue;
-               case UCALL_DONE:
-                       return;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-               default:
-                       TEST_FAIL("Unexpected exit: %s",
-                                 exit_reason_str(vcpu->run->exit_reason));
-               }
-       }
-
-       kvm_vm_free(vm);
-}
-
-static void guest_shutdown_code(void)
-{
-       struct desc_ptr idt;
-
-       /* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. */
-       memset(&idt, 0, sizeof(idt));
-       __asm__ __volatile__("lidt %0" :: "m"(idt));
-
-       __asm__ __volatile__("ud2");
-}
-
-static void test_sev_es_shutdown(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       uint32_t type = KVM_X86_SEV_ES_VM;
-
-       vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu);
-
-       vm_sev_launch(vm, SEV_POLICY_ES, NULL);
-
-       vcpu_run(vcpu);
-       TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN,
-                   "Wanted SHUTDOWN, got %s",
-                   exit_reason_str(vcpu->run->exit_reason));
-
-       kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
-       const u64 xf_mask = XFEATURE_MASK_X87_AVX;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
-
-       test_sev(guest_sev_code, SEV_POLICY_NO_DBG);
-       test_sev(guest_sev_code, 0);
-
-       if (kvm_cpu_has(X86_FEATURE_SEV_ES)) {
-               test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG);
-               test_sev(guest_sev_es_code, SEV_POLICY_ES);
-
-               test_sev_es_shutdown();
-
-               if (kvm_has_cap(KVM_CAP_XCRS) &&
-                   (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) {
-                       test_sync_vmsa(0);
-                       test_sync_vmsa(SEV_POLICY_NO_DBG);
-               }
-       }
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
deleted file mode 100644 (file)
index fabeead..0000000
+++ /dev/null
@@ -1,105 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2020, Google LLC.
- *
- * Test that KVM emulates instructions in response to EPT violations when
- * allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR.
- */
-#include "flds_emulation.h"
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "vmx.h"
-
-#define MAXPHYADDR 36
-
-#define MEM_REGION_GVA 0x0000123456789000
-#define MEM_REGION_GPA 0x0000000700000000
-#define MEM_REGION_SLOT        10
-#define MEM_REGION_SIZE PAGE_SIZE
-
-static void guest_code(bool tdp_enabled)
-{
-       uint64_t error_code;
-       uint64_t vector;
-
-       vector = kvm_asm_safe_ec(FLDS_MEM_EAX, error_code, "a"(MEM_REGION_GVA));
-
-       /*
-        * When TDP is enabled, flds will trigger an emulation failure, exit to
-        * userspace, and then the selftest host "VMM" skips the instruction.
-        *
-        * When TDP is disabled, no instruction emulation is required so flds
-        * should generate #PF(RSVD).
-        */
-       if (tdp_enabled) {
-               GUEST_ASSERT(!vector);
-       } else {
-               GUEST_ASSERT_EQ(vector, PF_VECTOR);
-               GUEST_ASSERT(error_code & PFERR_RSVD_MASK);
-       }
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       uint64_t *pte;
-       uint64_t *hva;
-       uint64_t gpa;
-       int rc;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled());
-
-       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_MAX_PHY_ADDR, MAXPHYADDR);
-
-       rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE);
-       TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable");
-       vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
-
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-                                   MEM_REGION_GPA, MEM_REGION_SLOT,
-                                   MEM_REGION_SIZE / PAGE_SIZE, 0);
-       gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE,
-                                MEM_REGION_GPA, MEM_REGION_SLOT);
-       TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc");
-       virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1);
-       hva = addr_gpa2hva(vm, MEM_REGION_GPA);
-       memset(hva, 0, PAGE_SIZE);
-
-       pte = vm_get_page_table_entry(vm, MEM_REGION_GVA);
-       *pte |= BIT_ULL(MAXPHYADDR);
-
-       vcpu_run(vcpu);
-
-       /*
-        * When TDP is enabled, KVM must emulate in response to the guest physical
-        * address that is illegal from the guest's perspective, but is legal
-        * from hardware's perspective.  This should result in an emulation
-        * failure exit to userspace since KVM doesn't support emulating flds.
-        */
-       if (kvm_is_tdp_enabled()) {
-               handle_flds_emulation_failure_exit(vcpu);
-               vcpu_run(vcpu);
-       }
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-       case UCALL_DONE:
-               break;
-       default:
-               TEST_FAIL("Unrecognized ucall: %lu", uc.cmd);
-       }
-
-       kvm_vm_free(vm);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
deleted file mode 100644 (file)
index 55c88d6..0000000
+++ /dev/null
@@ -1,209 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2018, Red Hat, Inc.
- *
- * Tests for SMM.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-
-#include "vmx.h"
-#include "svm_util.h"
-
-#define SMRAM_SIZE 65536
-#define SMRAM_MEMSLOT ((1 << 16) | 1)
-#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
-#define SMRAM_GPA 0x1000000
-#define SMRAM_STAGE 0xfe
-
-#define STR(x) #x
-#define XSTR(s) STR(s)
-
-#define SYNC_PORT 0xe
-#define DONE 0xff
-
-/*
- * This is compiled as normal 64-bit code, however, the SMI handler is executed
- * in real-address mode. To stay simple we're limiting ourselves to a
- * mode-independent subset of asm here.
- * The SMI handler always reports back the fixed stage SMRAM_STAGE.
- */
-uint8_t smi_handler[] = {
-       0xb0, SMRAM_STAGE,    /* mov $SMRAM_STAGE, %al */
-       0xe4, SYNC_PORT,      /* in $SYNC_PORT, %al */
-       0x0f, 0xaa,           /* rsm */
-};
-
-static inline void sync_with_host(uint64_t phase)
-{
-       asm volatile("in $" XSTR(SYNC_PORT)", %%al \n"
-                    : "+a" (phase));
-}
-
-static void self_smi(void)
-{
-       x2apic_write_reg(APIC_ICR,
-                        APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
-}
-
-static void l2_guest_code(void)
-{
-       sync_with_host(8);
-
-       sync_with_host(10);
-
-       vmcall();
-}
-
-static void guest_code(void *arg)
-{
-       #define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
-       struct svm_test_data *svm = arg;
-       struct vmx_pages *vmx_pages = arg;
-
-       sync_with_host(1);
-
-       wrmsr(MSR_IA32_APICBASE, apicbase | X2APIC_ENABLE);
-
-       sync_with_host(2);
-
-       self_smi();
-
-       sync_with_host(4);
-
-       if (arg) {
-               if (this_cpu_has(X86_FEATURE_SVM)) {
-                       generic_svm_setup(svm, l2_guest_code,
-                                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-               } else {
-                       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
-                       GUEST_ASSERT(load_vmcs(vmx_pages));
-                       prepare_vmcs(vmx_pages, l2_guest_code,
-                                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-               }
-
-               sync_with_host(5);
-
-               self_smi();
-
-               sync_with_host(7);
-
-               if (this_cpu_has(X86_FEATURE_SVM)) {
-                       run_guest(svm->vmcb, svm->vmcb_gpa);
-                       run_guest(svm->vmcb, svm->vmcb_gpa);
-               } else {
-                       vmlaunch();
-                       vmresume();
-               }
-
-               /* Stages 8-11 are eaten by SMM (SMRAM_STAGE reported instead) */
-               sync_with_host(12);
-       }
-
-       sync_with_host(DONE);
-}
-
-void inject_smi(struct kvm_vcpu *vcpu)
-{
-       struct kvm_vcpu_events events;
-
-       vcpu_events_get(vcpu, &events);
-
-       events.smi.pending = 1;
-       events.flags |= KVM_VCPUEVENT_VALID_SMM;
-
-       vcpu_events_set(vcpu, &events);
-}
-
-int main(int argc, char *argv[])
-{
-       vm_vaddr_t nested_gva = 0;
-
-       struct kvm_vcpu *vcpu;
-       struct kvm_regs regs;
-       struct kvm_vm *vm;
-       struct kvm_x86_state *state;
-       int stage, stage_reported;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM));
-
-       /* Create VM */
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
-                                   SMRAM_MEMSLOT, SMRAM_PAGES, 0);
-       TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT)
-                   == SMRAM_GPA, "could not allocate guest physical addresses?");
-
-       memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE);
-       memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler,
-              sizeof(smi_handler));
-
-       vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA);
-
-       if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
-               if (kvm_cpu_has(X86_FEATURE_SVM))
-                       vcpu_alloc_svm(vm, &nested_gva);
-               else if (kvm_cpu_has(X86_FEATURE_VMX))
-                       vcpu_alloc_vmx(vm, &nested_gva);
-       }
-
-       if (!nested_gva)
-               pr_info("will skip SMM test with VMX enabled\n");
-
-       vcpu_args_set(vcpu, 1, nested_gva);
-
-       for (stage = 1;; stage++) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               memset(&regs, 0, sizeof(regs));
-               vcpu_regs_get(vcpu, &regs);
-
-               stage_reported = regs.rax & 0xff;
-
-               if (stage_reported == DONE)
-                       goto done;
-
-               TEST_ASSERT(stage_reported == stage ||
-                           stage_reported == SMRAM_STAGE,
-                           "Unexpected stage: #%x, got %x",
-                           stage, stage_reported);
-
-               /*
-                * Enter SMM during L2 execution and check that we correctly
-                * return from it. Do not perform save/restore while in SMM yet.
-                */
-               if (stage == 8) {
-                       inject_smi(vcpu);
-                       continue;
-               }
-
-               /*
-                * Perform save/restore while the guest is in SMM triggered
-                * during L2 execution.
-                */
-               if (stage == 10)
-                       inject_smi(vcpu);
-
-               state = vcpu_save_state(vcpu);
-               kvm_vm_release(vm);
-
-               vcpu = vm_recreate_with_one_vcpu(vm);
-               vcpu_load_state(vcpu, state);
-               kvm_x86_state_cleanup(state);
-       }
-
-done:
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
deleted file mode 100644 (file)
index 141b7fc..0000000
+++ /dev/null
@@ -1,323 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * KVM_GET/SET_* tests
- *
- * Copyright (C) 2018, Red Hat, Inc.
- *
- * Tests for vCPU state save/restore, including nested guest state.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-#include "svm_util.h"
-
-#define L2_GUEST_STACK_SIZE 256
-
-void svm_l2_guest_code(void)
-{
-       GUEST_SYNC(4);
-       /* Exit to L1 */
-       vmcall();
-       GUEST_SYNC(6);
-       /* Done, exit to L1 and never come back.  */
-       vmcall();
-}
-
-static void svm_l1_guest_code(struct svm_test_data *svm)
-{
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       struct vmcb *vmcb = svm->vmcb;
-
-       GUEST_ASSERT(svm->vmcb_gpa);
-       /* Prepare for L2 execution. */
-       generic_svm_setup(svm, svm_l2_guest_code,
-                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       GUEST_SYNC(3);
-       run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
-       GUEST_SYNC(5);
-       vmcb->save.rip += 3;
-       run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
-       GUEST_SYNC(7);
-}
-
-void vmx_l2_guest_code(void)
-{
-       GUEST_SYNC(6);
-
-       /* Exit to L1 */
-       vmcall();
-
-       /* L1 has now set up a shadow VMCS for us.  */
-       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
-       GUEST_SYNC(10);
-       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
-       GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
-       GUEST_SYNC(11);
-       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
-       GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
-       GUEST_SYNC(12);
-
-       /* Done, exit to L1 and never come back.  */
-       vmcall();
-}
-
-static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
-{
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
-       GUEST_ASSERT(vmx_pages->vmcs_gpa);
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
-       GUEST_SYNC(3);
-       GUEST_ASSERT(load_vmcs(vmx_pages));
-       GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
-
-       GUEST_SYNC(4);
-       GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
-
-       prepare_vmcs(vmx_pages, vmx_l2_guest_code,
-                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       GUEST_SYNC(5);
-       GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
-       GUEST_ASSERT(!vmlaunch());
-       GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
-       /* Check that the launched state is preserved.  */
-       GUEST_ASSERT(vmlaunch());
-
-       GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
-       GUEST_SYNC(7);
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
-       GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
-       vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);
-
-       vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
-       vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa);
-
-       GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
-       GUEST_ASSERT(vmlaunch());
-       GUEST_SYNC(8);
-       GUEST_ASSERT(vmlaunch());
-       GUEST_ASSERT(vmresume());
-
-       vmwrite(GUEST_RIP, 0xc0ffee);
-       GUEST_SYNC(9);
-       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
-
-       GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
-       GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
-       GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
-       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
-       GUEST_ASSERT(vmlaunch());
-       GUEST_ASSERT(vmresume());
-       GUEST_SYNC(13);
-       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
-       GUEST_ASSERT(vmlaunch());
-       GUEST_ASSERT(vmresume());
-}
-
-static void __attribute__((__flatten__)) guest_code(void *arg)
-{
-       GUEST_SYNC(1);
-
-       if (this_cpu_has(X86_FEATURE_XSAVE)) {
-               uint64_t supported_xcr0 = this_cpu_supported_xcr0();
-               uint8_t buffer[4096];
-
-               memset(buffer, 0xcc, sizeof(buffer));
-
-               /*
-                * Modify state for all supported xfeatures to take them out of
-                * their "init" state, i.e. to make them show up in XSTATE_BV.
-                *
-                * Note off-by-default features, e.g. AMX, are out of scope for
-                * this particular testcase as they have a different ABI.
-                */
-               GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
-               asm volatile ("fincstp");
-
-               GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
-               asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));
-
-               if (supported_xcr0 & XFEATURE_MASK_YMM)
-                       asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));
-
-               if (supported_xcr0 & XFEATURE_MASK_AVX512) {
-                       asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
-                       asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
-                       asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
-               }
-
-               if (this_cpu_has(X86_FEATURE_MPX)) {
-                       uint64_t bounds[2] = { 10, 0xffffffffull };
-                       uint64_t output[2] = { };
-
-                       GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
-                       GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);
-
-                       /*
-                        * Don't bother trying to get BNDCSR into the INUSE
-                        * state.  MSR_IA32_BNDCFGS doesn't count as it isn't
-                        * managed via XSAVE/XRSTOR, and BNDCFGU can only be
-                        * modified by XRSTOR.  Stuffing XSTATE_BV in the host
-                        * is simpler than doing XRSTOR here in the guest.
-                        *
-                        * However, temporarily enable MPX in BNDCFGS so that
-                        * BNDMOV actually loads BND1.  If MPX isn't *fully*
-                        * enabled, all MPX instructions are treated as NOPs.
-                        *
-                        * Hand encode "bndmov (%rax),%bnd1" as support for MPX
-                        * mnemonics/registers has been removed from gcc and
-                        * clang (and was never fully supported by clang).
-                        */
-                       wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
-                       asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));
-                       /*
-                        * Hand encode "bndmov %bnd1, (%rax)" to sanity check
-                        * that BND1 actually got loaded.
-                        */
-                       asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
-                       wrmsr(MSR_IA32_BNDCFGS, 0);
-
-                       GUEST_ASSERT_EQ(bounds[0], output[0]);
-                       GUEST_ASSERT_EQ(bounds[1], output[1]);
-               }
-               if (this_cpu_has(X86_FEATURE_PKU)) {
-                       GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
-                       set_cr4(get_cr4() | X86_CR4_PKE);
-                       GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));
-
-                       wrpkru(-1u);
-               }
-       }
-
-       GUEST_SYNC(2);
-
-       if (arg) {
-               if (this_cpu_has(X86_FEATURE_SVM))
-                       svm_l1_guest_code(arg);
-               else
-                       vmx_l1_guest_code(arg);
-       }
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       uint64_t *xstate_bv, saved_xstate_bv;
-       vm_vaddr_t nested_gva = 0;
-       struct kvm_cpuid2 empty_cpuid = {};
-       struct kvm_regs regs1, regs2;
-       struct kvm_vcpu *vcpu, *vcpuN;
-       struct kvm_vm *vm;
-       struct kvm_x86_state *state;
-       struct ucall uc;
-       int stage;
-
-       /* Create VM */
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       vcpu_regs_get(vcpu, &regs1);
-
-       if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
-               if (kvm_cpu_has(X86_FEATURE_SVM))
-                       vcpu_alloc_svm(vm, &nested_gva);
-               else if (kvm_cpu_has(X86_FEATURE_VMX))
-                       vcpu_alloc_vmx(vm, &nested_gva);
-       }
-
-       if (!nested_gva)
-               pr_info("will skip nested state checks\n");
-
-       vcpu_args_set(vcpu, 1, nested_gva);
-
-       for (stage = 1;; stage++) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-
-               /* UCALL_SYNC is handled here.  */
-               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
-                           uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
-                           stage, (ulong)uc.args[1]);
-
-               state = vcpu_save_state(vcpu);
-               memset(&regs1, 0, sizeof(regs1));
-               vcpu_regs_get(vcpu, &regs1);
-
-               kvm_vm_release(vm);
-
-               /* Restore state in a new VM.  */
-               vcpu = vm_recreate_with_one_vcpu(vm);
-               vcpu_load_state(vcpu, state);
-
-               /*
-                * Restore XSAVE state in a dummy vCPU, first without doing
-                * KVM_SET_CPUID2, and then with an empty guest CPUID.  Except
-                * for off-by-default xfeatures, e.g. AMX, KVM is supposed to
-                * allow KVM_SET_XSAVE regardless of guest CPUID.  Manually
-                * load only XSAVE state; MSRs in particular have a much more
-                * convoluted ABI.
-                *
-                * Load two versions of XSAVE state: one with the actual guest
-                * XSAVE state, and one with all supported features forced "on"
-                * in xstate_bv, e.g. to ensure that KVM allows loading all
-                * supported features, even if something goes awry in saving
-                * the original snapshot.
-                */
-               xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
-               saved_xstate_bv = *xstate_bv;
-
-               vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
-               vcpu_xsave_set(vcpuN, state->xsave);
-               *xstate_bv = kvm_cpu_supported_xcr0();
-               vcpu_xsave_set(vcpuN, state->xsave);
-
-               vcpu_init_cpuid(vcpuN, &empty_cpuid);
-               vcpu_xsave_set(vcpuN, state->xsave);
-               *xstate_bv = saved_xstate_bv;
-               vcpu_xsave_set(vcpuN, state->xsave);
-
-               kvm_x86_state_cleanup(state);
-
-               memset(&regs2, 0, sizeof(regs2));
-               vcpu_regs_get(vcpu, &regs2);
-               TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
-                           "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
-                           (ulong) regs2.rdi, (ulong) regs2.rsi);
-       }
-
-done:
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
deleted file mode 100644 (file)
index 916e042..0000000
+++ /dev/null
@@ -1,118 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * svm_int_ctl_test
- *
- * Copyright (C) 2021, Red Hat, Inc.
- *
- * Nested SVM testing: test simultaneous use of V_IRQ from L1 and L0.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-#include "apic.h"
-
-bool vintr_irq_called;
-bool intr_irq_called;
-
-#define VINTR_IRQ_NUMBER 0x20
-#define INTR_IRQ_NUMBER 0x30
-
-static void vintr_irq_handler(struct ex_regs *regs)
-{
-       vintr_irq_called = true;
-}
-
-static void intr_irq_handler(struct ex_regs *regs)
-{
-       x2apic_write_reg(APIC_EOI, 0x00);
-       intr_irq_called = true;
-}
-
-static void l2_guest_code(struct svm_test_data *svm)
-{
-       /* This code raises interrupt INTR_IRQ_NUMBER in L1's LAPIC,
-        * and since L1 didn't enable virtual interrupt masking,
-        * L2 should receive it and not L1.
-        *
-        * L2 also has virtual interrupt 'VINTR_IRQ_NUMBER' pending in V_IRQ
-        * so it should also receive it after the following 'sti'.
-        */
-       x2apic_write_reg(APIC_ICR,
-               APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
-
-       __asm__ __volatile__(
-               "sti\n"
-               "nop\n"
-       );
-
-       GUEST_ASSERT(vintr_irq_called);
-       GUEST_ASSERT(intr_irq_called);
-
-       __asm__ __volatile__(
-               "vmcall\n"
-       );
-}
-
-static void l1_guest_code(struct svm_test_data *svm)
-{
-       #define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       struct vmcb *vmcb = svm->vmcb;
-
-       x2apic_enable();
-
-       /* Prepare for L2 execution. */
-       generic_svm_setup(svm, l2_guest_code,
-                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       /* No virtual interrupt masking */
-       vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
-
-       /* No intercepts for real and virtual interrupts */
-       vmcb->control.intercept &= ~(BIT(INTERCEPT_INTR) | BIT(INTERCEPT_VINTR));
-
-       /* Make a virtual interrupt VINTR_IRQ_NUMBER pending */
-       vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
-       vmcb->control.int_vector = VINTR_IRQ_NUMBER;
-
-       run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       vm_vaddr_t svm_gva;
-       struct kvm_vm *vm;
-       struct ucall uc;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
-
-       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-
-       vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler);
-       vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler);
-
-       vcpu_alloc_svm(vm, &svm_gva);
-       vcpu_args_set(vcpu, 1, svm_gva);
-
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-               /* NOT REACHED */
-       case UCALL_DONE:
-               goto done;
-       default:
-               TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
-       }
-done:
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c
deleted file mode 100644 (file)
index 00135cb..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * svm_nested_shutdown_test
- *
- * Copyright (C) 2022, Red Hat, Inc.
- *
- * Nested SVM testing: test that unintercepted shutdown in L2 doesn't crash the host
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-
-static void l2_guest_code(struct svm_test_data *svm)
-{
-       __asm__ __volatile__("ud2");
-}
-
-static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt)
-{
-       #define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       struct vmcb *vmcb = svm->vmcb;
-
-       generic_svm_setup(svm, l2_guest_code,
-                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
-
-       idt[6].p   = 0; // #UD is intercepted but its injection will cause #NP
-       idt[11].p  = 0; // #NP is not intercepted and will cause another
-                       // #NP that will be converted to #DF
-       idt[8].p   = 0; // #DF will cause #NP which will cause SHUTDOWN
-
-       run_guest(vmcb, svm->vmcb_gpa);
-
-       /* should not reach here */
-       GUEST_ASSERT(0);
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       vm_vaddr_t svm_gva;
-       struct kvm_vm *vm;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
-
-       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-       vcpu_alloc_svm(vm, &svm_gva);
-
-       vcpu_args_set(vcpu, 2, svm_gva, vm->arch.idt);
-
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
-
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
deleted file mode 100644 (file)
index 7b6481d..0000000
+++ /dev/null
@@ -1,210 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2022 Oracle and/or its affiliates.
- *
- * Based on:
- *   svm_int_ctl_test
- *
- *   Copyright (C) 2021, Red Hat, Inc.
- *
- */
-#include <stdatomic.h>
-#include <stdio.h>
-#include <unistd.h>
-#include "apic.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-#include "test_util.h"
-
-#define INT_NR                 0x20
-
-static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless");
-
-static unsigned int bp_fired;
-static void guest_bp_handler(struct ex_regs *regs)
-{
-       bp_fired++;
-}
-
-static unsigned int int_fired;
-static void l2_guest_code_int(void);
-
-static void guest_int_handler(struct ex_regs *regs)
-{
-       int_fired++;
-       GUEST_ASSERT_EQ(regs->rip, (unsigned long)l2_guest_code_int);
-}
-
-static void l2_guest_code_int(void)
-{
-       GUEST_ASSERT_EQ(int_fired, 1);
-
-       /*
-        * Same as the vmmcall() function, but with a ud2 sneaked after the
-        * vmmcall.  The caller injects an exception with the return address
-        * increased by 2, so the "pop rbp" must be after the ud2 and we cannot
-        * use vmmcall() directly.
-        */
-       __asm__ __volatile__("push %%rbp; vmmcall; ud2; pop %%rbp"
-                             : : "a"(0xdeadbeef), "c"(0xbeefdead)
-                             : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
-                               "r10", "r11", "r12", "r13", "r14", "r15");
-
-       GUEST_ASSERT_EQ(bp_fired, 1);
-       hlt();
-}
-
-static atomic_int nmi_stage;
-#define nmi_stage_get() atomic_load_explicit(&nmi_stage, memory_order_acquire)
-#define nmi_stage_inc() atomic_fetch_add_explicit(&nmi_stage, 1, memory_order_acq_rel)
-static void guest_nmi_handler(struct ex_regs *regs)
-{
-       nmi_stage_inc();
-
-       if (nmi_stage_get() == 1) {
-               vmmcall();
-               GUEST_FAIL("Unexpected resume after VMMCALL");
-       } else {
-               GUEST_ASSERT_EQ(nmi_stage_get(), 3);
-               GUEST_DONE();
-       }
-}
-
-static void l2_guest_code_nmi(void)
-{
-       ud2();
-}
-
-static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t idt_alt)
-{
-       #define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       struct vmcb *vmcb = svm->vmcb;
-
-       if (is_nmi)
-               x2apic_enable();
-
-       /* Prepare for L2 execution. */
-       generic_svm_setup(svm,
-                         is_nmi ? l2_guest_code_nmi : l2_guest_code_int,
-                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       vmcb->control.intercept_exceptions |= BIT(PF_VECTOR) | BIT(UD_VECTOR);
-       vmcb->control.intercept |= BIT(INTERCEPT_NMI) | BIT(INTERCEPT_HLT);
-
-       if (is_nmi) {
-               vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
-       } else {
-               vmcb->control.event_inj = INT_NR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_SOFT;
-               /* The return address pushed on stack */
-               vmcb->control.next_rip = vmcb->save.rip;
-       }
-
-       run_guest(vmcb, svm->vmcb_gpa);
-       __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
-                      "Expected VMMCALL #VMEXIT, got '0x%x', info1 = '0x%lx', info2 = '0x%lx'",
-                      vmcb->control.exit_code,
-                      vmcb->control.exit_info_1, vmcb->control.exit_info_2);
-
-       if (is_nmi) {
-               clgi();
-               x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI);
-
-               GUEST_ASSERT_EQ(nmi_stage_get(), 1);
-               nmi_stage_inc();
-
-               stgi();
-               /* self-NMI happens here */
-               while (true)
-                       cpu_relax();
-       }
-
-       /* Skip over VMMCALL */
-       vmcb->save.rip += 3;
-
-       /* Switch to alternate IDT to cause intervening NPF again */
-       vmcb->save.idtr.base = idt_alt;
-       vmcb->control.clean = 0; /* &= ~BIT(VMCB_DT) would be enough */
-
-       vmcb->control.event_inj = BP_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
-       /* The return address pushed on stack, skip over UD2 */
-       vmcb->control.next_rip = vmcb->save.rip + 2;
-
-       run_guest(vmcb, svm->vmcb_gpa);
-       __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
-                      "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx', info2 = '0x%lx'",
-                      vmcb->control.exit_code,
-                      vmcb->control.exit_info_1, vmcb->control.exit_info_2);
-
-       GUEST_DONE();
-}
-
-static void run_test(bool is_nmi)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       vm_vaddr_t svm_gva;
-       vm_vaddr_t idt_alt_vm;
-       struct kvm_guest_debug debug;
-
-       pr_info("Running %s test\n", is_nmi ? "NMI" : "soft int");
-
-       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-
-       vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
-       vm_install_exception_handler(vm, BP_VECTOR, guest_bp_handler);
-       vm_install_exception_handler(vm, INT_NR, guest_int_handler);
-
-       vcpu_alloc_svm(vm, &svm_gva);
-
-       if (!is_nmi) {
-               void *idt, *idt_alt;
-
-               idt_alt_vm = vm_vaddr_alloc_page(vm);
-               idt_alt = addr_gva2hva(vm, idt_alt_vm);
-               idt = addr_gva2hva(vm, vm->arch.idt);
-               memcpy(idt_alt, idt, getpagesize());
-       } else {
-               idt_alt_vm = 0;
-       }
-       vcpu_args_set(vcpu, 3, svm_gva, (uint64_t)is_nmi, (uint64_t)idt_alt_vm);
-
-       memset(&debug, 0, sizeof(debug));
-       vcpu_guest_debug_set(vcpu, &debug);
-
-       struct ucall uc;
-
-       alarm(2);
-       vcpu_run(vcpu);
-       alarm(0);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-               /* NOT REACHED */
-       case UCALL_DONE:
-               goto done;
-       default:
-               TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
-       }
-done:
-       kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
-
-       TEST_ASSERT(kvm_cpu_has(X86_FEATURE_NRIPS),
-                   "KVM with nSVM is supposed to unconditionally advertise nRIP Save");
-
-       atomic_init(&nmi_stage, 0);
-
-       run_test(false);
-       run_test(true);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
deleted file mode 100644 (file)
index 8a62cca..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * svm_vmcall_test
- *
- * Copyright (C) 2020, Red Hat, Inc.
- *
- * Nested SVM testing: VMCALL
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-
-static void l2_guest_code(struct svm_test_data *svm)
-{
-       __asm__ __volatile__("vmcall");
-}
-
-static void l1_guest_code(struct svm_test_data *svm)
-{
-       #define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       struct vmcb *vmcb = svm->vmcb;
-
-       /* Prepare for L2 execution. */
-       generic_svm_setup(svm, l2_guest_code,
-                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       run_guest(vmcb, svm->vmcb_gpa);
-
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       vm_vaddr_t svm_gva;
-       struct kvm_vm *vm;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
-
-       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-
-       vcpu_alloc_svm(vm, &svm_gva);
-       vcpu_args_set(vcpu, 1, svm_gva);
-
-       for (;;) {
-               struct ucall uc;
-
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
-               }
-       }
-done:
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
deleted file mode 100644 (file)
index 8fa3948..0000000
+++ /dev/null
@@ -1,411 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Test for x86 KVM_CAP_SYNC_REGS
- *
- * Copyright (C) 2018, Google LLC.
- *
- * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality,
- * including requesting an invalid register set, and updates to/from values
- * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <pthread.h>
-
-#include "kvm_test_harness.h"
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-#define UCALL_PIO_PORT ((uint16_t)0x1000)
-
-struct ucall uc_none = {
-       .cmd = UCALL_NONE,
-};
-
-/*
- * ucall is embedded here to protect against the compiler reshuffling registers
- * before calling a function. In this test we only need to get a KVM_EXIT_IO
- * vmexit and preserve RBX; no additional information is needed.
- */
-void guest_code(void)
-{
-       asm volatile("1: in %[port], %%al\n"
-                    "add $0x1, %%rbx\n"
-                    "jmp 1b"
-                    : : [port] "d" (UCALL_PIO_PORT), "D" (&uc_none)
-                    : "rax", "rbx");
-}
-
-KVM_ONE_VCPU_TEST_SUITE(sync_regs_test);
-
-static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
-{
-#define REG_COMPARE(reg) \
-       TEST_ASSERT(left->reg == right->reg, \
-                   "Register " #reg \
-                   " values did not match: 0x%llx, 0x%llx", \
-                   left->reg, right->reg)
-       REG_COMPARE(rax);
-       REG_COMPARE(rbx);
-       REG_COMPARE(rcx);
-       REG_COMPARE(rdx);
-       REG_COMPARE(rsi);
-       REG_COMPARE(rdi);
-       REG_COMPARE(rsp);
-       REG_COMPARE(rbp);
-       REG_COMPARE(r8);
-       REG_COMPARE(r9);
-       REG_COMPARE(r10);
-       REG_COMPARE(r11);
-       REG_COMPARE(r12);
-       REG_COMPARE(r13);
-       REG_COMPARE(r14);
-       REG_COMPARE(r15);
-       REG_COMPARE(rip);
-       REG_COMPARE(rflags);
-#undef REG_COMPARE
-}
-
-static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right)
-{
-}
-
-static void compare_vcpu_events(struct kvm_vcpu_events *left,
-                               struct kvm_vcpu_events *right)
-{
-}
-
-#define TEST_SYNC_FIELDS   (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
-#define INVALID_SYNC_FIELD 0x80000000
-
-/*
- * Set an exception as pending *and* injected while KVM is processing events.
- * KVM is supposed to ignore/drop pending exceptions if userspace is also
- * requesting that an exception be injected.
- */
-static void *race_events_inj_pen(void *arg)
-{
-       struct kvm_run *run = (struct kvm_run *)arg;
-       struct kvm_vcpu_events *events = &run->s.regs.events;
-
-       WRITE_ONCE(events->exception.nr, UD_VECTOR);
-
-       for (;;) {
-               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
-               WRITE_ONCE(events->flags, 0);
-               WRITE_ONCE(events->exception.injected, 1);
-               WRITE_ONCE(events->exception.pending, 1);
-
-               pthread_testcancel();
-       }
-
-       return NULL;
-}
-
-/*
- * Set an invalid exception vector while KVM is processing events.  KVM is
- * supposed to reject any vector >= 32, as well as NMIs (vector 2).
- */
-static void *race_events_exc(void *arg)
-{
-       struct kvm_run *run = (struct kvm_run *)arg;
-       struct kvm_vcpu_events *events = &run->s.regs.events;
-
-       for (;;) {
-               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
-               WRITE_ONCE(events->flags, 0);
-               WRITE_ONCE(events->exception.nr, UD_VECTOR);
-               WRITE_ONCE(events->exception.pending, 1);
-               WRITE_ONCE(events->exception.nr, 255);
-
-               pthread_testcancel();
-       }
-
-       return NULL;
-}
-
-/*
- * Toggle CR4.PAE while KVM is processing SREGS, EFER.LME=1 with CR4.PAE=0 is
- * illegal, and KVM's MMU heavily relies on vCPU state being valid.
- */
-static noinline void *race_sregs_cr4(void *arg)
-{
-       struct kvm_run *run = (struct kvm_run *)arg;
-       __u64 *cr4 = &run->s.regs.sregs.cr4;
-       __u64 pae_enabled = *cr4;
-       __u64 pae_disabled = *cr4 & ~X86_CR4_PAE;
-
-       for (;;) {
-               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_SREGS);
-               WRITE_ONCE(*cr4, pae_enabled);
-               asm volatile(".rept 512\n\t"
-                            "nop\n\t"
-                            ".endr");
-               WRITE_ONCE(*cr4, pae_disabled);
-
-               pthread_testcancel();
-       }
-
-       return NULL;
-}
-
-static void race_sync_regs(struct kvm_vcpu *vcpu, void *racer)
-{
-       const time_t TIMEOUT = 2; /* seconds, roughly */
-       struct kvm_x86_state *state;
-       struct kvm_translation tr;
-       struct kvm_run *run;
-       pthread_t thread;
-       time_t t;
-
-       run = vcpu->run;
-
-       run->kvm_valid_regs = KVM_SYNC_X86_SREGS;
-       vcpu_run(vcpu);
-       run->kvm_valid_regs = 0;
-
-       /* Save state *before* spawning the thread that mucks with vCPU state. */
-       state = vcpu_save_state(vcpu);
-
-       /*
-        * Selftests run 64-bit guests by default, both EFER.LME and CR4.PAE
-        * should already be set in guest state.
-        */
-       TEST_ASSERT((run->s.regs.sregs.cr4 & X86_CR4_PAE) &&
-                   (run->s.regs.sregs.efer & EFER_LME),
-                   "vCPU should be in long mode, CR4.PAE=%d, EFER.LME=%d",
-                   !!(run->s.regs.sregs.cr4 & X86_CR4_PAE),
-                   !!(run->s.regs.sregs.efer & EFER_LME));
-
-       TEST_ASSERT_EQ(pthread_create(&thread, NULL, racer, (void *)run), 0);
-
-       for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
-               /*
-                * Reload known good state if the vCPU triple faults, e.g. due
-                * to the unhandled #GPs being injected.  VMX preserves state
-                * on shutdown, but SVM synthesizes an INIT as the VMCB state
-                * is architecturally undefined on triple fault.
-                */
-               if (!__vcpu_run(vcpu) && run->exit_reason == KVM_EXIT_SHUTDOWN)
-                       vcpu_load_state(vcpu, state);
-
-               if (racer == race_sregs_cr4) {
-                       tr = (struct kvm_translation) { .linear_address = 0 };
-                       __vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr);
-               }
-       }
-
-       TEST_ASSERT_EQ(pthread_cancel(thread), 0);
-       TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
-
-       kvm_x86_state_cleanup(state);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, read_invalid, guest_code)
-{
-       struct kvm_run *run = vcpu->run;
-       int rv;
-
-       /* Request reading invalid register set from VCPU. */
-       run->kvm_valid_regs = INVALID_SYNC_FIELD;
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
-                   rv);
-       run->kvm_valid_regs = 0;
-
-       run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
-                   rv);
-       run->kvm_valid_regs = 0;
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, set_invalid, guest_code)
-{
-       struct kvm_run *run = vcpu->run;
-       int rv;
-
-       /* Request setting invalid register set into VCPU. */
-       run->kvm_dirty_regs = INVALID_SYNC_FIELD;
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
-                   rv);
-       run->kvm_dirty_regs = 0;
-
-       run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
-                   rv);
-       run->kvm_dirty_regs = 0;
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, req_and_verify_all_valid, guest_code)
-{
-       struct kvm_run *run = vcpu->run;
-       struct kvm_vcpu_events events;
-       struct kvm_sregs sregs;
-       struct kvm_regs regs;
-
-       /* Request and verify all valid register sets. */
-       /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
-       run->kvm_valid_regs = TEST_SYNC_FIELDS;
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       vcpu_regs_get(vcpu, &regs);
-       compare_regs(&regs, &run->s.regs.regs);
-
-       vcpu_sregs_get(vcpu, &sregs);
-       compare_sregs(&sregs, &run->s.regs.sregs);
-
-       vcpu_events_get(vcpu, &events);
-       compare_vcpu_events(&events, &run->s.regs.events);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, set_and_verify_various, guest_code)
-{
-       struct kvm_run *run = vcpu->run;
-       struct kvm_vcpu_events events;
-       struct kvm_sregs sregs;
-       struct kvm_regs regs;
-
-       /* Run once to get register set */
-       run->kvm_valid_regs = TEST_SYNC_FIELDS;
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       /* Set and verify various register values. */
-       run->s.regs.regs.rbx = 0xBAD1DEA;
-       run->s.regs.sregs.apic_base = 1 << 11;
-       /* TODO run->s.regs.events.XYZ = ABC; */
-
-       run->kvm_valid_regs = TEST_SYNC_FIELDS;
-       run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-       TEST_ASSERT(run->s.regs.regs.rbx == 0xBAD1DEA + 1,
-                   "rbx sync regs value incorrect 0x%llx.",
-                   run->s.regs.regs.rbx);
-       TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11,
-                   "apic_base sync regs value incorrect 0x%llx.",
-                   run->s.regs.sregs.apic_base);
-
-       vcpu_regs_get(vcpu, &regs);
-       compare_regs(&regs, &run->s.regs.regs);
-
-       vcpu_sregs_get(vcpu, &sregs);
-       compare_sregs(&sregs, &run->s.regs.sregs);
-
-       vcpu_events_get(vcpu, &events);
-       compare_vcpu_events(&events, &run->s.regs.events);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_dirty_regs_bits, guest_code)
-{
-       struct kvm_run *run = vcpu->run;
-
-       /* Clear kvm_dirty_regs bits, verify new s.regs values are
-        * overwritten with existing guest values.
-        */
-       run->kvm_valid_regs = TEST_SYNC_FIELDS;
-       run->kvm_dirty_regs = 0;
-       run->s.regs.regs.rbx = 0xDEADBEEF;
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-       TEST_ASSERT(run->s.regs.regs.rbx != 0xDEADBEEF,
-                   "rbx sync regs value incorrect 0x%llx.",
-                   run->s.regs.regs.rbx);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_and_dirty_regs, guest_code)
-{
-       struct kvm_run *run = vcpu->run;
-       struct kvm_regs regs;
-
-       /* Run once to get register set */
-       run->kvm_valid_regs = TEST_SYNC_FIELDS;
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       /* Clear kvm_valid_regs bits and kvm_dirty_regs bits.
-        * Verify s.regs values are not overwritten with existing guest values
-        * and that guest values are not overwritten with kvm_sync_regs values.
-        */
-       run->kvm_valid_regs = 0;
-       run->kvm_dirty_regs = 0;
-       run->s.regs.regs.rbx = 0xAAAA;
-       vcpu_regs_get(vcpu, &regs);
-       regs.rbx = 0xBAC0;
-       vcpu_regs_set(vcpu, &regs);
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-       TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA,
-                   "rbx sync regs value incorrect 0x%llx.",
-                   run->s.regs.regs.rbx);
-       vcpu_regs_get(vcpu, &regs);
-       TEST_ASSERT(regs.rbx == 0xBAC0 + 1,
-                   "rbx guest value incorrect 0x%llx.",
-                   regs.rbx);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_regs_bits, guest_code)
-{
-       struct kvm_run *run = vcpu->run;
-       struct kvm_regs regs;
-
-       /* Run once to get register set */
-       run->kvm_valid_regs = TEST_SYNC_FIELDS;
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten
-        * with existing guest values but that guest values are overwritten
-        * with kvm_sync_regs values.
-        */
-       run->kvm_valid_regs = 0;
-       run->kvm_dirty_regs = TEST_SYNC_FIELDS;
-       run->s.regs.regs.rbx = 0xBBBB;
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-       TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB,
-                   "rbx sync regs value incorrect 0x%llx.",
-                   run->s.regs.regs.rbx);
-       vcpu_regs_get(vcpu, &regs);
-       TEST_ASSERT(regs.rbx == 0xBBBB + 1,
-                   "rbx guest value incorrect 0x%llx.",
-                   regs.rbx);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, race_cr4, guest_code)
-{
-       race_sync_regs(vcpu, race_sregs_cr4);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, race_exc, guest_code)
-{
-       race_sync_regs(vcpu, race_events_exc);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, race_inj_pen, guest_code)
-{
-       race_sync_regs(vcpu, race_events_inj_pen);
-}
-
-int main(int argc, char *argv[])
-{
-       int cap;
-
-       cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
-       TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS);
-       TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD));
-
-       return test_harness_run(argc, argv);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
deleted file mode 100644 (file)
index 56306a1..0000000
+++ /dev/null
@@ -1,124 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-#include "svm_util.h"
-
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "kselftest.h"
-
-#define ARBITRARY_IO_PORT      0x2000
-
-/* The virtual machine object. */
-static struct kvm_vm *vm;
-
-static void l2_guest_code(void)
-{
-       asm volatile("inb %%dx, %%al"
-                    : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
-}
-
-#define L2_GUEST_STACK_SIZE 64
-unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
-void l1_guest_code_vmx(struct vmx_pages *vmx)
-{
-
-       GUEST_ASSERT(vmx->vmcs_gpa);
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx));
-       GUEST_ASSERT(load_vmcs(vmx));
-
-       prepare_vmcs(vmx, l2_guest_code,
-                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       GUEST_ASSERT(!vmlaunch());
-       /* L2 should triple fault after a triple fault event is injected. */
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
-       GUEST_DONE();
-}
-
-void l1_guest_code_svm(struct svm_test_data *svm)
-{
-       struct vmcb *vmcb = svm->vmcb;
-
-       generic_svm_setup(svm, l2_guest_code,
-                       &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       /* Don't intercept shutdown, to test the case where SVM allows L1 to leave it unintercepted. */
-       vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
-
-       run_guest(vmcb, svm->vmcb_gpa);
-
-       /* Should not reach here; L1 should crash. */
-       GUEST_ASSERT(0);
-}
-
-int main(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_run *run;
-       struct kvm_vcpu_events events;
-       struct ucall uc;
-
-       bool has_vmx = kvm_cpu_has(X86_FEATURE_VMX);
-       bool has_svm = kvm_cpu_has(X86_FEATURE_SVM);
-
-       TEST_REQUIRE(has_vmx || has_svm);
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT));
-
-
-       if (has_vmx) {
-               vm_vaddr_t vmx_pages_gva;
-
-               vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx);
-               vcpu_alloc_vmx(vm, &vmx_pages_gva);
-               vcpu_args_set(vcpu, 1, vmx_pages_gva);
-       } else {
-               vm_vaddr_t svm_gva;
-
-               vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm);
-               vcpu_alloc_svm(vm, &svm_gva);
-               vcpu_args_set(vcpu, 1, svm_gva);
-       }
-
-       vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1);
-       run = vcpu->run;
-       vcpu_run(vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-       TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
-                   "Expected IN from port %d from L2, got port %d",
-                   ARBITRARY_IO_PORT, run->io.port);
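-
-       /*
-        * Queue a pending triple fault event from userspace, then complete
-        * the in-flight port I/O before re-entering the guest.
-        */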
-       vcpu_events_get(vcpu, &events);
-       events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
-       events.triple_fault.pending = true;
-       vcpu_events_set(vcpu, &events);
-       run->immediate_exit = true;
-       vcpu_run_complete_io(vcpu);
-
-       vcpu_events_get(vcpu, &events);
-       TEST_ASSERT(events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT,
-                   "Triple fault event invalid");
-       TEST_ASSERT(events.triple_fault.pending,
-                   "No triple fault pending");
-       vcpu_run(vcpu);
-
-
-       if (has_svm) {
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
-       } else {
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_DONE:
-                       break;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-               default:
-                       TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
-               }
-       }
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
deleted file mode 100644 (file)
index 12b0964..0000000
+++ /dev/null
@@ -1,161 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Tests for MSR_IA32_TSC and MSR_IA32_TSC_ADJUST.
- *
- * Copyright (C) 2020, Red Hat, Inc.
- */
-#include <stdio.h>
-#include <string.h>
-#include "kvm_util.h"
-#include "processor.h"
-
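-/*
- * All TSC values are rounded to the nearest multiple of UNITY (2^30 cycles)
- * so that the time that elapses between writing and reading the TSC doesn't
- * cause spurious mismatches.
- */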
-#define UNITY                  (1ull << 30)
-#define HOST_ADJUST            (UNITY * 64)
-#define GUEST_STEP             (UNITY * 4)
-#define ROUND(x)               ((x + UNITY / 2) & -UNITY)
-#define rounded_rdmsr(x)       ROUND(rdmsr(x))
-#define rounded_host_rdmsr(x)  ROUND(vcpu_get_msr(vcpu, x))
-
-static void guest_code(void)
-{
-       u64 val = 0;
-
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
-       /* Guest: writes to MSR_IA32_TSC affect both MSRs.  */
-       val = 1ull * GUEST_STEP;
-       wrmsr(MSR_IA32_TSC, val);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
-       /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs.  */
-       GUEST_SYNC(2);
-       val = 2ull * GUEST_STEP;
-       wrmsr(MSR_IA32_TSC_ADJUST, val);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
-       /* Host: setting the TSC offset.  */
-       GUEST_SYNC(3);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
-       /*
-        * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
-        * host-side offset and affect both MSRs.
-        */
-       GUEST_SYNC(4);
-       val = 3ull * GUEST_STEP;
-       wrmsr(MSR_IA32_TSC_ADJUST, val);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
-       /*
-        * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
-        * offset is now visible in MSR_IA32_TSC_ADJUST.
-        */
-       GUEST_SYNC(5);
-       val = 4ull * GUEST_STEP;
-       wrmsr(MSR_IA32_TSC, val);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
-
-       GUEST_DONE();
-}
-
-static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
-{
-       struct ucall uc;
-
-       vcpu_run(vcpu);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_SYNC:
-               if (!strcmp((const char *)uc.args[0], "hello") &&
-                   uc.args[1] == stage + 1)
-                       ksft_test_result_pass("stage %d passed\n", stage + 1);
-               else
-                       ksft_test_result_fail(
-                               "stage %d: Unexpected register values vmexit, got %lx",
-                               stage + 1, (ulong)uc.args[1]);
-               return;
-       case UCALL_DONE:
-               ksft_test_result_pass("stage %d passed\n", stage + 1);
-               return;
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-       default:
-               TEST_ASSERT(false, "Unexpected exit: %s",
-                           exit_reason_str(vcpu->run->exit_reason));
-       }
-}
-
-int main(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       uint64_t val;
-
-       ksft_print_header();
-       ksft_set_plan(5);
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       val = 0;
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
-       /* Guest: writes to MSR_IA32_TSC affect both MSRs.  */
-       run_vcpu(vcpu, 1);
-       val = 1ull * GUEST_STEP;
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
-       /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs.  */
-       run_vcpu(vcpu, 2);
-       val = 2ull * GUEST_STEP;
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
-       /*
-        * Host: writes to MSR_IA32_TSC set the host-side offset
-        * and therefore do not change MSR_IA32_TSC_ADJUST.
-        */
-       vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val);
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
-       run_vcpu(vcpu, 3);
-
-       /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC.  */
-       vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456);
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456);
-
-       /* Restore previous value.  */
-       vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val);
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
-       /*
-        * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
-        * host-side offset and affect both MSRs.
-        */
-       run_vcpu(vcpu, 4);
-       val = 3ull * GUEST_STEP;
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
-       /*
-        * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
-        * offset is now visible in MSR_IA32_TSC_ADJUST.
-        */
-       run_vcpu(vcpu, 5);
-       val = 4ull * GUEST_STEP;
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
-
-       kvm_vm_free(vm);
-
-       ksft_finished();        /* Print results and exit() accordingly */
-}
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
deleted file mode 100644 (file)
index 59c7304..0000000
+++ /dev/null
@@ -1,110 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright © 2021 Amazon.com, Inc. or its affiliates.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-#include <stdint.h>
-#include <time.h>
-#include <sched.h>
-#include <signal.h>
-#include <pthread.h>
-
-#define NR_TEST_VCPUS 20
-
-static struct kvm_vm *vm;
-pthread_spinlock_t create_lock;
-
-#define TEST_TSC_KHZ    2345678UL
-#define TEST_TSC_OFFSET 200000000
-
-uint64_t tsc_sync;
-static void guest_code(void)
-{
-       uint64_t start_tsc, local_tsc, tmp;
-
-       start_tsc = rdtsc();
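-
-       /*
-        * Ping-pong the most recently observed TSC value through the shared
-        * tsc_sync variable; if this vCPU's TSC reads older than the value
-        * another vCPU just published, the TSCs are out of sync.
-        */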
-       do {
-               tmp = READ_ONCE(tsc_sync);
-               local_tsc = rdtsc();
-               WRITE_ONCE(tsc_sync, local_tsc);
-               if (unlikely(local_tsc < tmp))
-                       GUEST_SYNC_ARGS(0, local_tsc, tmp, 0, 0);
-
-       } while (local_tsc - start_tsc < 5000 * TEST_TSC_KHZ);
-
-       GUEST_DONE();
-}
-
-
-static void *run_vcpu(void *_cpu_nr)
-{
-       unsigned long vcpu_id = (unsigned long)_cpu_nr;
-       unsigned long failures = 0;
-       static bool first_cpu_done;
-       struct kvm_vcpu *vcpu;
-
-       /* The kernel is fine, but vm_vcpu_add() needs locking */
-       pthread_spin_lock(&create_lock);
-
-       vcpu = vm_vcpu_add(vm, vcpu_id, guest_code);
-
-       if (!first_cpu_done) {
-               first_cpu_done = true;
-               vcpu_set_msr(vcpu, MSR_IA32_TSC, TEST_TSC_OFFSET);
-       }
-
-       pthread_spin_unlock(&create_lock);
-
-       for (;;) {
-               struct ucall uc;
-
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_DONE:
-                       goto out;
-
-               case UCALL_SYNC:
-                       printf("Guest %d sync %lx %lx %ld\n", vcpu->id,
-                              uc.args[2], uc.args[3], uc.args[2] - uc.args[3]);
-                       failures++;
-                       break;
-
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-       }
- out:
-       return (void *)failures;
-}
-
-int main(int argc, char *argv[])
-{
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_TSC_CONTROL));
-
-       vm = vm_create(NR_TEST_VCPUS);
-       vm_ioctl(vm, KVM_SET_TSC_KHZ, (void *) TEST_TSC_KHZ);
-
-       pthread_spin_init(&create_lock, PTHREAD_PROCESS_PRIVATE);
-       pthread_t cpu_threads[NR_TEST_VCPUS];
-       unsigned long cpu;
-       for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++)
-               pthread_create(&cpu_threads[cpu], NULL, run_vcpu, (void *)cpu);
-
-       unsigned long failures = 0;
-       for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++) {
-               void *this_cpu_failures;
-               pthread_join(cpu_threads[cpu], &this_cpu_failures);
-               failures += (unsigned long)this_cpu_failures;
-       }
-
-       TEST_ASSERT(!failures, "TSC sync failed");
-       pthread_spin_destroy(&create_lock);
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
deleted file mode 100644 (file)
index 57f157c..0000000
+++ /dev/null
@@ -1,295 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ucna_injection_test
- *
- * Copyright (C) 2022, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Test that user space can inject UnCorrectable No Action required (UCNA)
- * memory errors to the guest.
- *
- * The test starts one vCPU with MCG_CMCI_P enabled. It verifies that
- * proper UCNA errors can be injected into a vCPU with MCG_CMCI_P and the
- * corresponding per-bank control register (MCI_CTL2) bit enabled.
- * The test also checks that the UCNA errors get recorded in the
- * Machine Check bank registers regardless of whether the error signal
- * interrupts are delivered to the guest.
- *
- */
-#include <pthread.h>
-#include <inttypes.h>
-#include <string.h>
-#include <time.h>
-
-#include "kvm_util.h"
-#include "mce.h"
-#include "processor.h"
-#include "test_util.h"
-#include "apic.h"
-
-#define SYNC_FIRST_UCNA 9
-#define SYNC_SECOND_UCNA 10
-#define SYNC_GP 11
-#define FIRST_UCNA_ADDR 0xdeadbeef
-#define SECOND_UCNA_ADDR 0xcafeb0ba
-
-/*
- * Vector for the CMCI interrupt.
- * Value is arbitrary. Any value in 0x20-0xFF should work:
- * https://wiki.osdev.org/Interrupt_Vector_Table
- */
-#define CMCI_VECTOR  0xa9
-
-#define UCNA_BANK  0x7 // IMC0 bank
-
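-/* Writing this reserved MCI_CTL2 bit from the guest is expected to #GP. */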
-#define MCI_CTL2_RESERVED_BIT BIT_ULL(29)
-
-static uint64_t supported_mcg_caps;
-
-/*
- * Record state about the injected UCNAs.
- * Variables starting with the 'i_' prefix are recorded in the interrupt
- * handler. Variables without the prefix are recorded in the guest's main
- * execution thread.
- */
-static volatile uint64_t i_ucna_rcvd;
-static volatile uint64_t i_ucna_addr;
-static volatile uint64_t ucna_addr;
-static volatile uint64_t ucna_addr2;
-
-struct thread_params {
-       struct kvm_vcpu *vcpu;
-       uint64_t *p_i_ucna_rcvd;
-       uint64_t *p_i_ucna_addr;
-       uint64_t *p_ucna_addr;
-       uint64_t *p_ucna_addr2;
-};
-
-static void verify_apic_base_addr(void)
-{
-       uint64_t msr = rdmsr(MSR_IA32_APICBASE);
-       uint64_t base = GET_APIC_BASE(msr);
-
-       GUEST_ASSERT(base == APIC_DEFAULT_GPA);
-}
-
-static void ucna_injection_guest_code(void)
-{
-       uint64_t ctl2;
-       verify_apic_base_addr();
-       xapic_enable();
-
-       /* Sets up the interrupt vector and enables per-bank CMCI signaling. */
-       xapic_write_reg(APIC_LVTCMCI, CMCI_VECTOR | APIC_DM_FIXED);
-       ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
-       wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
-
-       /* Enables interrupts in the guest. */
-       asm volatile("sti");
-
-       /* Let user space inject the first UCNA */
-       GUEST_SYNC(SYNC_FIRST_UCNA);
-
-       ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
-
-       /* Disables the per-bank CMCI signaling. */
-       ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
-       wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 & ~MCI_CTL2_CMCI_EN);
-
-       /* Let the user space inject the second UCNA */
-       GUEST_SYNC(SYNC_SECOND_UCNA);
-
-       ucna_addr2 = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
-       GUEST_DONE();
-}
-
-static void cmci_disabled_guest_code(void)
-{
-       uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
-       wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
-
-       GUEST_DONE();
-}
-
-static void cmci_enabled_guest_code(void)
-{
-       uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
-       wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_RESERVED_BIT);
-
-       GUEST_DONE();
-}
-
-static void guest_cmci_handler(struct ex_regs *regs)
-{
-       i_ucna_rcvd++;
-       i_ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
-       xapic_write_reg(APIC_EOI, 0);
-}
-
-static void guest_gp_handler(struct ex_regs *regs)
-{
-       GUEST_SYNC(SYNC_GP);
-}
-
-static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       vcpu_run(vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-       TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC,
-                   "Expect UCALL_SYNC");
-       TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected.");
-       printf("vCPU received GP in guest.\n");
-}
-
-static void inject_ucna(struct kvm_vcpu *vcpu, uint64_t addr)
-{
-       /*
-        * A UCNA error is indicated with VAL=1, UC=1, PCC=0, S=0 and AR=0 in
-        * the IA32_MCi_STATUS register.
-        * MSCOD=1 (BIT[16] - MscodDataRdErr).
-        * MCACOD=0x0090 (Memory controller error format, channel 0)
-        */
-       uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
-                         MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090;
-       struct kvm_x86_mce mce = {};
-       mce.status = status;
-       mce.mcg_status = 0;
-       /*
-        * MCM_ADDR_PHYS indicates the reported address is a physical address.
-        * The lowest 6 bits are the recoverable address LSB, i.e., the
-        * injected MCE is at 4KB granularity.
-        */
-       mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
-       mce.addr = addr;
-       mce.bank = UCNA_BANK;
-
-       vcpu_ioctl(vcpu, KVM_X86_SET_MCE, &mce);
-}
-
-static void *run_ucna_injection(void *arg)
-{
-       struct thread_params *params = (struct thread_params *)arg;
-       struct ucall uc;
-       int old;
-       int r;
-
-       r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
-       TEST_ASSERT(r == 0,
-                   "pthread_setcanceltype failed with errno=%d",
-                   r);
-
-       vcpu_run(params->vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
-       TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
-                   "Expect UCALL_SYNC");
-       TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA.");
-
-       printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR);
-
-       inject_ucna(params->vcpu, FIRST_UCNA_ADDR);
-       vcpu_run(params->vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
-       TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
-                   "Expect UCALL_SYNC");
-       TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA.");
-
-       printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR);
-
-       inject_ucna(params->vcpu, SECOND_UCNA_ADDR);
-       vcpu_run(params->vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
-       if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) {
-               TEST_ASSERT(false, "vCPU assertion failure: %s.",
-                           (const char *)uc.args[0]);
-       }
-
-       return NULL;
-}
-
-static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *params)
-{
-       struct kvm_vm *vm = vcpu->vm;
-       params->vcpu = vcpu;
-       params->p_i_ucna_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_rcvd);
-       params->p_i_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_addr);
-       params->p_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr);
-       params->p_ucna_addr2 = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr2);
-
-       run_ucna_injection(params);
-
-       TEST_ASSERT(*params->p_i_ucna_rcvd == 1,
-                   "Only the first UCNA should be signaled.");
-       TEST_ASSERT(*params->p_i_ucna_addr == FIRST_UCNA_ADDR,
-                   "Only the first UCNA's address should be recorded via the interrupt.");
-       TEST_ASSERT(*params->p_ucna_addr == FIRST_UCNA_ADDR,
-                   "The first injected UCNA should be exposed via the registers.");
-       TEST_ASSERT(*params->p_ucna_addr2 == SECOND_UCNA_ADDR,
-                   "The second injected UCNA should be exposed via the registers.");
-
-       printf("Test successful.\n"
-              "UCNA CMCI interrupts received: %ld\n"
-              "Last UCNA address received via CMCI: %lx\n"
-              "First UCNA address in vCPU thread: %lx\n"
-              "Second UCNA address in vCPU thread: %lx\n",
-              *params->p_i_ucna_rcvd, *params->p_i_ucna_addr,
-              *params->p_ucna_addr, *params->p_ucna_addr2);
-}
-
-static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p)
-{
-       uint64_t mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS;
-       if (enable_cmci_p)
-               mcg_caps |= MCG_CMCI_P;
-
-       mcg_caps &= supported_mcg_caps | MCG_CAP_BANKS_MASK;
-       vcpu_ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_caps);
-}
-
-static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, uint32_t vcpuid,
-                                                bool enable_cmci_p, void *guest_code)
-{
-       struct kvm_vcpu *vcpu = vm_vcpu_add(vm, vcpuid, guest_code);
-       setup_mce_cap(vcpu, enable_cmci_p);
-       return vcpu;
-}
-
-int main(int argc, char *argv[])
-{
-       struct thread_params params;
-       struct kvm_vm *vm;
-       struct kvm_vcpu *ucna_vcpu;
-       struct kvm_vcpu *cmcidis_vcpu;
-       struct kvm_vcpu *cmci_vcpu;
-
-       kvm_check_cap(KVM_CAP_MCE);
-
-       vm = __vm_create(VM_SHAPE_DEFAULT, 3, 0);
-
-       kvm_ioctl(vm->kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED,
-                 &supported_mcg_caps);
-
-       if (!(supported_mcg_caps & MCG_CMCI_P)) {
-               print_skip("MCG_CMCI_P is not supported");
-               exit(KSFT_SKIP);
-       }
-
-       ucna_vcpu = create_vcpu_with_mce_cap(vm, 0, true, ucna_injection_guest_code);
-       cmcidis_vcpu = create_vcpu_with_mce_cap(vm, 1, false, cmci_disabled_guest_code);
-       cmci_vcpu = create_vcpu_with_mce_cap(vm, 2, true, cmci_enabled_guest_code);
-
-       vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler);
-       vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
-
-       virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
-
-       test_ucna_injection(ucna_vcpu, &params);
-       run_vcpu_expect_gp(cmcidis_vcpu);
-       run_vcpu_expect_gp(cmci_vcpu);
-
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
deleted file mode 100644 (file)
index 9481cbc..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-
-static void guest_ins_port80(uint8_t *buffer, unsigned int count)
-{
-       unsigned long end;
-
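-       /*
-        * main() rewrites RCX 2 => 1 and 3 => 8192 at the userspace I/O exit,
-        * so expect the destination pointer to advance by 1 or 8192 bytes,
-        * respectively.
-        */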
-       if (count == 2)
-               end = (unsigned long)buffer + 1;
-       else
-               end = (unsigned long)buffer + 8192;
-
-       asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory");
-       GUEST_ASSERT_EQ(count, 0);
-       GUEST_ASSERT_EQ((unsigned long)buffer, end);
-}
-
-static void guest_code(void)
-{
-       uint8_t buffer[8192];
-       int i;
-
-       /*
-        * Special case tests.  main() will adjust RCX 2 => 1 and 3 => 8192 to
-        * test that KVM doesn't explode when userspace modifies the "count" on
-        * a userspace I/O exit.  KVM isn't required to play nice with the I/O
-        * itself as KVM doesn't support manipulating the count; it just needs
-        * to not explode or overflow a buffer.
-        */
-       guest_ins_port80(buffer, 2);
-       guest_ins_port80(buffer, 3);
-
-       /* Verify KVM fills the buffer correctly when not stuffing RCX. */
-       memset(buffer, 0, sizeof(buffer));
-       guest_ins_port80(buffer, 8192);
-       for (i = 0; i < 8192; i++)
-               __GUEST_ASSERT(buffer[i] == 0xaa,
-                              "Expected '0xaa', got '0x%x' at buffer[%u]",
-                              buffer[i], i);
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_regs regs;
-       struct kvm_run *run;
-       struct kvm_vm *vm;
-       struct ucall uc;
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       run = vcpu->run;
-
-       memset(&regs, 0, sizeof(regs));
-
-       while (1) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               if (get_ucall(vcpu, &uc))
-                       break;
-
-               TEST_ASSERT(run->io.port == 0x80,
-                           "Expected I/O at port 0x80, got port 0x%x", run->io.port);
-
-               /*
-                * Modify the rep string count in RCX: 2 => 1 and 3 => 8192.
-                * Note, this abuses KVM's batching of rep string I/O to avoid
-                * getting stuck in an infinite loop.  That behavior isn't in
-                * scope from a testing perspective as it's not ABI in any way,
-                * i.e. it really is abusing internal KVM knowledge.
-                */
-               vcpu_regs_get(vcpu, &regs);
-               if (regs.rcx == 2)
-                       regs.rcx = 1;
-               if (regs.rcx == 3)
-                       regs.rcx = 8192;
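-               /* Fill KVM's I/O buffer with 0xaa so the guest can verify the data. */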
-               memset((void *)run + run->io.data_offset, 0xaa, 4096);
-               vcpu_regs_set(vcpu, &regs);
-       }
-
-       switch (uc.cmd) {
-       case UCALL_DONE:
-               break;
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-       default:
-               TEST_FAIL("Unknown ucall %lu", uc.cmd);
-       }
-
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
deleted file mode 100644 (file)
index 32b2794..0000000
+++ /dev/null
@@ -1,769 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2020, Google LLC.
- *
- * Tests for exiting into userspace on registered MSRs
- */
-#include <sys/ioctl.h>
-
-#include "kvm_test_harness.h"
-#include "test_util.h"
-#include "kvm_util.h"
-#include "vmx.h"
-
-#define MSR_NON_EXISTENT 0x474f4f00
-
-static u64 deny_bits = 0;
-struct kvm_msr_filter filter_allow = {
-       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
-       .ranges = {
-               {
-                       .flags = KVM_MSR_FILTER_READ |
-                                KVM_MSR_FILTER_WRITE,
-                       .nmsrs = 1,
-                       /* Test an MSR the kernel knows about. */
-                       .base = MSR_IA32_XSS,
-                       .bitmap = (uint8_t*)&deny_bits,
-               }, {
-                       .flags = KVM_MSR_FILTER_READ |
-                                KVM_MSR_FILTER_WRITE,
-                       .nmsrs = 1,
-                       /* Test an MSR the kernel doesn't know about. */
-                       .base = MSR_IA32_FLUSH_CMD,
-                       .bitmap = (uint8_t*)&deny_bits,
-               }, {
-                       .flags = KVM_MSR_FILTER_READ |
-                                KVM_MSR_FILTER_WRITE,
-                       .nmsrs = 1,
-                       /* Test a fabricated MSR that no one knows about. */
-                       .base = MSR_NON_EXISTENT,
-                       .bitmap = (uint8_t*)&deny_bits,
-               },
-       },
-};
-
-struct kvm_msr_filter filter_fs = {
-       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
-       .ranges = {
-               {
-                       .flags = KVM_MSR_FILTER_READ,
-                       .nmsrs = 1,
-                       .base = MSR_FS_BASE,
-                       .bitmap = (uint8_t*)&deny_bits,
-               },
-       },
-};
-
-struct kvm_msr_filter filter_gs = {
-       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
-       .ranges = {
-               {
-                       .flags = KVM_MSR_FILTER_READ,
-                       .nmsrs = 1,
-                       .base = MSR_GS_BASE,
-                       .bitmap = (uint8_t*)&deny_bits,
-               },
-       },
-};
-
-static uint64_t msr_non_existent_data;
-static int guest_exception_count;
-static u32 msr_reads, msr_writes;
-
-static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_deadbeef[1] = { 0x1 };
-
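-/* Clear the MSR's bit in a filter bitmap; a clear bit denies the access. */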
-static void deny_msr(uint8_t *bitmap, u32 msr)
-{
-       u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1);
-
-       bitmap[idx / 8] &= ~(1 << (idx % 8));
-}
-
-static void prepare_bitmaps(void)
-{
-       memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000));
-       memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write));
-       memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000));
-       memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000));
-       memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read));
-
-       deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL);
-       deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK);
-       deny_msr(bitmap_c0000000_read, MSR_GS_BASE);
-}
-
-struct kvm_msr_filter filter_deny = {
-       .flags = KVM_MSR_FILTER_DEFAULT_DENY,
-       .ranges = {
-               {
-                       .flags = KVM_MSR_FILTER_READ,
-                       .base = 0x00000000,
-                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-                       .bitmap = bitmap_00000000,
-               }, {
-                       .flags = KVM_MSR_FILTER_WRITE,
-                       .base = 0x00000000,
-                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-                       .bitmap = bitmap_00000000_write,
-               }, {
-                       .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE,
-                       .base = 0x40000000,
-                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-                       .bitmap = bitmap_40000000,
-               }, {
-                       .flags = KVM_MSR_FILTER_READ,
-                       .base = 0xc0000000,
-                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-                       .bitmap = bitmap_c0000000_read,
-               }, {
-                       .flags = KVM_MSR_FILTER_WRITE,
-                       .base = 0xc0000000,
-                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-                       .bitmap = bitmap_c0000000,
-               }, {
-                       .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ,
-                       .base = 0xdeadbeef,
-                       .nmsrs = 1,
-                       .bitmap = bitmap_deadbeef,
-               },
-       },
-};
-
-struct kvm_msr_filter no_filter_deny = {
-       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
-};
-
-/*
- * Note: Force test_rdmsr() to not be inlined to prevent the labels,
- * rdmsr_start and rdmsr_end, from being defined multiple times.
- */
-static noinline uint64_t test_rdmsr(uint32_t msr)
-{
-       uint32_t a, d;
-
-       guest_exception_count = 0;
-
-       __asm__ __volatile__("rdmsr_start: rdmsr; rdmsr_end:" :
-                       "=a"(a), "=d"(d) : "c"(msr) : "memory");
-
-       return a | ((uint64_t) d << 32);
-}
-
-/*
- * Note: Force test_wrmsr() to not be inlined to prevent the labels,
- * wrmsr_start and wrmsr_end, from being defined multiple times.
- */
-static noinline void test_wrmsr(uint32_t msr, uint64_t value)
-{
-       uint32_t a = value;
-       uint32_t d = value >> 32;
-
-       guest_exception_count = 0;
-
-       __asm__ __volatile__("wrmsr_start: wrmsr; wrmsr_end:" ::
-                       "a"(a), "d"(d), "c"(msr) : "memory");
-}
-
-extern char rdmsr_start, rdmsr_end;
-extern char wrmsr_start, wrmsr_end;
-
-/*
- * Note: Force test_em_rdmsr() to not be inlined to prevent the labels,
- * em_rdmsr_start and em_rdmsr_end, from being defined multiple times.
- */
-static noinline uint64_t test_em_rdmsr(uint32_t msr)
-{
-       uint32_t a, d;
-
-       guest_exception_count = 0;
-
-       __asm__ __volatile__(KVM_FEP "em_rdmsr_start: rdmsr; em_rdmsr_end:" :
-                       "=a"(a), "=d"(d) : "c"(msr) : "memory");
-
-       return a | ((uint64_t) d << 32);
-}
-
-/*
- * Note: Force test_em_wrmsr() to not be inlined to prevent the labels,
- * em_wrmsr_start and em_wrmsr_end, from being defined multiple times.
- */
-static noinline void test_em_wrmsr(uint32_t msr, uint64_t value)
-{
-       uint32_t a = value;
-       uint32_t d = value >> 32;
-
-       guest_exception_count = 0;
-
-       __asm__ __volatile__(KVM_FEP "em_wrmsr_start: wrmsr; em_wrmsr_end:" ::
-                       "a"(a), "d"(d), "c"(msr) : "memory");
-}
-
-extern char em_rdmsr_start, em_rdmsr_end;
-extern char em_wrmsr_start, em_wrmsr_end;
-
-static void guest_code_filter_allow(void)
-{
-       uint64_t data;
-
-       /*
-        * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_XSS.
-        *
-        * A GP is thrown if anything other than 0 is written to
-        * MSR_IA32_XSS.
-        */
-       data = test_rdmsr(MSR_IA32_XSS);
-       GUEST_ASSERT(data == 0);
-       GUEST_ASSERT(guest_exception_count == 0);
-
-       test_wrmsr(MSR_IA32_XSS, 0);
-       GUEST_ASSERT(guest_exception_count == 0);
-
-       test_wrmsr(MSR_IA32_XSS, 1);
-       GUEST_ASSERT(guest_exception_count == 1);
-
-       /*
-        * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_FLUSH_CMD.
-        *
-        * A GP is thrown if MSR_IA32_FLUSH_CMD is read
-        * from or if a value other than 1 is written to it.
-        */
-       test_rdmsr(MSR_IA32_FLUSH_CMD);
-       GUEST_ASSERT(guest_exception_count == 1);
-
-       test_wrmsr(MSR_IA32_FLUSH_CMD, 0);
-       GUEST_ASSERT(guest_exception_count == 1);
-
-       test_wrmsr(MSR_IA32_FLUSH_CMD, 1);
-       GUEST_ASSERT(guest_exception_count == 0);
-
-       /*
-        * Test userspace intercepting rdmsr / wrmsr for MSR_NON_EXISTENT.
-        *
-        * Test that a fabricated MSR can pass through the kernel
-        * and be handled in userspace.
-        */
-       test_wrmsr(MSR_NON_EXISTENT, 2);
-       GUEST_ASSERT(guest_exception_count == 0);
-
-       data = test_rdmsr(MSR_NON_EXISTENT);
-       GUEST_ASSERT(data == 2);
-       GUEST_ASSERT(guest_exception_count == 0);
-
-       if (is_forced_emulation_enabled) {
-               /* Let userspace know we aren't done. */
-               GUEST_SYNC(0);
-
-               /*
-                * Now run the same tests with the instruction emulator.
-                */
-               data = test_em_rdmsr(MSR_IA32_XSS);
-               GUEST_ASSERT(data == 0);
-               GUEST_ASSERT(guest_exception_count == 0);
-               test_em_wrmsr(MSR_IA32_XSS, 0);
-               GUEST_ASSERT(guest_exception_count == 0);
-               test_em_wrmsr(MSR_IA32_XSS, 1);
-               GUEST_ASSERT(guest_exception_count == 1);
-
-               test_em_rdmsr(MSR_IA32_FLUSH_CMD);
-               GUEST_ASSERT(guest_exception_count == 1);
-               test_em_wrmsr(MSR_IA32_FLUSH_CMD, 0);
-               GUEST_ASSERT(guest_exception_count == 1);
-               test_em_wrmsr(MSR_IA32_FLUSH_CMD, 1);
-               GUEST_ASSERT(guest_exception_count == 0);
-
-               test_em_wrmsr(MSR_NON_EXISTENT, 2);
-               GUEST_ASSERT(guest_exception_count == 0);
-               data = test_em_rdmsr(MSR_NON_EXISTENT);
-               GUEST_ASSERT(data == 2);
-               GUEST_ASSERT(guest_exception_count == 0);
-       }
-
-       GUEST_DONE();
-}
-
-static void guest_msr_calls(bool trapped)
-{
-       /* This goes into the in-kernel emulation */
-       wrmsr(MSR_SYSCALL_MASK, 0);
-
-       if (trapped) {
-               /* This goes into user space emulation */
-               GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK);
-               GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE);
-       } else {
-               GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK);
-               GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE);
-       }
-
-       /* If trapped == true, this goes into user space emulation */
-       wrmsr(MSR_IA32_POWER_CTL, 0x1234);
-
-       /* This goes into the in-kernel emulation */
-       rdmsr(MSR_IA32_POWER_CTL);
-
-       /* Invalid MSR, should always be handled by user space exit */
-       GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef);
-       wrmsr(0xdeadbeef, 0x1234);
-}
-
-static void guest_code_filter_deny(void)
-{
-       guest_msr_calls(true);
-
-       /*
-        * Disable MSR filtering so that the kernel handles everything in
-        * the next round.
-        */
-       GUEST_SYNC(0);
-
-       guest_msr_calls(false);
-
-       GUEST_DONE();
-}
-
-static void guest_code_permission_bitmap(void)
-{
-       uint64_t data;
-
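-       /*
-        * Userspace returns the MSR index as the read data for filtered
-        * accesses, so reading back the index proves the access bounced to
-        * userspace.
-        */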
-       data = test_rdmsr(MSR_FS_BASE);
-       GUEST_ASSERT(data == MSR_FS_BASE);
-       data = test_rdmsr(MSR_GS_BASE);
-       GUEST_ASSERT(data != MSR_GS_BASE);
-
-       /* Let userspace know to switch the filter */
-       GUEST_SYNC(0);
-
-       data = test_rdmsr(MSR_FS_BASE);
-       GUEST_ASSERT(data != MSR_FS_BASE);
-       data = test_rdmsr(MSR_GS_BASE);
-       GUEST_ASSERT(data == MSR_GS_BASE);
-
-       GUEST_DONE();
-}
-
-static void __guest_gp_handler(struct ex_regs *regs,
-                              char *r_start, char *r_end,
-                              char *w_start, char *w_end)
-{
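-       /*
-        * Skip the faulting RDMSR/WRMSR by advancing RIP to the matching end
-        * label; for RDMSR, also zero RDX:RAX so the guest reads back 0.
-        */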
-       if (regs->rip == (uintptr_t)r_start) {
-               regs->rip = (uintptr_t)r_end;
-               regs->rax = 0;
-               regs->rdx = 0;
-       } else if (regs->rip == (uintptr_t)w_start) {
-               regs->rip = (uintptr_t)w_end;
-       } else {
-               GUEST_ASSERT(!"RIP is at an unknown location!");
-       }
-
-       ++guest_exception_count;
-}
-
-static void guest_gp_handler(struct ex_regs *regs)
-{
-       __guest_gp_handler(regs, &rdmsr_start, &rdmsr_end,
-                          &wrmsr_start, &wrmsr_end);
-}
-
-static void guest_fep_gp_handler(struct ex_regs *regs)
-{
-       __guest_gp_handler(regs, &em_rdmsr_start, &em_rdmsr_end,
-                          &em_wrmsr_start, &em_wrmsr_end);
-}
-
-static void check_for_guest_assert(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       if (vcpu->run->exit_reason == KVM_EXIT_IO &&
-           get_ucall(vcpu, &uc) == UCALL_ABORT) {
-               REPORT_GUEST_ASSERT(uc);
-       }
-}
-
-static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
-{
-       struct kvm_run *run = vcpu->run;
-
-       check_for_guest_assert(vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_RDMSR);
-       TEST_ASSERT(run->msr.index == msr_index,
-                       "Unexpected msr (0x%04x), expected 0x%04x",
-                       run->msr.index, msr_index);
-
-       switch (run->msr.index) {
-       case MSR_IA32_XSS:
-               run->msr.data = 0;
-               break;
-       case MSR_IA32_FLUSH_CMD:
-               run->msr.error = 1;
-               break;
-       case MSR_NON_EXISTENT:
-               run->msr.data = msr_non_existent_data;
-               break;
-       case MSR_FS_BASE:
-               run->msr.data = MSR_FS_BASE;
-               break;
-       case MSR_GS_BASE:
-               run->msr.data = MSR_GS_BASE;
-               break;
-       default:
-               TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
-       }
-}
-
-static void process_wrmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
-{
-       struct kvm_run *run = vcpu->run;
-
-       check_for_guest_assert(vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_WRMSR);
-       TEST_ASSERT(run->msr.index == msr_index,
-                       "Unexpected msr (0x%04x), expected 0x%04x",
-                       run->msr.index, msr_index);
-
-       switch (run->msr.index) {
-       case MSR_IA32_XSS:
-               if (run->msr.data != 0)
-                       run->msr.error = 1;
-               break;
-       case MSR_IA32_FLUSH_CMD:
-               if (run->msr.data != 1)
-                       run->msr.error = 1;
-               break;
-       case MSR_NON_EXISTENT:
-               msr_non_existent_data = run->msr.data;
-               break;
-       default:
-               TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
-       }
-}
-
-static void process_ucall_done(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       check_for_guest_assert(vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE,
-                   "Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
-                   uc.cmd, UCALL_DONE);
-}
-
-static uint64_t process_ucall(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc = {};
-
-       check_for_guest_assert(vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_SYNC:
-               break;
-       case UCALL_ABORT:
-               check_for_guest_assert(vcpu);
-               break;
-       case UCALL_DONE:
-               process_ucall_done(vcpu);
-               break;
-       default:
-               TEST_ASSERT(false, "Unexpected ucall");
-       }
-
-       return uc.cmd;
-}
-
-static void run_guest_then_process_rdmsr(struct kvm_vcpu *vcpu,
-                                        uint32_t msr_index)
-{
-       vcpu_run(vcpu);
-       process_rdmsr(vcpu, msr_index);
-}
-
-static void run_guest_then_process_wrmsr(struct kvm_vcpu *vcpu,
-                                        uint32_t msr_index)
-{
-       vcpu_run(vcpu);
-       process_wrmsr(vcpu, msr_index);
-}
-
-static uint64_t run_guest_then_process_ucall(struct kvm_vcpu *vcpu)
-{
-       vcpu_run(vcpu);
-       return process_ucall(vcpu);
-}
-
-static void run_guest_then_process_ucall_done(struct kvm_vcpu *vcpu)
-{
-       vcpu_run(vcpu);
-       process_ucall_done(vcpu);
-}
-
-KVM_ONE_VCPU_TEST_SUITE(user_msr);
-
-KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
-{
-       struct kvm_vm *vm = vcpu->vm;
-       uint64_t cmd;
-       int rc;
-
-       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
-       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
-       vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
-
-       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
-       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
-
-       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
-
-       vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
-
-       /* Process guest code userspace exits. */
-       run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
-       run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
-       run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
-
-       run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
-       run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
-       run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
-
-       run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
-       run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
-
-       vcpu_run(vcpu);
-       cmd = process_ucall(vcpu);
-
-       if (is_forced_emulation_enabled) {
-               TEST_ASSERT_EQ(cmd, UCALL_SYNC);
-               vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
-
-               /* Process emulated rdmsr and wrmsr instructions. */
-               run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
-               run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
-               run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
-
-               run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
-               run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
-               run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
-
-               run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
-               run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
-
-               /* Confirm the guest completed without issues. */
-               run_guest_then_process_ucall_done(vcpu);
-       } else {
-               TEST_ASSERT_EQ(cmd, UCALL_DONE);
-               printf("To run the emulated instruction tests, set the module parameter 'kvm.force_emulation_prefix=1'\n");
-       }
-}
-
-static int handle_ucall(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-       case UCALL_SYNC:
-               vm_ioctl(vcpu->vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny);
-               break;
-       case UCALL_DONE:
-               return 1;
-       default:
-               TEST_FAIL("Unknown ucall %lu", uc.cmd);
-       }
-
-       return 0;
-}
-
-static void handle_rdmsr(struct kvm_run *run)
-{
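-       /*
-        * Reflect the MSR index back as the read data so the guest can tell
-        * that the access was handled in userspace.
-        */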
-       run->msr.data = run->msr.index;
-       msr_reads++;
-
-       if (run->msr.index == MSR_SYSCALL_MASK ||
-           run->msr.index == MSR_GS_BASE) {
-               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
-                           "MSR read trap w/o access fault");
-       }
-
-       if (run->msr.index == 0xdeadbeef) {
-               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
-                           "MSR deadbeef read trap w/o inval fault");
-       }
-}
-
-static void handle_wrmsr(struct kvm_run *run)
-{
-       /* ignore */
-       msr_writes++;
-
-       if (run->msr.index == MSR_IA32_POWER_CTL) {
-               TEST_ASSERT(run->msr.data == 0x1234,
-                           "MSR data for MSR_IA32_POWER_CTL incorrect");
-               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
-                           "MSR_IA32_POWER_CTL trap w/o access fault");
-       }
-
-       if (run->msr.index == 0xdeadbeef) {
-               TEST_ASSERT(run->msr.data == 0x1234,
-                           "MSR data for deadbeef incorrect");
-               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
-                           "deadbeef trap w/o inval fault");
-       }
-}
-
-KVM_ONE_VCPU_TEST(user_msr, msr_filter_deny, guest_code_filter_deny)
-{
-       struct kvm_vm *vm = vcpu->vm;
-       struct kvm_run *run = vcpu->run;
-       int rc;
-
-       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
-       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
-       vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_INVAL |
-                                                     KVM_MSR_EXIT_REASON_UNKNOWN |
-                                                     KVM_MSR_EXIT_REASON_FILTER);
-
-       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
-       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
-
-       prepare_bitmaps();
-       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_deny);
-
-       while (1) {
-               vcpu_run(vcpu);
-
-               switch (run->exit_reason) {
-               case KVM_EXIT_X86_RDMSR:
-                       handle_rdmsr(run);
-                       break;
-               case KVM_EXIT_X86_WRMSR:
-                       handle_wrmsr(run);
-                       break;
-               case KVM_EXIT_IO:
-                       if (handle_ucall(vcpu))
-                               goto done;
-                       break;
-               }
-
-       }
-
-done:
-       TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space");
-       TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space");
-}
-
-KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap)
-{
-       struct kvm_vm *vm = vcpu->vm;
-       int rc;
-
-       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
-       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
-       vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
-
-       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
-       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
-
-       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs);
-       run_guest_then_process_rdmsr(vcpu, MSR_FS_BASE);
-       TEST_ASSERT(run_guest_then_process_ucall(vcpu) == UCALL_SYNC,
-                   "Expected ucall state to be UCALL_SYNC.");
-       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
-       run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE);
-       run_guest_then_process_ucall_done(vcpu);
-}
-
-#define test_user_exit_msr_ioctl(vm, cmd, arg, flag, valid_mask)       \
-({                                                                     \
-       int r = __vm_ioctl(vm, cmd, arg);                               \
-                                                                       \
-       if (flag & valid_mask)                                          \
-               TEST_ASSERT(!r, __KVM_IOCTL_ERROR(#cmd, r));            \
-       else                                                            \
-               TEST_ASSERT(r == -1 && errno == EINVAL,                 \
-                           "Wanted EINVAL for %s with flag = 0x%llx, got rc: %i errno: %i (%s)", \
-                           #cmd, flag, r, errno,  strerror(errno));    \
-})
-
-static void run_user_space_msr_flag_test(struct kvm_vm *vm)
-{
-       struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_USER_SPACE_MSR };
-       int nflags = sizeof(cap.args[0]) * BITS_PER_BYTE;
-       int rc;
-       int i;
-
-       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
-       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
-
-       for (i = 0; i < nflags; i++) {
-               cap.args[0] = BIT_ULL(i);
-               test_user_exit_msr_ioctl(vm, KVM_ENABLE_CAP, &cap,
-                          BIT_ULL(i), KVM_MSR_EXIT_REASON_VALID_MASK);
-       }
-}
-
-static void run_msr_filter_flag_test(struct kvm_vm *vm)
-{
-       u64 deny_bits = 0;
-       struct kvm_msr_filter filter = {
-               .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
-               .ranges = {
-                       {
-                               .flags = KVM_MSR_FILTER_READ,
-                               .nmsrs = 1,
-                               .base = 0,
-                               .bitmap = (uint8_t *)&deny_bits,
-                       },
-               },
-       };
-       int nflags;
-       int rc;
-       int i;
-
-       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
-       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
-
-       nflags = sizeof(filter.flags) * BITS_PER_BYTE;
-       for (i = 0; i < nflags; i++) {
-               filter.flags = BIT_ULL(i);
-               test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter,
-                          BIT_ULL(i), KVM_MSR_FILTER_VALID_MASK);
-       }
-
-       filter.flags = KVM_MSR_FILTER_DEFAULT_ALLOW;
-       nflags = sizeof(filter.ranges[0].flags) * BITS_PER_BYTE;
-       for (i = 0; i < nflags; i++) {
-               filter.ranges[0].flags = BIT_ULL(i);
-               test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter,
-                          BIT_ULL(i), KVM_MSR_FILTER_RANGE_VALID_MASK);
-       }
-}
-
-/* Test that attempts to write to the unused bits in a flag fail. */
-KVM_ONE_VCPU_TEST(user_msr, user_exit_msr_flags, NULL)
-{
-       struct kvm_vm *vm = vcpu->vm;
-
-       /* Test flags for KVM_CAP_X86_USER_SPACE_MSR. */
-       run_user_space_msr_flag_test(vm);
-
-       /* Test flags and range flags for KVM_X86_SET_MSR_FILTER. */
-       run_msr_filter_flag_test(vm);
-}
-
-int main(int argc, char *argv[])
-{
-       return test_harness_run(argc, argv);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
deleted file mode 100644 (file)
index a81a247..0000000
+++ /dev/null
@@ -1,124 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vmx_apic_access_test
- *
- * Copyright (C) 2020, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * The first subtest simply checks to see that an L2 guest can be
- * launched with a valid APIC-access address that is backed by a
- * page of L1 physical memory.
- *
- * The second subtest sets the APIC-access address to a (valid) L1
- * physical address that is not backed by memory. KVM can't handle
- * this situation, so resuming L2 should result in a KVM exit for
- * internal error (emulation). This is not an architectural
- * requirement. It is just a shortcoming of KVM. The internal error
- * is unfortunate, but it's better than what used to happen!
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "kselftest.h"
-
-static void l2_guest_code(void)
-{
-       /* Exit to L1 */
-       __asm__ __volatile__("vmcall");
-}
-
-static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa)
-{
-#define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       uint32_t control;
-
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
-       GUEST_ASSERT(load_vmcs(vmx_pages));
-
-       /* Prepare the VMCS for L2 execution. */
-       prepare_vmcs(vmx_pages, l2_guest_code,
-                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-       control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
-       control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
-       vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
-       control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
-       control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
-       vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
-       vmwrite(APIC_ACCESS_ADDR, vmx_pages->apic_access_gpa);
-
-       /* Try to launch L2 with the memory-backed APIC-access address. */
-       GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));
-       GUEST_ASSERT(!vmlaunch());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
-       vmwrite(APIC_ACCESS_ADDR, high_gpa);
-
-       /* Try to resume L2 with the unbacked APIC-access address. */
-       GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));
-       GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned long apic_access_addr = ~0ul;
-       vm_vaddr_t vmx_pages_gva;
-       unsigned long high_gpa;
-       struct vmx_pages *vmx;
-       bool done = false;
-
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
-       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-
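-       /*
-        * Use an L1 GPA near the top of the guest physical address space;
-        * it is not backed by a memslot.
-        */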
-       high_gpa = (vm->max_gfn - 1) << vm->page_shift;
-
-       vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
-       prepare_virtualize_apic_accesses(vmx, vm);
-       vcpu_args_set(vcpu, 2, vmx_pages_gva, high_gpa);
-
-       while (!done) {
-               volatile struct kvm_run *run = vcpu->run;
-               struct ucall uc;
-
-               vcpu_run(vcpu);
-               if (apic_access_addr == high_gpa) {
-                       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
-                       TEST_ASSERT(run->internal.suberror ==
-                                   KVM_INTERNAL_ERROR_EMULATION,
-                                   "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u",
-                                   run->internal.suberror);
-                       break;
-               }
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       apic_access_addr = uc.args[1];
-                       break;
-               case UCALL_DONE:
-                       done = true;
-                       break;
-               default:
-                       TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd);
-               }
-       }
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
deleted file mode 100644 (file)
index dad9883..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vmx_close_while_nested
- *
- * Copyright (C) 2019, Red Hat, Inc.
- *
- * Verify that nothing bad happens if a KVM user exits with open
- * file descriptors while executing a nested guest.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "kselftest.h"
-
-enum {
-       PORT_L0_EXIT = 0x2000,
-};
-
-static void l2_guest_code(void)
-{
-       /* Exit to L0 */
-       asm volatile("inb %%dx, %%al"
-                    : : [port] "d" (PORT_L0_EXIT) : "rax");
-}
-
-static void l1_guest_code(struct vmx_pages *vmx_pages)
-{
-#define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
-       GUEST_ASSERT(load_vmcs(vmx_pages));
-
-       /* Prepare the VMCS for L2 execution. */
-       prepare_vmcs(vmx_pages, l2_guest_code,
-                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       GUEST_ASSERT(!vmlaunch());
-       GUEST_ASSERT(0);
-}
-
-int main(int argc, char *argv[])
-{
-       vm_vaddr_t vmx_pages_gva;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
-       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-
-       /* Allocate VMX pages and shared descriptors (vmx_pages). */
-       vcpu_alloc_vmx(vm, &vmx_pages_gva);
-       vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
-       for (;;) {
-               volatile struct kvm_run *run = vcpu->run;
-               struct ucall uc;
-
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               if (run->io.port == PORT_L0_EXIT)
-                       break;
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-       }
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
deleted file mode 100644 (file)
index fa512d0..0000000
+++ /dev/null
@@ -1,179 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * KVM dirty page logging test
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <linux/bitmap.h>
-#include <linux/bitops.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-/* The memory slot index to track dirty pages */
-#define TEST_MEM_SLOT_INDEX            1
-#define TEST_MEM_PAGES                 3
-
-/* L1 guest test virtual memory offset */
-#define GUEST_TEST_MEM                 0xc0000000
-
-/* L2 guest test virtual memory offset */
-#define NESTED_TEST_MEM1               0xc0001000
-#define NESTED_TEST_MEM2               0xc0002000
-
-static void l2_guest_code(u64 *a, u64 *b)
-{
-       READ_ONCE(*a);
-       WRITE_ONCE(*a, 1);
-       GUEST_SYNC(true);
-       GUEST_SYNC(false);
-
-       WRITE_ONCE(*b, 1);
-       GUEST_SYNC(true);
-       WRITE_ONCE(*b, 1);
-       GUEST_SYNC(true);
-       GUEST_SYNC(false);
-
-       /* Exit to L1 and never come back.  */
-       vmcall();
-}
-
-static void l2_guest_code_ept_enabled(void)
-{
-       l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2);
-}
-
-static void l2_guest_code_ept_disabled(void)
-{
-       /* Access the same L1 GPAs as l2_guest_code_ept_enabled() */
-       l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM);
-}
-
-void l1_guest_code(struct vmx_pages *vmx)
-{
-#define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       void *l2_rip;
-
-       GUEST_ASSERT(vmx->vmcs_gpa);
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx));
-       GUEST_ASSERT(load_vmcs(vmx));
-
-       if (vmx->eptp_gpa)
-               l2_rip = l2_guest_code_ept_enabled;
-       else
-               l2_rip = l2_guest_code_ept_disabled;
-
-       prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       GUEST_SYNC(false);
-       GUEST_ASSERT(!vmlaunch());
-       GUEST_SYNC(false);
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-       GUEST_DONE();
-}
-
-static void test_vmx_dirty_log(bool enable_ept)
-{
-       vm_vaddr_t vmx_pages_gva = 0;
-       struct vmx_pages *vmx;
-       unsigned long *bmap;
-       uint64_t *host_test_mem;
-
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       bool done = false;
-
-       pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled");
-
-       /* Create VM */
-       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-       vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
-       vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
-       /* Add an extra memory slot for testing dirty logging */
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-                                   GUEST_TEST_MEM,
-                                   TEST_MEM_SLOT_INDEX,
-                                   TEST_MEM_PAGES,
-                                   KVM_MEM_LOG_DIRTY_PAGES);
-
-       /*
-        * Add an identity map for GVA range [0xc0000000, 0xc0002000).  This
-        * affects both L1 and L2.  However...
-        */
-       virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES);
-
-       /*
-        * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
-        * 0xc0000000.
-        *
-        * Note that prepare_eptp should be called only after L1's GPA map is
-        * done, meaning after the last call to virt_map.
-        *
-        * When EPT is disabled, the L2 guest code will still access the same L1
-        * GPAs as the EPT enabled case.
-        */
-       if (enable_ept) {
-               prepare_eptp(vmx, vm, 0);
-               nested_map_memslot(vmx, vm, 0);
-               nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
-               nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
-       }
-
-       bmap = bitmap_zalloc(TEST_MEM_PAGES);
-       host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
-
-       while (!done) {
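-               /*
-                * Fill the test memslot with a known pattern before each run
-                * so that a guest write (which sets the first u64 of the slot
-                * to 1) can be distinguished from untouched memory below.
-                */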
-               memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       /*
-                        * The nested guest wrote at offset 0x1000 in the memslot, but the
-                        * dirty bitmap must be filled in according to L1 GPA, not L2.
-                        */
-                       kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
-                       if (uc.args[1]) {
-                               TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean");
-                               TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest");
-                       } else {
-                               TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty");
-                               TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest");
-                       }
-
-                       TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty");
-                       TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
-                       TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty");
-                       TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
-                       break;
-               case UCALL_DONE:
-                       done = true;
-                       break;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-       }
-}
-
-int main(int argc, char *argv[])
-{
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
-       test_vmx_dirty_log(/*enable_ept=*/false);
-
-       if (kvm_cpu_has_ept())
-               test_vmx_dirty_log(/*enable_ept=*/true);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
deleted file mode 100644 (file)
index 3fd6ece..0000000
+++ /dev/null
@@ -1,142 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-#include <signal.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/time.h>
-
-#include "kselftest.h"
-
-static void guest_ud_handler(struct ex_regs *regs)
-{
-       /* Loop on the ud2 until guest state is made invalid. */
-}
-
-static void guest_code(void)
-{
-       asm volatile("ud2");
-}
-
-static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-
-       vcpu_run(vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
-       TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
-                   "Expected emulation failure, got %d",
-                   run->emulation_failure.suberror);
-}
-
-static void run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
-{
-       /*
-        * Always run twice to verify KVM handles the case where _KVM_ queues
-        * an exception with invalid state and then exits to userspace, i.e.
-        * that KVM doesn't explode if userspace ignores the initial error.
-        */
-       __run_vcpu_with_invalid_state(vcpu);
-       __run_vcpu_with_invalid_state(vcpu);
-}
-
-static void set_timer(void)
-{
-       struct itimerval timer;
-
-       timer.it_value.tv_sec  = 0;
-       timer.it_value.tv_usec = 200;
-       timer.it_interval = timer.it_value;
-       TEST_ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
-}
-
-static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set)
-{
-       static struct kvm_sregs sregs;
-
-       if (!sregs.cr0)
-               vcpu_sregs_get(vcpu, &sregs);
-       sregs.tr.unusable = !!set;
-       vcpu_sregs_set(vcpu, &sregs);
-}
-
-static void set_invalid_guest_state(struct kvm_vcpu *vcpu)
-{
-       set_or_clear_invalid_guest_state(vcpu, true);
-}
-
-static void clear_invalid_guest_state(struct kvm_vcpu *vcpu)
-{
-       set_or_clear_invalid_guest_state(vcpu, false);
-}
-
-static struct kvm_vcpu *get_set_sigalrm_vcpu(struct kvm_vcpu *__vcpu)
-{
-       static struct kvm_vcpu *vcpu = NULL;
-
-       if (__vcpu)
-               vcpu = __vcpu;
-       return vcpu;
-}
-
-static void sigalrm_handler(int sig)
-{
-       struct kvm_vcpu *vcpu = get_set_sigalrm_vcpu(NULL);
-       struct kvm_vcpu_events events;
-
-       TEST_ASSERT(sig == SIGALRM, "Unexpected signal = %d", sig);
-
-       vcpu_events_get(vcpu, &events);
-
-       /*
-        * If an exception is pending, attempt KVM_RUN with invalid guest state,
-        * otherwise rearm the timer and keep doing so until the timer fires
-        * between KVM queueing an exception and re-entering the guest.
-        */
-       if (events.exception.pending) {
-               set_invalid_guest_state(vcpu);
-               run_vcpu_with_invalid_state(vcpu);
-       } else {
-               set_timer();
-       }
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       TEST_REQUIRE(host_cpu_is_intel);
-       TEST_REQUIRE(!vm_is_unrestricted_guest(NULL));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       get_set_sigalrm_vcpu(vcpu);
-
-       vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
-
-       /*
-        * Stuff invalid guest state by making TR unusable.  The next KVM_RUN
-        * should exit to userspace with a KVM_INTERNAL_ERROR_EMULATION
-        * suberror, as asserted in run_vcpu_with_invalid_state().
-        */
-       set_invalid_guest_state(vcpu);
-       run_vcpu_with_invalid_state(vcpu);
-
-       /*
-        * Verify KVM also handles the case where userspace gains control while
-        * an exception is pending and stuffs invalid state.  Run with valid
-        * guest state and a timer firing every 200us, and attempt to enter the
-        * guest with invalid state when the handler interrupts KVM with an
-        * exception pending.
-        */
-       clear_invalid_guest_state(vcpu);
-       TEST_ASSERT(signal(SIGALRM, sigalrm_handler) != SIG_ERR,
-                   "Failed to register SIGALRM handler, errno = %d (%s)",
-                   errno, strerror(errno));
-
-       set_timer();
-       run_vcpu_with_invalid_state(vcpu);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c
deleted file mode 100644 (file)
index a100ee5..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "kselftest.h"
-
-#define ARBITRARY_IO_PORT 0x2000
-
-static struct kvm_vm *vm;
-
-static void l2_guest_code(void)
-{
-       /*
-        * Generate an exit to L0 userspace, i.e. main(), via I/O to an
-        * arbitrary port.
-        */
-       asm volatile("inb %%dx, %%al"
-                    : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
-}
-
-static void l1_guest_code(struct vmx_pages *vmx_pages)
-{
-#define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
-       GUEST_ASSERT(load_vmcs(vmx_pages));
-
-       /* Prepare the VMCS for L2 execution. */
-       prepare_vmcs(vmx_pages, l2_guest_code,
-                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       /*
-        * L2 must be run without unrestricted guest; verify that the selftests
-        * library hasn't enabled it.  Because KVM selftests jump directly to
-        * 64-bit mode, unrestricted guest support isn't required.
-        */
-       GUEST_ASSERT(!(vmreadz(CPU_BASED_VM_EXEC_CONTROL) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) ||
-                    !(vmreadz(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_UNRESTRICTED_GUEST));
-
-       GUEST_ASSERT(!vmlaunch());
-
-       /* L2 should triple fault after main() stuffs invalid guest state. */
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       vm_vaddr_t vmx_pages_gva;
-       struct kvm_sregs sregs;
-       struct kvm_vcpu *vcpu;
-       struct kvm_run *run;
-       struct ucall uc;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
-       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-
-       /* Allocate VMX pages and shared descriptors (vmx_pages). */
-       vcpu_alloc_vmx(vm, &vmx_pages_gva);
-       vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
-       vcpu_run(vcpu);
-
-       run = vcpu->run;
-
-       /*
-        * The first exit to L0 userspace should be an I/O access from L2.
-        * Running L1 should launch L2 without triggering an exit to userspace.
-        */
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
-                   "Expected IN from port %d from L2, got port %d",
-                   ARBITRARY_IO_PORT, run->io.port);
-
-       /*
-        * Stuff invalid guest state for L2 by making TR unusable.  The next
-        * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support
-        * emulating invalid guest state for L2.
-        */
-       memset(&sregs, 0, sizeof(sregs));
-       vcpu_sregs_get(vcpu, &sregs);
-       sregs.tr.unusable = 1;
-       vcpu_sregs_set(vcpu, &sregs);
-
-       vcpu_run(vcpu);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_DONE:
-               break;
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-       default:
-               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
-       }
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c
deleted file mode 100644 (file)
index 90720b6..0000000
+++ /dev/null
@@ -1,131 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * VMX control MSR test
- *
- * Copyright (C) 2022 Google LLC.
- *
- * Tests for KVM ownership of bits in the VMX entry/exit control MSRs. Checks
- * that KVM will set owned bits where appropriate, and will not if
- * KVM_X86_QUIRK_TWEAK_VMX_CTRL_MSRS is disabled.
- */
-#include <linux/bitmap.h>
-#include "kvm_util.h"
-#include "vmx.h"
-
-static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
-                                 uint64_t mask)
-{
-       uint64_t val = vcpu_get_msr(vcpu, msr_index);
-       uint64_t bit;
-
-       mask &= val;
-
-       for_each_set_bit(bit, &mask, 64) {
-               vcpu_set_msr(vcpu, msr_index, val & ~BIT_ULL(bit));
-               vcpu_set_msr(vcpu, msr_index, val);
-       }
-}
-
-static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
-                               uint64_t mask)
-{
-       uint64_t val = vcpu_get_msr(vcpu, msr_index);
-       uint64_t bit;
-
-       mask = ~mask | val;
-
-       for_each_clear_bit(bit, &mask, 64) {
-               vcpu_set_msr(vcpu, msr_index, val | BIT_ULL(bit));
-               vcpu_set_msr(vcpu, msr_index, val);
-       }
-}
-
-static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index)
-{
-       vmx_fixed0_msr_test(vcpu, msr_index, GENMASK_ULL(31, 0));
-       vmx_fixed1_msr_test(vcpu, msr_index, GENMASK_ULL(63, 32));
-}
-
-static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu)
-{
-       vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, 0);
-       vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, -1ull);
-
-       vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_BASIC,
-                           BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55));
-
-       vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_MISC,
-                           BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) |
-                           BIT_ULL(15) | BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30));
-
-       vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2);
-       vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_EPT_VPID_CAP, -1ull);
-       vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS);
-       vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS);
-       vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_EXIT_CTLS);
-       vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS);
-       vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_VMFUNC, -1ull);
-}
-
-static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu,
-                                           uint64_t msr_bit,
-                                           struct kvm_x86_cpu_feature feature)
-{
-       uint64_t val;
-
-       vcpu_clear_cpuid_feature(vcpu, feature);
-
-       val = vcpu_get_msr(vcpu, MSR_IA32_FEAT_CTL);
-       vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
-       vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
-       vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
-       vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
-       vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val);
-
-       if (!kvm_cpu_has(feature))
-               return;
-
-       vcpu_set_cpuid_feature(vcpu, feature);
-}
-
-static void ia32_feature_control_msr_test(struct kvm_vcpu *vcpu)
-{
-       uint64_t supported_bits = FEAT_CTL_LOCKED |
-                                 FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
-                                 FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX |
-                                 FEAT_CTL_SGX_LC_ENABLED |
-                                 FEAT_CTL_SGX_ENABLED |
-                                 FEAT_CTL_LMCE_ENABLED;
-       int bit, r;
-
-       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_SMX);
-       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_VMX);
-       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX, X86_FEATURE_VMX);
-       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX_LC);
-       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX);
-       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_ENABLED, X86_FEATURE_SGX);
-       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_LMCE_ENABLED, X86_FEATURE_MCE);
-
-       for_each_clear_bit(bit, &supported_bits, 64) {
-               r = _vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, BIT(bit));
-               TEST_ASSERT(r == 0,
-                           "Setting reserved bit %d in IA32_FEATURE_CONTROL should fail", bit);
-       }
-}
-
-int main(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
-       /* No need to actually do KVM_RUN, thus no guest code. */
-       vm = vm_create_with_one_vcpu(&vcpu, NULL);
-
-       vmx_save_restore_msrs_test(vcpu);
-       ia32_feature_control_msr_test(vcpu);
-
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
deleted file mode 100644 (file)
index 1759fa5..0000000
+++ /dev/null
@@ -1,206 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vmx_nested_tsc_scaling_test
- *
- * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
- *
- * This test case verifies that nested TSC scaling behaves as expected when
- * both L1 and L2 are scaled using different ratios. For this test we scale
- * L1 down and scale L2 up.
- */
-
-#include <time.h>
-
-#include "kvm_util.h"
-#include "vmx.h"
-#include "kselftest.h"
-
-/* L2 is scaled up (from L1's perspective) by this factor */
-#define L2_SCALE_FACTOR 4ULL
-
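-/*
- * The VMX TSC multiplier is a 64-bit fixed-point value with 48 fractional
- * bits, so shifting the integer scale factor left by 48 yields an exact
- * multiplier of L2_SCALE_FACTOR.  The TSC offset is an arbitrary negative
- * value, used to exercise offsetting alongside scaling.
- */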
-#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)
-#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)
-
-#define L2_GUEST_STACK_SIZE 64
-
-enum { USLEEP, UCHECK_L1, UCHECK_L2 };
-#define GUEST_SLEEP(sec)         ucall(UCALL_SYNC, 2, USLEEP, sec)
-#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq)
-
-
-/*
- * This function checks whether the "actual" TSC frequency of a guest matches
- * its expected frequency. In order to account for delays in taking the TSC
- * measurements, a difference of 1% between the actual and the expected value
- * is tolerated.
- */
-static void compare_tsc_freq(uint64_t actual, uint64_t expected)
-{
-       uint64_t tolerance, thresh_low, thresh_high;
-
-       tolerance = expected / 100;
-       thresh_low = expected - tolerance;
-       thresh_high = expected + tolerance;
-
-       TEST_ASSERT(thresh_low < actual,
-               "TSC freq is expected to be between %"PRIu64" and %"PRIu64
-               " but it actually is %"PRIu64,
-               thresh_low, thresh_high, actual);
-       TEST_ASSERT(thresh_high > actual,
-               "TSC freq is expected to be between %"PRIu64" and %"PRIu64
-               " but it actually is %"PRIu64,
-               thresh_low, thresh_high, actual);
-}
-
-static void check_tsc_freq(int level)
-{
-       uint64_t tsc_start, tsc_end, tsc_freq;
-
-       /*
-        * Reading the TSC twice with about a second's difference should give
-        * us an approximation of the TSC frequency from the guest's
-        * perspective. Now, this won't be completely accurate, but it should
-        * be good enough for the purposes of this test.
-        */
-       tsc_start = rdmsr(MSR_IA32_TSC);
-       GUEST_SLEEP(1);
-       tsc_end = rdmsr(MSR_IA32_TSC);
-
-       tsc_freq = tsc_end - tsc_start;
-
-       GUEST_CHECK(level, tsc_freq);
-}
-
-static void l2_guest_code(void)
-{
-       check_tsc_freq(UCHECK_L2);
-
-       /* exit to L1 */
-       __asm__ __volatile__("vmcall");
-}
-
-static void l1_guest_code(struct vmx_pages *vmx_pages)
-{
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       uint32_t control;
-
-       /* check that L1's frequency looks alright before launching L2 */
-       check_tsc_freq(UCHECK_L1);
-
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
-       GUEST_ASSERT(load_vmcs(vmx_pages));
-
-       /* prepare the VMCS for L2 execution */
-       prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       /* enable TSC offsetting and TSC scaling for L2 */
-       control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
-       control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
-       vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
-
-       control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
-       control |= SECONDARY_EXEC_TSC_SCALING;
-       vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
-
-       vmwrite(TSC_OFFSET, TSC_OFFSET_L2);
-       vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2);
-       vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32);
-
-       /* launch L2 */
-       GUEST_ASSERT(!vmlaunch());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
-       /* check that L1's frequency still looks good */
-       check_tsc_freq(UCHECK_L1);
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       vm_vaddr_t vmx_pages_gva;
-
-       uint64_t tsc_start, tsc_end;
-       uint64_t tsc_khz;
-       uint64_t l1_scale_factor;
-       uint64_t l0_tsc_freq = 0;
-       uint64_t l1_tsc_freq = 0;
-       uint64_t l2_tsc_freq = 0;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
-       TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
-
-       /*
-        * We set L1's scale factor to be a random number from 2 to 10.
-        * Ideally we would do the same for L2's factor but that one is
-        * referenced by both main() and l1_guest_code() and using a global
-        * variable does not work.
-        */
-       srand(time(NULL));
-       l1_scale_factor = (rand() % 9) + 2;
-       printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor);
-       printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR);
-
-       tsc_start = rdtsc();
-       sleep(1);
-       tsc_end = rdtsc();
-
-       l0_tsc_freq = tsc_end - tsc_start;
-       printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
-
-       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-       vcpu_alloc_vmx(vm, &vmx_pages_gva);
-       vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
-       tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
-       TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
-
-       /* scale down L1's TSC frequency */
-       vcpu_ioctl(vcpu, KVM_SET_TSC_KHZ, (void *) (tsc_khz / l1_scale_factor));
-
-       for (;;) {
-               struct ucall uc;
-
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-               case UCALL_SYNC:
-                       switch (uc.args[0]) {
-                       case USLEEP:
-                               sleep(uc.args[1]);
-                               break;
-                       case UCHECK_L1:
-                               l1_tsc_freq = uc.args[1];
-                               printf("L1's TSC frequency is around: %"PRIu64
-                                      "\n", l1_tsc_freq);
-
-                               compare_tsc_freq(l1_tsc_freq,
-                                                l0_tsc_freq / l1_scale_factor);
-                               break;
-                       case UCHECK_L2:
-                               l2_tsc_freq = uc.args[1];
-                               printf("L2's TSC frequency is around: %"PRIu64
-                                      "\n", l2_tsc_freq);
-
-                               compare_tsc_freq(l2_tsc_freq,
-                                                l1_tsc_freq * L2_SCALE_FACTOR);
-                               break;
-                       }
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-       }
-
-done:
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
deleted file mode 100644 (file)
index a1f5ff4..0000000
+++ /dev/null
@@ -1,247 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Test for VMX-pmu perf capability msr
- *
- * Copyright (C) 2021 Intel Corporation
- *
- * Test to check the effect of various CPUID settings on the
- * MSR_IA32_PERF_CAPABILITIES MSR, verify that what we write with KVM_SET_MSR
- * is _not_ modified by the guest and can be retrieved with KVM_GET_MSR, and
- * verify that invalid LBR formats are rejected.
- */
-#include <sys/ioctl.h>
-
-#include <linux/bitmap.h>
-
-#include "kvm_test_harness.h"
-#include "kvm_util.h"
-#include "vmx.h"
-
-static union perf_capabilities {
-       struct {
-               u64     lbr_format:6;
-               u64     pebs_trap:1;
-               u64     pebs_arch_reg:1;
-               u64     pebs_format:4;
-               u64     smm_freeze:1;
-               u64     full_width_write:1;
-               u64     pebs_baseline:1;
-               u64     perf_metrics:1;
-               u64     pebs_output_pt_available:1;
-               u64     anythread_deprecated:1;
-       };
-       u64     capabilities;
-} host_cap;
-
-/*
- * The LBR format and most PEBS features are immutable, all other features are
- * fungible (if supported by the host and KVM).
- */
-static const union perf_capabilities immutable_caps = {
-       .lbr_format = -1,
-       .pebs_trap  = 1,
-       .pebs_arch_reg = 1,
-       .pebs_format = -1,
-       .pebs_baseline = 1,
-};
-
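-/*
- * Mask covering just the multi-bit LBR and PEBS format fields, which are
- * validated separately from the other immutable bits (see the "immutable"
- * test below).
- */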
-static const union perf_capabilities format_caps = {
-       .lbr_format = -1,
-       .pebs_format = -1,
-};
-
-static void guest_test_perf_capabilities_gp(uint64_t val)
-{
-       uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val);
-
-       __GUEST_ASSERT(vector == GP_VECTOR,
-                      "Expected #GP for value '0x%lx', got vector '0x%x'",
-                      val, vector);
-}
-
-static void guest_code(uint64_t current_val)
-{
-       int i;
-
-       guest_test_perf_capabilities_gp(current_val);
-       guest_test_perf_capabilities_gp(0);
-
-       for (i = 0; i < 64; i++)
-               guest_test_perf_capabilities_gp(current_val ^ BIT_ULL(i));
-
-       GUEST_DONE();
-}
-
-KVM_ONE_VCPU_TEST_SUITE(vmx_pmu_caps);
-
-/*
- * Verify that guest WRMSRs to PERF_CAPABILITIES #GP regardless of the value
- * written, that the guest always sees the userspace controlled value, and that
- * PERF_CAPABILITIES is immutable after KVM_RUN.
- */
-KVM_ONE_VCPU_TEST(vmx_pmu_caps, guest_wrmsr_perf_capabilities, guest_code)
-{
-       struct ucall uc;
-       int r, i;
-
-       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-
-       vcpu_args_set(vcpu, 1, host_cap.capabilities);
-       vcpu_run(vcpu);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-       case UCALL_DONE:
-               break;
-       default:
-               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
-       }
-
-       TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES),
-                       host_cap.capabilities);
-
-       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-
-       r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
-       TEST_ASSERT(!r, "Post-KVM_RUN write '0' didn't fail");
-
-       for (i = 0; i < 64; i++) {
-               r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
-                                 host_cap.capabilities ^ BIT_ULL(i));
-               TEST_ASSERT(!r, "Post-KVM_RUN write '0x%llx' didn't fail",
-                           host_cap.capabilities ^ BIT_ULL(i));
-       }
-}
-
-/*
- * Verify KVM allows writing PERF_CAPABILITIES with all KVM-supported features
- * enabled, as well as '0' (to disable all features).
- */
-KVM_ONE_VCPU_TEST(vmx_pmu_caps, basic_perf_capabilities, guest_code)
-{
-       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
-       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-}
-
-KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code)
-{
-       const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities;
-       int bit;
-
-       for_each_set_bit(bit, &fungible_caps, 64) {
-               vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(bit));
-               vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
-                            host_cap.capabilities & ~BIT_ULL(bit));
-       }
-       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-}
-
-/*
- * Verify KVM rejects attempts to set unsupported and/or immutable features in
- * PERF_CAPABILITIES.  Note, LBR format and PEBS format need to be validated
- * separately as they are multi-bit values, e.g. toggling or setting a single
- * bit can generate a false positive without dedicated safeguards.
- */
-KVM_ONE_VCPU_TEST(vmx_pmu_caps, immutable_perf_capabilities, guest_code)
-{
-       const uint64_t reserved_caps = (~host_cap.capabilities |
-                                       immutable_caps.capabilities) &
-                                      ~format_caps.capabilities;
-       union perf_capabilities val = host_cap;
-       int r, bit;
-
-       for_each_set_bit(bit, &reserved_caps, 64) {
-               r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
-                                 host_cap.capabilities ^ BIT_ULL(bit));
-               TEST_ASSERT(!r, "%s immutable feature 0x%llx (bit %d) didn't fail",
-                           host_cap.capabilities & BIT_ULL(bit) ? "Setting" : "Clearing",
-                           BIT_ULL(bit), bit);
-       }
-
-       /*
-        * KVM only supports the host's native LBR format, as well as '0' (to
-        * disable LBR support).  Verify KVM rejects all other LBR formats.
-        */
-       for (val.lbr_format = 1; val.lbr_format; val.lbr_format++) {
-               if (val.lbr_format == host_cap.lbr_format)
-                       continue;
-
-               r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
-               TEST_ASSERT(!r, "Bad LBR FMT = 0x%x didn't fail, host = 0x%x",
-                           val.lbr_format, host_cap.lbr_format);
-       }
-
-       /* Ditto for the PEBS format. */
-       for (val.pebs_format = 1; val.pebs_format; val.pebs_format++) {
-               if (val.pebs_format == host_cap.pebs_format)
-                       continue;
-
-               r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
-               TEST_ASSERT(!r, "Bad PEBS FMT = 0x%x didn't fail, host = 0x%x",
-                           val.pebs_format, host_cap.pebs_format);
-       }
-}
-
-/*
- * Test that LBR MSRs are writable when LBRs are enabled, and then verify that
- * disabling the vPMU via CPUID also disables LBR support.  Set bits 2:0 of
- * LBR_TOS as those bits are writable across all uarch implementations (arch
- * LBRs will need to poke a different MSR).
- */
-KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code)
-{
-       int r;
-
-       if (!host_cap.lbr_format)
-               return;
-
-       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-       vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
-
-       vcpu_clear_cpuid_entry(vcpu, X86_PROPERTY_PMU_VERSION.function);
-
-       r = _vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
-       TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU");
-}
-
-KVM_ONE_VCPU_TEST(vmx_pmu_caps, perf_capabilities_unsupported, guest_code)
-{
-       uint64_t val;
-       int i, r;
-
-       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-       val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES);
-       TEST_ASSERT_EQ(val, host_cap.capabilities);
-
-       vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_PDCM);
-
-       val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES);
-       TEST_ASSERT_EQ(val, 0);
-
-       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
-
-       for (i = 0; i < 64; i++) {
-               r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(i));
-               TEST_ASSERT(!r, "Setting PERF_CAPABILITIES bit %d (= 0x%llx) should fail without PDCM",
-                           i, BIT_ULL(i));
-       }
-}
-
-int main(int argc, char *argv[])
-{
-       TEST_REQUIRE(kvm_is_pmu_enabled());
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
-
-       TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
-       TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
-
-       host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
-
-       TEST_ASSERT(host_cap.full_width_write,
-                   "Full-width writes should always be supported");
-
-       return test_harness_run(argc, argv);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
deleted file mode 100644 (file)
index 00dd2ac..0000000
+++ /dev/null
@@ -1,245 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * VMX-preemption timer test
- *
- * Copyright (C) 2020, Google, LLC.
- *
- * Test to ensure that VM-Enter after migration doesn't
- * incorrectly restart the timer with the full timer
- * value instead of the partially decayed timer value.
- *
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#define PREEMPTION_TIMER_VALUE                 100000000ull
-#define PREEMPTION_TIMER_VALUE_THRESHOLD1       80000000ull
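-
-/*
- * L2 spins until at least THRESHOLD1 ticks (80% of the programmed timer
- * value) have elapsed before requesting the save/restore cycle, so the
- * preemption timer is partially decayed at that point.
- */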
-
-u32 vmx_pt_rate;
-bool l2_save_restore_done;
-static u64 l2_vmx_pt_start;
-volatile u64 l2_vmx_pt_finish;
-
-union vmx_basic basic;
-union vmx_ctrl_msr ctrl_pin_rev;
-union vmx_ctrl_msr ctrl_exit_rev;
-
-void l2_guest_code(void)
-{
-       u64 vmx_pt_delta;
-
-       vmcall();
-       l2_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
-
-       /*
-        * Wait until the 1st threshold has passed
-        */
-       do {
-               l2_vmx_pt_finish = rdtsc();
-               vmx_pt_delta = (l2_vmx_pt_finish - l2_vmx_pt_start) >>
-                               vmx_pt_rate;
-       } while (vmx_pt_delta < PREEMPTION_TIMER_VALUE_THRESHOLD1);
-
-       /*
-        * Force L2 through Save and Restore cycle
-        */
-       GUEST_SYNC(1);
-
-       l2_save_restore_done = 1;
-
-       /*
-        * Now wait for the preemption timer to fire and
-        * exit to L1
-        */
-       while ((l2_vmx_pt_finish = rdtsc()))
-               ;
-}
-
-void l1_guest_code(struct vmx_pages *vmx_pages)
-{
-#define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       u64 l1_vmx_pt_start;
-       u64 l1_vmx_pt_finish;
-       u64 l1_tsc_deadline, l2_tsc_deadline;
-
-       GUEST_ASSERT(vmx_pages->vmcs_gpa);
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
-       GUEST_ASSERT(load_vmcs(vmx_pages));
-       GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
-
-       prepare_vmcs(vmx_pages, l2_guest_code,
-                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       /*
-        * Check for Preemption timer support
-        */
-       basic.val = rdmsr(MSR_IA32_VMX_BASIC);
-       ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS
-                       : MSR_IA32_VMX_PINBASED_CTLS);
-       ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT_CTLS
-                       : MSR_IA32_VMX_EXIT_CTLS);
-
-       if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) ||
-           !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
-               return;
-
-       GUEST_ASSERT(!vmlaunch());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-       vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));
-
-       /*
-        * Turn on PIN control and resume the guest
-        */
-       GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL,
-                             vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
-                             PIN_BASED_VMX_PREEMPTION_TIMER));
-
-       GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE,
-                             PREEMPTION_TIMER_VALUE));
-
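-       /*
-        * Bits 4:0 of IA32_VMX_MISC report the preemption timer rate: the
-        * timer counts down by one each time bit N of the TSC changes, i.e.
-        * once per 2^N TSC cycles.  The shifts by vmx_pt_rate round TSC reads
-        * down to that granularity.
-        */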
-       vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;
-
-       l2_save_restore_done = 0;
-
-       l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
-
-       GUEST_ASSERT(!vmresume());
-
-       l1_vmx_pt_finish = rdtsc();
-
-       /*
-        * Ensure exit from L2 happens after L2 goes through
-        * save and restore
-        */
-       GUEST_ASSERT(l2_save_restore_done);
-
-       /*
-        * Ensure the exit from L2 is due to preemption timer expiry
-        */
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER);
-
-       l1_tsc_deadline = l1_vmx_pt_start +
-               (PREEMPTION_TIMER_VALUE << vmx_pt_rate);
-
-       l2_tsc_deadline = l2_vmx_pt_start +
-               (PREEMPTION_TIMER_VALUE << vmx_pt_rate);
-
-       /*
-        * Sync with the host and pass the l1|l2 pt_expiry_finish times and
-        * tsc deadlines so that host can verify they are as expected
-        */
-       GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline,
-               l2_vmx_pt_finish, l2_tsc_deadline);
-}
-
-void guest_code(struct vmx_pages *vmx_pages)
-{
-       if (vmx_pages)
-               l1_guest_code(vmx_pages);
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       vm_vaddr_t vmx_pages_gva = 0;
-
-       struct kvm_regs regs1, regs2;
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu;
-       struct kvm_x86_state *state;
-       struct ucall uc;
-       int stage;
-
-       /*
-        * AMD currently does not implement any VMX features, so for now we
-        * just early out.
-        */
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
-
-       /* Create VM */
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       vcpu_regs_get(vcpu, &regs1);
-
-       vcpu_alloc_vmx(vm, &vmx_pages_gva);
-       vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
-       for (stage = 1;; stage++) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-
-               /* UCALL_SYNC is handled here.  */
-               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
-                           uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
-                           stage, (ulong)uc.args[1]);
-               /*
-                * If this is stage 2, then we should verify that the vmx pt
-                * expiry is as expected.
-                * From L1's perspective, verify the Preemption timer hasn't
-                * expired too early.
-                * From L2's perspective, verify the Preemption timer hasn't
-                * expired too late.
-                */
-               if (stage == 2) {
-
-                       pr_info("Stage %d: L1 PT expiry TSC (%lu) , L1 TSC deadline (%lu)\n",
-                               stage, uc.args[2], uc.args[3]);
-
-                       pr_info("Stage %d: L2 PT expiry TSC (%lu) , L2 TSC deadline (%lu)\n",
-                               stage, uc.args[4], uc.args[5]);
-
-                       TEST_ASSERT(uc.args[2] >= uc.args[3],
-                               "Stage %d: L1 PT expiry TSC (%lu) < L1 TSC deadline (%lu)",
-                               stage, uc.args[2], uc.args[3]);
-
-                       TEST_ASSERT(uc.args[4] < uc.args[5],
-                               "Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)",
-                               stage, uc.args[4], uc.args[5]);
-               }
-
-               state = vcpu_save_state(vcpu);
-               memset(&regs1, 0, sizeof(regs1));
-               vcpu_regs_get(vcpu, &regs1);
-
-               kvm_vm_release(vm);
-
-               /* Restore state in a new VM.  */
-               vcpu = vm_recreate_with_one_vcpu(vm);
-               vcpu_load_state(vcpu, state);
-               kvm_x86_state_cleanup(state);
-
-               memset(&regs2, 0, sizeof(regs2));
-               vcpu_regs_get(vcpu, &regs2);
-               TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
-                           "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
-                           (ulong) regs2.rdi, (ulong) regs2.rsi);
-       }
-
-done:
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
deleted file mode 100644 (file)
index 67a62a5..0000000
+++ /dev/null
@@ -1,304 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vmx_set_nested_state_test
- *
- * Copyright (C) 2019, Google LLC.
- *
- * This test verifies the integrity of calling the ioctl KVM_SET_NESTED_STATE.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#include <errno.h>
-#include <linux/kvm.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <unistd.h>
-
-/*
- * Mirror of VMCS12_REVISION in arch/x86/kvm/vmx/vmcs12.h. If that value
- * changes this should be updated.
- */
-#define VMCS12_REVISION 0x11e57ed0
-
-bool have_evmcs;
-
-void test_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state *state)
-{
-       vcpu_nested_state_set(vcpu, state);
-}
-
-void test_nested_state_expect_errno(struct kvm_vcpu *vcpu,
-                                   struct kvm_nested_state *state,
-                                   int expected_errno)
-{
-       int rv;
-
-       rv = __vcpu_nested_state_set(vcpu, state);
-       TEST_ASSERT(rv == -1 && errno == expected_errno,
-               "Expected %s (%d) from vcpu_nested_state_set but got rv: %i errno: %s (%d)",
-               strerror(expected_errno), expected_errno, rv, strerror(errno),
-               errno);
-}
-
-void test_nested_state_expect_einval(struct kvm_vcpu *vcpu,
-                                    struct kvm_nested_state *state)
-{
-       test_nested_state_expect_errno(vcpu, state, EINVAL);
-}
-
-void test_nested_state_expect_efault(struct kvm_vcpu *vcpu,
-                                    struct kvm_nested_state *state)
-{
-       test_nested_state_expect_errno(vcpu, state, EFAULT);
-}
-
-void set_revision_id_for_vmcs12(struct kvm_nested_state *state,
-                               u32 vmcs12_revision)
-{
-       /* Set revision_id in vmcs12 to vmcs12_revision. */
-       memcpy(&state->data, &vmcs12_revision, sizeof(u32));
-}
-
-void set_default_state(struct kvm_nested_state *state)
-{
-       memset(state, 0, sizeof(*state));
-       state->flags = KVM_STATE_NESTED_RUN_PENDING |
-                      KVM_STATE_NESTED_GUEST_MODE;
-       state->format = 0;
-       state->size = sizeof(*state);
-}
-
-void set_default_vmx_state(struct kvm_nested_state *state, int size)
-{
-       memset(state, 0, size);
-       if (have_evmcs)
-               state->flags = KVM_STATE_NESTED_EVMCS;
-       state->format = 0;
-       state->size = size;
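-       /* Arbitrary, page-aligned GPAs for the VMXON region and vmcs12. */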
-       state->hdr.vmx.vmxon_pa = 0x1000;
-       state->hdr.vmx.vmcs12_pa = 0x2000;
-       state->hdr.vmx.smm.flags = 0;
-       set_revision_id_for_vmcs12(state, VMCS12_REVISION);
-}
-
-void test_vmx_nested_state(struct kvm_vcpu *vcpu)
-{
-       /* Add a page for VMCS12. */
-       const int state_sz = sizeof(struct kvm_nested_state) + getpagesize();
-       struct kvm_nested_state *state =
-               (struct kvm_nested_state *)malloc(state_sz);
-
-       /* The format must be set to 0. 0 for VMX, 1 for SVM. */
-       set_default_vmx_state(state, state_sz);
-       state->format = 1;
-       test_nested_state_expect_einval(vcpu, state);
-
-       /*
-        * We cannot virtualize anything if the guest does not have VMX
-        * enabled.
-        */
-       set_default_vmx_state(state, state_sz);
-       test_nested_state_expect_einval(vcpu, state);
-
-       /*
-        * We cannot virtualize anything if the guest does not have VMX
-        * enabled.  We expect KVM_SET_NESTED_STATE to return 0 if vmxon_pa
-        * is set to -1ull, but the flags must be zero.
-        */
-       set_default_vmx_state(state, state_sz);
-       state->hdr.vmx.vmxon_pa = -1ull;
-       test_nested_state_expect_einval(vcpu, state);
-
-       state->hdr.vmx.vmcs12_pa = -1ull;
-       state->flags = KVM_STATE_NESTED_EVMCS;
-       test_nested_state_expect_einval(vcpu, state);
-
-       state->flags = 0;
-       test_nested_state(vcpu, state);
-
-       /* Enable VMX in the guest CPUID. */
-       vcpu_set_cpuid_feature(vcpu, X86_FEATURE_VMX);
-
-       /*
-        * Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without
-        * setting the nested state. When the eVMCS flag is not set, the
-        * expected return value is '0'.
-        */
-       set_default_vmx_state(state, state_sz);
-       state->flags = 0;
-       state->hdr.vmx.vmxon_pa = -1ull;
-       state->hdr.vmx.vmcs12_pa = -1ull;
-       test_nested_state(vcpu, state);
-
-       /*
-        * When eVMCS is supported, the eVMCS flag can only be set if the
-        * enlightened VMCS capability has been enabled.
-        */
-       if (have_evmcs) {
-               state->flags = KVM_STATE_NESTED_EVMCS;
-               test_nested_state_expect_einval(vcpu, state);
-               vcpu_enable_evmcs(vcpu);
-               test_nested_state(vcpu, state);
-       }
-
-       /* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */
-       state->hdr.vmx.smm.flags = 1;
-       test_nested_state_expect_einval(vcpu, state);
-
-       /* Invalid flags are rejected. */
-       set_default_vmx_state(state, state_sz);
-       state->hdr.vmx.flags = ~0;
-       test_nested_state_expect_einval(vcpu, state);
-
-       /* It is invalid to have vmxon_pa == -1ull and vmcs_pa != -1ull. */
-       set_default_vmx_state(state, state_sz);
-       state->hdr.vmx.vmxon_pa = -1ull;
-       state->flags = 0;
-       test_nested_state_expect_einval(vcpu, state);
-
-       /* It is invalid to have vmxon_pa set to a non-page aligned address. */
-       set_default_vmx_state(state, state_sz);
-       state->hdr.vmx.vmxon_pa = 1;
-       test_nested_state_expect_einval(vcpu, state);
-
-       /*
-        * It is invalid to have KVM_STATE_NESTED_SMM_GUEST_MODE and
-        * KVM_STATE_NESTED_GUEST_MODE set together.
-        */
-       set_default_vmx_state(state, state_sz);
-       state->flags = KVM_STATE_NESTED_GUEST_MODE  |
-                     KVM_STATE_NESTED_RUN_PENDING;
-       state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
-       test_nested_state_expect_einval(vcpu, state);
-
-       /*
-        * It is invalid to have any of the SMM flags set besides:
-        *      KVM_STATE_NESTED_SMM_GUEST_MODE
-        *      KVM_STATE_NESTED_SMM_VMXON
-        */
-       set_default_vmx_state(state, state_sz);
-       state->hdr.vmx.smm.flags = ~(KVM_STATE_NESTED_SMM_GUEST_MODE |
-                               KVM_STATE_NESTED_SMM_VMXON);
-       test_nested_state_expect_einval(vcpu, state);
-
-       /* Outside SMM, SMM flags must be zero. */
-       set_default_vmx_state(state, state_sz);
-       state->flags = 0;
-       state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
-       test_nested_state_expect_einval(vcpu, state);
-
-       /*
-        * Size must be large enough to fit kvm_nested_state and vmcs12
-        * if the VMCS12 physical address is set.
-        */
-       set_default_vmx_state(state, state_sz);
-       state->size = sizeof(*state);
-       state->flags = 0;
-       test_nested_state_expect_einval(vcpu, state);
-
-       set_default_vmx_state(state, state_sz);
-       state->size = sizeof(*state);
-       state->flags = 0;
-       state->hdr.vmx.vmcs12_pa = -1;
-       test_nested_state(vcpu, state);
-
-       /*
-        * KVM_SET_NESTED_STATE succeeds with invalid VMCS
-        * contents but L2 not running.
-        */
-       set_default_vmx_state(state, state_sz);
-       state->flags = 0;
-       test_nested_state(vcpu, state);
-
-       /* Invalid flags are rejected, even if no VMCS loaded. */
-       set_default_vmx_state(state, state_sz);
-       state->size = sizeof(*state);
-       state->flags = 0;
-       state->hdr.vmx.vmcs12_pa = -1;
-       state->hdr.vmx.flags = ~0;
-       test_nested_state_expect_einval(vcpu, state);
-
-       /* vmxon_pa cannot be the same address as vmcs_pa. */
-       set_default_vmx_state(state, state_sz);
-       state->hdr.vmx.vmxon_pa = 0;
-       state->hdr.vmx.vmcs12_pa = 0;
-       test_nested_state_expect_einval(vcpu, state);
-
-       /*
-        * Test that if we leave nesting the state reflects that when we get
-        * it again.
-        */
-       set_default_vmx_state(state, state_sz);
-       state->hdr.vmx.vmxon_pa = -1ull;
-       state->hdr.vmx.vmcs12_pa = -1ull;
-       state->flags = 0;
-       test_nested_state(vcpu, state);
-       vcpu_nested_state_get(vcpu, state);
-       TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
-                   "Size must be between %ld and %d.  The size returned was %d.",
-                   sizeof(*state), state_sz, state->size);
-       TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull.");
-       TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull.");
-
-       free(state);
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vm *vm;
-       struct kvm_nested_state state;
-       struct kvm_vcpu *vcpu;
-
-       have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
-
-       /*
-        * AMD currently does not implement set_nested_state, so for now we
-        * just early out.
-        */
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
-       vm = vm_create_with_one_vcpu(&vcpu, NULL);
-
-       /*
-        * First run tests with VMX disabled to check error handling.
-        */
-       vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
-
-       /* Passing a NULL kvm_nested_state causes a EFAULT. */
-       test_nested_state_expect_efault(vcpu, NULL);
-
-       /* 'size' cannot be smaller than sizeof(kvm_nested_state). */
-       set_default_state(&state);
-       state.size = 0;
-       test_nested_state_expect_einval(vcpu, &state);
-
-       /*
-        * Setting the flags 0xf fails the flags check.  The only flags that
-        * can be used are:
-        *     KVM_STATE_NESTED_GUEST_MODE
-        *     KVM_STATE_NESTED_RUN_PENDING
-        *     KVM_STATE_NESTED_EVMCS
-        */
-       set_default_state(&state);
-       state.flags = 0xf;
-       test_nested_state_expect_einval(vcpu, &state);
-
-       /*
-        * If KVM_STATE_NESTED_RUN_PENDING is set then
-        * KVM_STATE_NESTED_GUEST_MODE has to be set as well.
-        */
-       set_default_state(&state);
-       state.flags = KVM_STATE_NESTED_RUN_PENDING;
-       test_nested_state_expect_einval(vcpu, &state);
-
-       test_vmx_nested_state(vcpu);
-
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
deleted file mode 100644 (file)
index 2ceb5c7..0000000
+++ /dev/null
@@ -1,156 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vmx_tsc_adjust_test
- *
- * Copyright (C) 2018, Google LLC.
- *
- * IA32_TSC_ADJUST test
- *
- * According to the SDM, "if an execution of WRMSR to the
- * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC,
- * the logical processor also adds (or subtracts) value X from the
- * IA32_TSC_ADJUST MSR."
- *
- * Note that when L1 doesn't intercept writes to IA32_TSC, a
- * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC
- * value.
- *
- * This test verifies that this unusual case is handled correctly.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "kselftest.h"
-
-#ifndef MSR_IA32_TSC_ADJUST
-#define MSR_IA32_TSC_ADJUST 0x3b
-#endif
-
-#define TSC_ADJUST_VALUE (1ll << 32)
-#define TSC_OFFSET_VALUE -(1ll << 48)
-
-enum {
-       PORT_ABORT = 0x1000,
-       PORT_REPORT,
-       PORT_DONE,
-};
-
-enum {
-       VMXON_PAGE = 0,
-       VMCS_PAGE,
-       MSR_BITMAP_PAGE,
-
-       NUM_VMX_PAGES,
-};
-
-/* The virtual machine object. */
-static struct kvm_vm *vm;
-
-static void check_ia32_tsc_adjust(int64_t max)
-{
-       int64_t adjust;
-
-       adjust = rdmsr(MSR_IA32_TSC_ADJUST);
-       GUEST_SYNC(adjust);
-       GUEST_ASSERT(adjust <= max);
-}
-
-static void l2_guest_code(void)
-{
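-       /*
-        * L2 runs with a TSC offset of TSC_OFFSET_VALUE (see l1_guest_code()),
-        * so subtracting the offset from RDTSC yields L1's view of the TSC.
-        * L1 doesn't intercept IA32_TSC writes, so the WRMSR below modifies
-        * L1's TSC and thus drops IA32_TSC_ADJUST by another TSC_ADJUST_VALUE
-        * (verified below as -2 * TSC_ADJUST_VALUE).
-        */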
-       uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE;
-
-       wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE);
-       check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
-
-       /* Exit to L1 */
-       __asm__ __volatile__("vmcall");
-}
-
-static void l1_guest_code(struct vmx_pages *vmx_pages)
-{
-#define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       uint32_t control;
-       uintptr_t save_cr3;
-
-       GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE);
-       wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
-       check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
-
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
-       GUEST_ASSERT(load_vmcs(vmx_pages));
-
-       /* Prepare the VMCS for L2 execution. */
-       prepare_vmcs(vmx_pages, l2_guest_code,
-                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-       control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
-       control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
-       vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
-       vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
-
-       /* Jump into L2.  First, test failure to load guest CR3.  */
-       save_cr3 = vmreadz(GUEST_CR3);
-       vmwrite(GUEST_CR3, -1ull);
-       GUEST_ASSERT(!vmlaunch());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
-                    (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
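-       /* The failed VM-Entry should not have affected IA32_TSC_ADJUST. */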
-       check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
-       vmwrite(GUEST_CR3, save_cr3);
-
-       GUEST_ASSERT(!vmlaunch());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
-       check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
-
-       GUEST_DONE();
-}
-
-static void report(int64_t val)
-{
-       pr_info("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
-               val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
-}
-
-int main(int argc, char *argv[])
-{
-       vm_vaddr_t vmx_pages_gva;
-       struct kvm_vcpu *vcpu;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
-       vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code);
-
-       /* Allocate VMX pages and shared descriptors (vmx_pages). */
-       vcpu_alloc_vmx(vm, &vmx_pages_gva);
-       vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
-       for (;;) {
-               struct ucall uc;
-
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       report(uc.args[1]);
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-       }
-
-done:
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
deleted file mode 100644 (file)
index a76078a..0000000
+++ /dev/null
@@ -1,487 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * xapic_ipi_test
- *
- * Copyright (C) 2020, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
- * another vCPU that is halted when KVM's backing page for the APIC access
- * address has been moved by mm.
- *
- * The test starts two vCPUs: one that sends IPIs and one that continually
- * executes HLT. The sender checks that the halter has woken from the HLT and
- * has reentered HLT before sending the next IPI. While the vCPUs are running,
- * the host continually calls migrate_pages to move all of the process' pages
- * amongst the available numa nodes on the machine.
- *
- * Migration is a command line option. When used on non-numa machines, the
- * test will exit with an error. The test is still useful on non-numa
- * machines for testing IPIs.
- */
-#include <getopt.h>
-#include <pthread.h>
-#include <inttypes.h>
-#include <string.h>
-#include <time.h>
-
-#include "kvm_util.h"
-#include "numaif.h"
-#include "processor.h"
-#include "test_util.h"
-#include "vmx.h"
-
-/* Default running time for the test */
-#define DEFAULT_RUN_SECS 3
-
-/* Default delay between migrate_pages calls (microseconds) */
-#define DEFAULT_DELAY_USECS 500000
-
-/*
- * Vector for IPI from sender vCPU to halting vCPU.
- * Value is arbitrary and was chosen for the alternating bit pattern. Any
- * value should work.
- */
-#define IPI_VECTOR      0xa5
-
-/*
- * Incremented in the IPI handler. Provides evidence to the sender that the IPI
- * arrived at the destination.
- */
-static volatile uint64_t ipis_rcvd;
-
-/* Data struct shared between host main thread and vCPUs */
-struct test_data_page {
-       uint32_t halter_apic_id;
-       volatile uint64_t hlt_count;
-       volatile uint64_t wake_count;
-       uint64_t ipis_sent;
-       uint64_t migrations_attempted;
-       uint64_t migrations_completed;
-       uint32_t icr;
-       uint32_t icr2;
-       uint32_t halter_tpr;
-       uint32_t halter_ppr;
-
-       /*
-        *  Record local version register as a cross-check that APIC access
-        *  worked. Value should match what KVM reports (APIC_VERSION in
-        *  arch/x86/kvm/lapic.c). If test is failing, check that values match
-        *  to determine whether APIC access exits are working.
-        */
-       uint32_t halter_lvr;
-};
-
-struct thread_params {
-       struct test_data_page *data;
-       struct kvm_vcpu *vcpu;
-       uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
-};
-
-void verify_apic_base_addr(void)
-{
-       uint64_t msr = rdmsr(MSR_IA32_APICBASE);
-       uint64_t base = GET_APIC_BASE(msr);
-
-       GUEST_ASSERT(base == APIC_DEFAULT_GPA);
-}
-
-static void halter_guest_code(struct test_data_page *data)
-{
-       verify_apic_base_addr();
-       xapic_enable();
-
-       data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
-       data->halter_lvr = xapic_read_reg(APIC_LVR);
-
-       /*
-        * Loop forever HLTing and recording halts & wakes. Disable interrupts
-        * each time around to minimize window between signaling the pending
-        * halt to the sender vCPU and executing the halt. No need to disable on
-        * first run as this vCPU executes first and the host waits for it to
-        * signal going into first halt before starting the sender vCPU. Record
-        * TPR and PPR for diagnostic purposes in case the test fails.
-        */
-       for (;;) {
-               data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
-               data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
-               data->hlt_count++;
-               asm volatile("sti; hlt; cli");
-               data->wake_count++;
-       }
-}
-
-/*
- * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
- * enable diagnosing errant writes to the APIC access address backing page in
- * case of test failure.
- */
-static void guest_ipi_handler(struct ex_regs *regs)
-{
-       ipis_rcvd++;
-       xapic_write_reg(APIC_EOI, 77);
-}
-
-static void sender_guest_code(struct test_data_page *data)
-{
-       uint64_t last_wake_count;
-       uint64_t last_hlt_count;
-       uint64_t last_ipis_rcvd_count;
-       uint32_t icr_val;
-       uint32_t icr2_val;
-       uint64_t tsc_start;
-
-       verify_apic_base_addr();
-       xapic_enable();
-
-       /*
-        * Init interrupt command register for sending IPIs
-        *
-        * Delivery mode=fixed, per SDM:
-        *   "Delivers the interrupt specified in the vector field to the target
-        *    processor."
-        *
-        * Destination mode=physical i.e. specify target by its local APIC
-        * ID. This vCPU assumes that the halter vCPU has already started and
-        * set data->halter_apic_id.
-        */
-       icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
-       icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
-       data->icr = icr_val;
-       data->icr2 = icr2_val;
-
-       last_wake_count = data->wake_count;
-       last_hlt_count = data->hlt_count;
-       last_ipis_rcvd_count = ipis_rcvd;
-       for (;;) {
-               /*
-                * Send IPI to halter vCPU.
-                * First IPI can be sent unconditionally because halter vCPU
-                * starts earlier.
-                */
-               xapic_write_reg(APIC_ICR2, icr2_val);
-               xapic_write_reg(APIC_ICR, icr_val);
-               data->ipis_sent++;
-
-               /*
-                * Wait up to ~1 sec for halter to indicate that it has:
-                * 1. Received the IPI
-                * 2. Woken up from the halt
-                * 3. Gone back into halt
-                * Current CPUs typically run at 2.x GHz, which is ~2
-                * billion ticks per second.
-                */
-               tsc_start = rdtsc();
-               while (rdtsc() - tsc_start < 2000000000) {
-                       if ((ipis_rcvd != last_ipis_rcvd_count) &&
-                           (data->wake_count != last_wake_count) &&
-                           (data->hlt_count != last_hlt_count))
-                               break;
-               }
-
-               GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
-                            (data->wake_count != last_wake_count) &&
-                            (data->hlt_count != last_hlt_count));
-
-               last_wake_count = data->wake_count;
-               last_hlt_count = data->hlt_count;
-               last_ipis_rcvd_count = ipis_rcvd;
-       }
-}
-
-static void *vcpu_thread(void *arg)
-{
-       struct thread_params *params = (struct thread_params *)arg;
-       struct kvm_vcpu *vcpu = params->vcpu;
-       struct ucall uc;
-       int old;
-       int r;
-
-       r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
-       TEST_ASSERT(r == 0,
-                   "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
-                   vcpu->id, r);
-
-       fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
-       vcpu_run(vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       if (get_ucall(vcpu, &uc) == UCALL_ABORT) {
-               TEST_ASSERT(false,
-                           "vCPU %u exited with error: %s.\n"
-                           "Sending vCPU sent %lu IPIs to halting vCPU\n"
-                           "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
-                           "Halter TPR=%#x PPR=%#x LVR=%#x\n"
-                           "Migrations attempted: %lu\n"
-                           "Migrations completed: %lu",
-                           vcpu->id, (const char *)uc.args[0],
-                           params->data->ipis_sent, params->data->hlt_count,
-                           params->data->wake_count,
-                           *params->pipis_rcvd, params->data->halter_tpr,
-                           params->data->halter_ppr, params->data->halter_lvr,
-                           params->data->migrations_attempted,
-                           params->data->migrations_completed);
-       }
-
-       return NULL;
-}
-
-static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
-{
-       void *retval;
-       int r;
-
-       r = pthread_cancel(thread);
-       TEST_ASSERT(r == 0,
-                   "pthread_cancel on vcpu_id=%d failed with errno=%d",
-                   vcpu->id, r);
-
-       r = pthread_join(thread, &retval);
-       TEST_ASSERT(r == 0,
-                   "pthread_join on vcpu_id=%d failed with errno=%d",
-                   vcpu->id, r);
-       TEST_ASSERT(retval == PTHREAD_CANCELED,
-                   "expected retval=%p, got %p", PTHREAD_CANCELED,
-                   retval);
-}
-
-void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
-                  uint64_t *pipis_rcvd)
-{
-       long pages_not_moved;
-       unsigned long nodemask = 0;
-       unsigned long nodemasks[sizeof(nodemask) * 8];
-       int nodes = 0;
-       time_t start_time, last_update, now;
-       time_t interval_secs = 1;
-       int i, r;
-       int from, to;
-       unsigned long bit;
-       uint64_t hlt_count;
-       uint64_t wake_count;
-       uint64_t ipis_sent;
-
-       fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
-               delay_usecs);
-
-       /* Get set of first 64 numa nodes available */
-       r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
-                         0, MPOL_F_MEMS_ALLOWED);
-       TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
-
-       fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
-               "(each 1-bit indicates node is present): %#lx\n",
-               sizeof(nodemask) * 8, nodemask);
-
-       /* Init array of masks containing a single-bit in each, one for each
-        * available node. migrate_pages called below requires specifying nodes
-        * as bit masks.
-        */
-       for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
-               if (nodemask & bit) {
-                       nodemasks[nodes] = nodemask & bit;
-                       nodes++;
-               }
-       }
-
-       TEST_ASSERT(nodes > 1,
-                   "Did not find at least 2 numa nodes. Can't do migration");
-
-       fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);
-
-       from = 0;
-       to = 1;
-       start_time = time(NULL);
-       last_update = start_time;
-
-       ipis_sent = data->ipis_sent;
-       hlt_count = data->hlt_count;
-       wake_count = data->wake_count;
-
-       while ((int)(time(NULL) - start_time) < run_secs) {
-               data->migrations_attempted++;
-
-               /*
-                * migrate_pages with PID=0 will migrate all pages of this
-                * process between the nodes specified as bitmasks. The page
-                * backing the APIC access address belongs to this process
-                * because it is allocated by KVM in the context of the
-                * KVM_CREATE_VCPU ioctl. If that assumption ever changes this
-                * test may break or give a false positive signal.
-                */
-               pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]),
-                                               &nodemasks[from],
-                                               &nodemasks[to]);
-               if (pages_not_moved < 0)
-                       fprintf(stderr,
-                               "migrate_pages failed, errno=%d\n", errno);
-               else if (pages_not_moved > 0)
-                       fprintf(stderr,
-                               "migrate_pages could not move %ld pages\n",
-                               pages_not_moved);
-               else
-                       data->migrations_completed++;
-
-               from = to;
-               to++;
-               if (to == nodes)
-                       to = 0;
-
-               now = time(NULL);
-               if (((now - start_time) % interval_secs == 0) &&
-                   (now != last_update)) {
-                       last_update = now;
-                       fprintf(stderr,
-                               "%lu seconds: Migrations attempted=%lu completed=%lu, "
-                               "IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
-                               now - start_time, data->migrations_attempted,
-                               data->migrations_completed,
-                               data->ipis_sent, *pipis_rcvd,
-                               data->hlt_count, data->wake_count);
-
-                       TEST_ASSERT(ipis_sent != data->ipis_sent &&
-                                   hlt_count != data->hlt_count &&
-                                   wake_count != data->wake_count,
-                                   "IPI, HLT and wake count have not increased "
-                                   "in the last %lu seconds. "
-                                   "HLTer is likely hung.", interval_secs);
-
-                       ipis_sent = data->ipis_sent;
-                       hlt_count = data->hlt_count;
-                       wake_count = data->wake_count;
-               }
-               usleep(delay_usecs);
-       }
-}
-
-void get_cmdline_args(int argc, char *argv[], int *run_secs,
-                     bool *migrate, int *delay_usecs)
-{
-       for (;;) {
-               int opt = getopt(argc, argv, "s:d:m");
-
-               if (opt == -1)
-                       break;
-               switch (opt) {
-               case 's':
-                       *run_secs = parse_size(optarg);
-                       break;
-               case 'm':
-                       *migrate = true;
-                       break;
-               case 'd':
-                       *delay_usecs = parse_size(optarg);
-                       break;
-               default:
-                       TEST_ASSERT(false,
-                                   "Usage: -s <runtime seconds>. Default is %d seconds.\n"
-                                   "-m adds calls to migrate_pages while vCPUs are running."
-                                   " Default is no migrations.\n"
-                                   "-d <delay microseconds> - delay between migrate_pages() calls."
-                                   " Default is %d microseconds.",
-                                   DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
-               }
-       }
-}
-
-int main(int argc, char *argv[])
-{
-       int r;
-       int wait_secs;
-       const int max_halter_wait = 10;
-       int run_secs = 0;
-       int delay_usecs = 0;
-       struct test_data_page *data;
-       vm_vaddr_t test_data_page_vaddr;
-       bool migrate = false;
-       pthread_t threads[2];
-       struct thread_params params[2];
-       struct kvm_vm *vm;
-       uint64_t *pipis_rcvd;
-
-       get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
-       if (run_secs <= 0)
-               run_secs = DEFAULT_RUN_SECS;
-       if (delay_usecs <= 0)
-               delay_usecs = DEFAULT_DELAY_USECS;
-
-       vm = vm_create_with_one_vcpu(&params[0].vcpu, halter_guest_code);
-
-       vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
-
-       virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
-
-       params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
-
-       test_data_page_vaddr = vm_vaddr_alloc_page(vm);
-       data = addr_gva2hva(vm, test_data_page_vaddr);
-       memset(data, 0, sizeof(*data));
-       params[0].data = data;
-       params[1].data = data;
-
-       vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr);
-       vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr);
-
-       pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
-       params[0].pipis_rcvd = pipis_rcvd;
-       params[1].pipis_rcvd = pipis_rcvd;
-
-       /* Start halter vCPU thread and wait for it to execute first HLT. */
-       r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
-       TEST_ASSERT(r == 0,
-                   "pthread_create halter failed errno=%d", errno);
-       fprintf(stderr, "Halter vCPU thread started\n");
-
-       wait_secs = 0;
-       while ((wait_secs < max_halter_wait) && !data->hlt_count) {
-               sleep(1);
-               wait_secs++;
-       }
-
-       TEST_ASSERT(data->hlt_count,
-                   "Halter vCPU did not execute first HLT within %d seconds",
-                   max_halter_wait);
-
-       fprintf(stderr,
-               "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
-               data->halter_apic_id, wait_secs);
-
-       r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
-       TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno);
-
-       fprintf(stderr,
-               "IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n",
-               run_secs);
-
-       if (!migrate)
-               sleep(run_secs);
-       else
-               do_migrations(data, run_secs, delay_usecs, pipis_rcvd);
-
-       /*
-        * Cancel threads and wait for them to stop.
-        */
-       cancel_join_vcpu_thread(threads[0], params[0].vcpu);
-       cancel_join_vcpu_thread(threads[1], params[1].vcpu);
-
-       fprintf(stderr,
-               "Test successful after running for %d seconds.\n"
-               "Sending vCPU sent %lu IPIs to halting vCPU\n"
-               "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
-               "Halter APIC ID=%#x\n"
-               "Sender ICR value=%#x ICR2 value=%#x\n"
-               "Halter TPR=%#x PPR=%#x LVR=%#x\n"
-               "Migrations attempted: %lu\n"
-               "Migrations completed: %lu\n",
-               run_secs, data->ipis_sent,
-               data->hlt_count, data->wake_count, *pipis_rcvd,
-               data->halter_apic_id,
-               data->icr, data->icr2,
-               data->halter_tpr, data->halter_ppr, data->halter_lvr,
-               data->migrations_attempted, data->migrations_completed);
-
-       kvm_vm_free(vm);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
deleted file mode 100644 (file)
index 88bcca1..0000000
+++ /dev/null
@@ -1,262 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "apic.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "test_util.h"
-
-struct xapic_vcpu {
-       struct kvm_vcpu *vcpu;
-       bool is_x2apic;
-       bool has_xavic_errata;
-};
-
-static void xapic_guest_code(void)
-{
-       asm volatile("cli");
-
-       xapic_enable();
-
-       while (1) {
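-               /*
-                * The host stuffs the ICR value to write into the (otherwise
-                * unused) IRR via KVM_SET_LAPIC, see ____test_icr().  Echo it
-                * into ICR2/ICR and report it back to the host.
-                */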
-               uint64_t val = (u64)xapic_read_reg(APIC_IRR) |
-                              (u64)xapic_read_reg(APIC_IRR + 0x10) << 32;
-
-               xapic_write_reg(APIC_ICR2, val >> 32);
-               xapic_write_reg(APIC_ICR, val);
-               GUEST_SYNC(val);
-       }
-}
-
-#define X2APIC_RSVD_BITS_MASK  (GENMASK_ULL(31, 20) | \
-                               GENMASK_ULL(17, 16) | \
-                               GENMASK_ULL(13, 13))
-
-static void x2apic_guest_code(void)
-{
-       asm volatile("cli");
-
-       x2apic_enable();
-
-       do {
-               uint64_t val = x2apic_read_reg(APIC_IRR) |
-                              x2apic_read_reg(APIC_IRR + 0x10) << 32;
-
-               if (val & X2APIC_RSVD_BITS_MASK) {
-                       x2apic_write_reg_fault(APIC_ICR, val);
-               } else {
-                       x2apic_write_reg(APIC_ICR, val);
-                       GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ICR), val);
-               }
-               GUEST_SYNC(val);
-       } while (1);
-}
-
-static void ____test_icr(struct xapic_vcpu *x, uint64_t val)
-{
-       struct kvm_vcpu *vcpu = x->vcpu;
-       struct kvm_lapic_state xapic;
-       struct ucall uc;
-       uint64_t icr;
-
-       /*
-        * Tell the guest what ICR value to write.  Use the IRR to pass info,
-        * as all bits are valid and should not be modified by KVM (ignoring
-        * the fact that vectors 0-15 are technically illegal).
-        */
-       vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
-       *((u32 *)&xapic.regs[APIC_IRR]) = val;
-       *((u32 *)&xapic.regs[APIC_IRR + 0x10]) = val >> 32;
-       vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic);
-
-       vcpu_run(vcpu);
-       TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
-       TEST_ASSERT_EQ(uc.args[1], val);
-
-       vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
-       icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) |
-             (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32;
-       if (!x->is_x2apic) {
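-               /*
-                * In xAPIC mode, KVM drops writes to ICR2[23:0] (see the AVIC
-                * errata comment in main()), so only the low 32 bits and the
-                * destination field in ICR2[31:24] survive the round trip.
-                */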
-               if (!x->has_xavic_errata)
-                       val &= (-1u | (0xffull << (32 + 24)));
-       } else if (val & X2APIC_RSVD_BITS_MASK) {
-               return;
-       }
-
-       if (x->has_xavic_errata)
-               TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
-       else
-               TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
-}
-
-static void __test_icr(struct xapic_vcpu *x, uint64_t val)
-{
-       /*
-        * The BUSY bit is reserved on both AMD and Intel, but only AMD treats
-        * it as _must_ be zero.  Intel simply ignores the bit.  Don't test
-        * the BUSY bit for x2APIC, as there is no single correct behavior.
-        */
-       if (!x->is_x2apic)
-               ____test_icr(x, val | APIC_ICR_BUSY);
-
-       ____test_icr(x, val & ~(u64)APIC_ICR_BUSY);
-}
-
-static void test_icr(struct xapic_vcpu *x)
-{
-       struct kvm_vcpu *vcpu = x->vcpu;
-       uint64_t icr, i, j;
-
-       icr = APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_FIXED;
-       for (i = 0; i <= 0xff; i++)
-               __test_icr(x, icr | i);
-
-       icr = APIC_INT_ASSERT | APIC_DM_FIXED;
-       for (i = 0; i <= 0xff; i++)
-               __test_icr(x, icr | i);
-
-       /*
-        * Send all flavors of IPIs to non-existent vCPUs.  TODO: use number of
-        * vCPUs, not vcpu.id + 1.  Arbitrarily use vector 0xff.
-        */
-       icr = APIC_INT_ASSERT | 0xff;
-       for (i = 0; i < 0xff; i++) {
-               if (i == vcpu->id)
-                       continue;
-               for (j = 0; j < 8; j++)
-                       __test_icr(x, i << (32 + 24) | icr | (j << 8));
-       }
-
-       /* And again with a shorthand destination for all types of IPIs. */
-       icr = APIC_DEST_ALLBUT | APIC_INT_ASSERT;
-       for (i = 0; i < 8; i++)
-               __test_icr(x, icr | (i << 8));
-
-       /* And a few garbage values, just make sure it's an IRQ (blocked). */
-       __test_icr(x, 0xa5a5a5a5a5a5a5a5 & ~APIC_DM_FIXED_MASK);
-       __test_icr(x, 0x5a5a5a5a5a5a5a5a & ~APIC_DM_FIXED_MASK);
-       __test_icr(x, -1ull & ~APIC_DM_FIXED_MASK);
-}
-
-static void __test_apic_id(struct kvm_vcpu *vcpu, uint64_t apic_base)
-{
-       uint32_t apic_id, expected;
-       struct kvm_lapic_state xapic;
-
-       vcpu_set_msr(vcpu, MSR_IA32_APICBASE, apic_base);
-
-       vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
-
-       expected = apic_base & X2APIC_ENABLE ? vcpu->id : vcpu->id << 24;
-       apic_id = *((u32 *)&xapic.regs[APIC_ID]);
-
-       TEST_ASSERT(apic_id == expected,
-                   "APIC_ID not set back to %s format; wanted = %x, got = %x",
-                   (apic_base & X2APIC_ENABLE) ? "x2APIC" : "xAPIC",
-                   expected, apic_id);
-}
-
-/*
- * Verify that KVM switches the APIC_ID between xAPIC and x2APIC when userspace
- * stuffs MSR_IA32_APICBASE.  Setting the APIC_ID when x2APIC is enabled and
- * when the APIC transitions from DISABLED to ENABLED is architectural behavior
- * (on Intel), whereas the x2APIC => xAPIC transition behavior is KVM ABI, as
- * attempting to transition from x2APIC to xAPIC without disabling the APIC is
- * architecturally disallowed.
- */
-static void test_apic_id(void)
-{
-       const uint32_t NR_VCPUS = 3;
-       struct kvm_vcpu *vcpus[NR_VCPUS];
-       uint64_t apic_base;
-       struct kvm_vm *vm;
-       int i;
-
-       vm = vm_create_with_vcpus(NR_VCPUS, NULL, vcpus);
-       vm_enable_cap(vm, KVM_CAP_X2APIC_API, KVM_X2APIC_API_USE_32BIT_IDS);
-
-       for (i = 0; i < NR_VCPUS; i++) {
-               apic_base = vcpu_get_msr(vcpus[i], MSR_IA32_APICBASE);
-
-               TEST_ASSERT(apic_base & MSR_IA32_APICBASE_ENABLE,
-                           "APIC not in ENABLED state at vCPU RESET");
-               TEST_ASSERT(!(apic_base & X2APIC_ENABLE),
-                           "APIC not in xAPIC mode at vCPU RESET");
-
-               __test_apic_id(vcpus[i], apic_base);
-               __test_apic_id(vcpus[i], apic_base | X2APIC_ENABLE);
-               __test_apic_id(vcpus[i], apic_base);
-       }
-
-       kvm_vm_free(vm);
-}
-
-static void test_x2apic_id(void)
-{
-       struct kvm_lapic_state lapic = {};
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       int i;
-
-       vm = vm_create_with_one_vcpu(&vcpu, NULL);
-       vcpu_set_msr(vcpu, MSR_IA32_APICBASE, MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
-
-       /*
-        * Try stuffing a modified x2APIC ID; KVM should ignore the value and
-        * always return the vCPU's default/readonly x2APIC ID.
-        */
-       for (i = 0; i <= 0xff; i++) {
-               *(u32 *)(lapic.regs + APIC_ID) = i << 24;
-               *(u32 *)(lapic.regs + APIC_SPIV) = APIC_SPIV_APIC_ENABLED;
-               vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
-
-               vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic);
-               TEST_ASSERT(*((u32 *)&lapic.regs[APIC_ID]) == vcpu->id << 24,
-                           "x2APIC ID should be fully readonly");
-       }
-
-       kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
-       struct xapic_vcpu x = {
-               .vcpu = NULL,
-               .is_x2apic = true,
-       };
-       struct kvm_vm *vm;
-
-       vm = vm_create_with_one_vcpu(&x.vcpu, x2apic_guest_code);
-       test_icr(&x);
-       kvm_vm_free(vm);
-
-       /*
-        * Use a second VM for the xAPIC test so that x2APIC can be hidden from
-        * the guest in order to test AVIC.  KVM disallows changing CPUID after
-        * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC.
-        */
-       vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code);
-       x.is_x2apic = false;
-
-       /*
-        * AMD's AVIC implementation is buggy (fails to clear the ICR BUSY bit),
-        * and also diverges from KVM with respect to ICR2[23:0] (KVM and Intel
- * drop writes, AMD does not).  Account for the errata when checking
-        * that KVM reads back what was written.
-        */
-       x.has_xavic_errata = host_cpu_is_amd &&
-                            get_kvm_amd_param_bool("avic");
-
-       vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC);
-
-       virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
-       test_icr(&x);
-       kvm_vm_free(vm);
-
-       test_apic_id();
-       test_x2apic_id();
-}
diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
deleted file mode 100644 (file)
index c8a5c5e..0000000
+++ /dev/null
@@ -1,139 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * XCR0 cpuid test
- *
- * Copyright (C) 2022, Google LLC.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-
-/*
- * Assert that architectural dependency rules are satisfied, e.g. that AVX is
- * supported if and only if SSE is supported.
- */
-#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies)          \
-do {                                                                                   \
-       uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies));       \
-                                                                                       \
-       __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) ||                    \
-                      __supported == ((xfeatures) | (dependencies)),                   \
-                      "supported = 0x%lx, xfeatures = 0x%llx, dependencies = 0x%llx",  \
-                      __supported, (xfeatures), (dependencies));                       \
-} while (0)
-
-/*
- * Assert that KVM reports a sane, usable as-is XCR0.  Architecturally, a CPU
- * isn't strictly required to _support_ all XFeatures related to a feature, but
- * at the same time XSETBV will #GP if bundled XFeatures aren't enabled and
- * disabled coherently.  E.g. a CPU can technically enumerate support for
- * XTILE_CFG but not XTILE_DATA, but attempting to enable XTILE_CFG without
- * XTILE_DATA will #GP.
- */
-#define ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, xfeatures)         \
-do {                                                                   \
-       uint64_t __supported = (supported_xcr0) & (xfeatures);          \
-                                                                       \
-       __GUEST_ASSERT(!__supported || __supported == (xfeatures),      \
-                      "supported = 0x%lx, xfeatures = 0x%llx",         \
-                      __supported, (xfeatures));                       \
-} while (0)
-
-static void guest_code(void)
-{
-       uint64_t initial_xcr0;
-       uint64_t supported_xcr0;
-       int i, vector;
-
-       set_cr4(get_cr4() | X86_CR4_OSXSAVE);
-
-       initial_xcr0 = xgetbv(0);
-       supported_xcr0 = this_cpu_supported_xcr0();
-
-       GUEST_ASSERT(initial_xcr0 == supported_xcr0);
-
-       /* Check AVX */
-       ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0,
-                                    XFEATURE_MASK_YMM,
-                                    XFEATURE_MASK_SSE);
-
-       /* Check MPX */
-       ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
-                                   XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
-
-       /* Check AVX-512 */
-       ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0,
-                                    XFEATURE_MASK_AVX512,
-                                    XFEATURE_MASK_SSE | XFEATURE_MASK_YMM);
-       ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
-                                   XFEATURE_MASK_AVX512);
-
-       /* Check AMX */
-       ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
-                                   XFEATURE_MASK_XTILE);
-
-       vector = xsetbv_safe(0, XFEATURE_MASK_FP);
-       __GUEST_ASSERT(!vector,
-                      "Expected success on XSETBV(FP), got vector '0x%x'",
-                      vector);
-
-       vector = xsetbv_safe(0, supported_xcr0);
-       __GUEST_ASSERT(!vector,
-                      "Expected success on XSETBV(0x%lx), got vector '0x%x'",
-                      supported_xcr0, vector);
-
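-       /* Setting any unsupported xfeature bit in XCR0 should #GP. */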
-       for (i = 0; i < 64; i++) {
-               if (supported_xcr0 & BIT_ULL(i))
-                       continue;
-
-               vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i));
-               __GUEST_ASSERT(vector == GP_VECTOR,
-                              "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'",
-                              BIT_ULL(i), supported_xcr0, vector);
-       }
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_run *run;
-       struct kvm_vm *vm;
-       struct ucall uc;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       run = vcpu->run;
-
-       while (1) {
-               vcpu_run(vcpu);
-
-               TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-                           "Unexpected exit reason: %u (%s)",
-                           run->exit_reason,
-                           exit_reason_str(run->exit_reason));
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-       }
-
-done:
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
deleted file mode 100644 (file)
index a59b3c7..0000000
+++ /dev/null
@@ -1,1161 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright © 2021 Amazon.com, Inc. or its affiliates.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-#include <stdint.h>
-#include <time.h>
-#include <sched.h>
-#include <signal.h>
-#include <pthread.h>
-
-#include <sys/eventfd.h>
-
-#define SHINFO_REGION_GVA      0xc0000000ULL
-#define SHINFO_REGION_GPA      0xc0000000ULL
-#define SHINFO_REGION_SLOT     10
-
-#define DUMMY_REGION_GPA       (SHINFO_REGION_GPA + (3 * PAGE_SIZE))
-#define DUMMY_REGION_SLOT      11
-
-#define DUMMY_REGION_GPA_2     (SHINFO_REGION_GPA + (4 * PAGE_SIZE))
-#define DUMMY_REGION_SLOT_2    12
-
-#define SHINFO_ADDR    (SHINFO_REGION_GPA)
-#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40)
-#define PVTIME_ADDR    (SHINFO_REGION_GPA + PAGE_SIZE)
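-/*
- * Place the runstate area 15 bytes shy of the end of the second page so that
- * the vcpu_runstate_info structure straddles a page boundary.
- */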
-#define RUNSTATE_ADDR  (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15)
-
-#define SHINFO_VADDR   (SHINFO_REGION_GVA)
-#define VCPU_INFO_VADDR        (SHINFO_REGION_GVA + 0x40)
-#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15)
-
-#define EVTCHN_VECTOR  0x10
-
-#define EVTCHN_TEST1 15
-#define EVTCHN_TEST2 66
-#define EVTCHN_TIMER 13
-
-enum {
-       TEST_INJECT_VECTOR = 0,
-       TEST_RUNSTATE_runnable,
-       TEST_RUNSTATE_blocked,
-       TEST_RUNSTATE_offline,
-       TEST_RUNSTATE_ADJUST,
-       TEST_RUNSTATE_DATA,
-       TEST_STEAL_TIME,
-       TEST_EVTCHN_MASKED,
-       TEST_EVTCHN_UNMASKED,
-       TEST_EVTCHN_SLOWPATH,
-       TEST_EVTCHN_SEND_IOCTL,
-       TEST_EVTCHN_HCALL,
-       TEST_EVTCHN_HCALL_SLOWPATH,
-       TEST_EVTCHN_HCALL_EVENTFD,
-       TEST_TIMER_SETUP,
-       TEST_TIMER_WAIT,
-       TEST_TIMER_RESTORE,
-       TEST_POLL_READY,
-       TEST_POLL_TIMEOUT,
-       TEST_POLL_MASKED,
-       TEST_POLL_WAKE,
-       SET_VCPU_INFO,
-       TEST_TIMER_PAST,
-       TEST_LOCKING_SEND_RACE,
-       TEST_LOCKING_POLL_RACE,
-       TEST_LOCKING_POLL_TIMEOUT,
-       TEST_DONE,
-
-       TEST_GUEST_SAW_IRQ,
-};
-
-#define XEN_HYPERCALL_MSR      0x40000000
-
-#define MIN_STEAL_TIME         50000
-
-#define SHINFO_RACE_TIMEOUT    2       /* seconds */
-
-#define __HYPERVISOR_set_timer_op      15
-#define __HYPERVISOR_sched_op          29
-#define __HYPERVISOR_event_channel_op  32
-
-#define SCHEDOP_poll                   3
-
-#define EVTCHNOP_send                  4
-
-#define EVTCHNSTAT_interdomain         2
-
-struct evtchn_send {
-       u32 port;
-};
-
-struct sched_poll {
-       u32 *ports;
-       unsigned int nr_ports;
-       u64 timeout;
-};
-
-struct pvclock_vcpu_time_info {
-       u32   version;
-       u32   pad0;
-       u64   tsc_timestamp;
-       u64   system_time;
-       u32   tsc_to_system_mul;
-       s8    tsc_shift;
-       u8    flags;
-       u8    pad[2];
-} __attribute__((__packed__)); /* 32 bytes */
-
-struct pvclock_wall_clock {
-       u32   version;
-       u32   sec;
-       u32   nsec;
-} __attribute__((__packed__));
-
-struct vcpu_runstate_info {
-       uint32_t state;
-       uint64_t state_entry_time;
-       uint64_t time[5]; /* Extra field for overrun check */
-};
-
-struct compat_vcpu_runstate_info {
-       uint32_t state;
-       uint64_t state_entry_time;
-       uint64_t time[5];
-} __attribute__((__packed__));
-
-struct arch_vcpu_info {
-       unsigned long cr2;
-       unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
-};
-
-struct vcpu_info {
-       uint8_t evtchn_upcall_pending;
-       uint8_t evtchn_upcall_mask;
-       unsigned long evtchn_pending_sel;
-       struct arch_vcpu_info arch;
-       struct pvclock_vcpu_time_info time;
-}; /* 64 bytes (x86) */
-
-struct shared_info {
-       struct vcpu_info vcpu_info[32];
-       unsigned long evtchn_pending[64];
-       unsigned long evtchn_mask[64];
-       struct pvclock_wall_clock wc;
-       uint32_t wc_sec_hi;
-       /* arch_shared_info here */
-};
-
-#define RUNSTATE_running  0
-#define RUNSTATE_runnable 1
-#define RUNSTATE_blocked  2
-#define RUNSTATE_offline  3
-
-static const char *runstate_names[] = {
-       "running",
-       "runnable",
-       "blocked",
-       "offline"
-};
-
-struct {
-       struct kvm_irq_routing info;
-       struct kvm_irq_routing_entry entries[2];
-} irq_routes;
-
-static volatile bool guest_saw_irq;
-
-static void evtchn_handler(struct ex_regs *regs)
-{
-       struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
-
-       vcpu_arch_put_guest(vi->evtchn_upcall_pending, 0);
-       vcpu_arch_put_guest(vi->evtchn_pending_sel, 0);
-       guest_saw_irq = true;
-
-       GUEST_SYNC(TEST_GUEST_SAW_IRQ);
-}
-
-static void guest_wait_for_irq(void)
-{
-       while (!guest_saw_irq)
-               __asm__ __volatile__ ("rep nop" : : : "memory");
-       guest_saw_irq = false;
-}
-
-static void guest_code(void)
-{
-       struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
-       int i;
-
-       __asm__ __volatile__(
-               "sti\n"
-               "nop\n"
-       );
-
-       /* Trigger an interrupt injection */
-       GUEST_SYNC(TEST_INJECT_VECTOR);
-
-       guest_wait_for_irq();
-
-       /* Test having the host set runstates manually */
-       GUEST_SYNC(TEST_RUNSTATE_runnable);
-       GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
-       GUEST_ASSERT(rs->state == 0);
-
-       GUEST_SYNC(TEST_RUNSTATE_blocked);
-       GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
-       GUEST_ASSERT(rs->state == 0);
-
-       GUEST_SYNC(TEST_RUNSTATE_offline);
-       GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
-       GUEST_ASSERT(rs->state == 0);
-
-       /* Test runstate time adjust */
-       GUEST_SYNC(TEST_RUNSTATE_ADJUST);
-       GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
-       GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);
-
-       /* Test runstate time set */
-       GUEST_SYNC(TEST_RUNSTATE_DATA);
-       GUEST_ASSERT(rs->state_entry_time >= 0x8000);
-       GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
-       GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
-       GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);
-
-       /* sched_yield() should result in some 'runnable' time */
-       GUEST_SYNC(TEST_STEAL_TIME);
-       GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
-
-       /* Attempt to deliver a *masked* interrupt */
-       GUEST_SYNC(TEST_EVTCHN_MASKED);
-
-       /* Wait until we see the bit set */
-       struct shared_info *si = (void *)SHINFO_VADDR;
-       while (!si->evtchn_pending[0])
-               __asm__ __volatile__ ("rep nop" : : : "memory");
-
-       /* Now deliver an *unmasked* interrupt */
-       GUEST_SYNC(TEST_EVTCHN_UNMASKED);
-
-       guest_wait_for_irq();
-
-       /* Change memslots and deliver an interrupt */
-       GUEST_SYNC(TEST_EVTCHN_SLOWPATH);
-
-       guest_wait_for_irq();
-
-       /* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */
-       GUEST_SYNC(TEST_EVTCHN_SEND_IOCTL);
-
-       guest_wait_for_irq();
-
-       GUEST_SYNC(TEST_EVTCHN_HCALL);
-
-       /* Our turn. Deliver event channel (to ourselves) with
-        * EVTCHNOP_send hypercall. */
-       struct evtchn_send s = { .port = 127 };
-       xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
-
-       guest_wait_for_irq();
-
-       GUEST_SYNC(TEST_EVTCHN_HCALL_SLOWPATH);
-
-       /*
-        * Same again, but this time the host has messed with memslots so it
-        * should take the slow path in kvm_xen_set_evtchn().
-        */
-       xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
-
-       guest_wait_for_irq();
-
-       GUEST_SYNC(TEST_EVTCHN_HCALL_EVENTFD);
-
-       /* Deliver "outbound" event channel to an eventfd which
-        * happens to be one of our own irqfds. */
-       s.port = 197;
-       xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
-
-       guest_wait_for_irq();
-
-       GUEST_SYNC(TEST_TIMER_SETUP);
-
-       /* Set a timer 100ms in the future. */
-       xen_hypercall(__HYPERVISOR_set_timer_op,
-                     rs->state_entry_time + 100000000, NULL);
-
-       GUEST_SYNC(TEST_TIMER_WAIT);
-
-       /* Now wait for the timer */
-       guest_wait_for_irq();
-
-       GUEST_SYNC(TEST_TIMER_RESTORE);
-
-       /* The host has 'restored' the timer. Just wait for it. */
-       guest_wait_for_irq();
-
-       GUEST_SYNC(TEST_POLL_READY);
-
-       /* Poll for an event channel port which is already set */
-       u32 ports[1] = { EVTCHN_TIMER };
-       struct sched_poll p = {
-               .ports = ports,
-               .nr_ports = 1,
-               .timeout = 0,
-       };
-
-       xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
-
-       GUEST_SYNC(TEST_POLL_TIMEOUT);
-
-       /* Poll for an unset port and wait for the timeout. */
-       p.timeout = 100000000;
-       xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
-
-       GUEST_SYNC(TEST_POLL_MASKED);
-
-       /* A timer will wake the masked port we're waiting on, while we poll */
-       p.timeout = 0;
-       xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
-
-       GUEST_SYNC(TEST_POLL_WAKE);
-
-       /* Set the vcpu_info to point at exactly the place it already is to
-        * make sure the attribute is functional. */
-       GUEST_SYNC(SET_VCPU_INFO);
-
-       /* A timer will wake an *unmasked* port, which should wake us with an
-        * actual interrupt while we're polling on a different port. */
-       ports[0]++;
-       p.timeout = 0;
-       xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
-
-       guest_wait_for_irq();
-
-       GUEST_SYNC(TEST_TIMER_PAST);
-
-       /* Timer should have fired already */
-       guest_wait_for_irq();
-
-       GUEST_SYNC(TEST_LOCKING_SEND_RACE);
-       /* Racing host ioctls */
-
-       guest_wait_for_irq();
-
-       GUEST_SYNC(TEST_LOCKING_POLL_RACE);
-       /* Racing vmcall against host ioctl */
-
-       ports[0] = 0;
-
-       p = (struct sched_poll) {
-               .ports = ports,
-               .nr_ports = 1,
-               .timeout = 0
-       };
-
-wait_for_timer:
-       /*
-        * Poll for a timer wake event while the worker thread is mucking with
-        * the shared info.  KVM XEN drops timer IRQs if the shared info is
-        * invalid when the timer expires.  Arbitrarily poll 100 times before
-        * giving up and asking the VMM to re-arm the timer.  100 polls should
-        * consume enough time to beat on KVM without taking too long if the
-        * timer IRQ is dropped due to an invalid event channel.
-        */
-       for (i = 0; i < 100 && !guest_saw_irq; i++)
-               __xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
-
-       /*
-        * Re-send the timer IRQ if it was (likely) dropped due to the timer
-        * expiring while the event channel was invalid.
-        */
-       if (!guest_saw_irq) {
-               GUEST_SYNC(TEST_LOCKING_POLL_TIMEOUT);
-               goto wait_for_timer;
-       }
-       guest_saw_irq = false;
-
-       GUEST_SYNC(TEST_DONE);
-}
-
-static struct shared_info *shinfo;
-static struct vcpu_info *vinfo;
-static struct kvm_vcpu *vcpu;
-
-static void handle_alrm(int sig)
-{
-       if (vinfo)
-               printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending);
-       vcpu_dump(stdout, vcpu, 0);
-       TEST_FAIL("IRQ delivery timed out");
-}
-
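-/*
- * Worker thread that repeatedly activates and deactivates the shared_info
- * cache (by GFN, and by HVA when supported) while the guest runs, to race
- * against the vCPU and exercise KVM's handling of an invalid shared_info.
- */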
-static void *juggle_shinfo_state(void *arg)
-{
-       struct kvm_vm *vm = (struct kvm_vm *)arg;
-
-       struct kvm_xen_hvm_attr cache_activate_gfn = {
-               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
-               .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE
-       };
-
-       struct kvm_xen_hvm_attr cache_deactivate_gfn = {
-               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
-               .u.shared_info.gfn = KVM_XEN_INVALID_GFN
-       };
-
-       struct kvm_xen_hvm_attr cache_activate_hva = {
-               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA,
-               .u.shared_info.hva = (unsigned long)shinfo
-       };
-
-       struct kvm_xen_hvm_attr cache_deactivate_hva = {
-               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
-               .u.shared_info.hva = 0
-       };
-
-       int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
-
-       for (;;) {
-               __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_gfn);
-               pthread_testcancel();
-               __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_gfn);
-
-               if (xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA) {
-                       __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_hva);
-                       pthread_testcancel();
-                       __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_hva);
-               }
-       }
-
-       return NULL;
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_xen_hvm_attr evt_reset;
-       struct kvm_vm *vm;
-       pthread_t thread;
-       bool verbose;
-       int ret;
-
-       verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
-                              !strncmp(argv[1], "--verbose", 10));
-
-       int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
-       TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO);
-
-       bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
-       bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG);
-       bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
-       bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
-       bool has_shinfo_hva = !!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA);
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       /* Map a region for the shared_info page */
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-                                   SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0);
-       virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3);
-
-       shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
-
-       int zero_fd = open("/dev/zero", O_RDONLY);
-       TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
-
-       struct kvm_xen_hvm_config hvmc = {
-               .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
-               .msr = XEN_HYPERCALL_MSR,
-       };
-
-       /* Let the kernel know that we *will* use it for sending all
-        * event channels, which lets it intercept SCHEDOP_poll */
-       if (do_evtchn_tests)
-               hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
-
-       vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
-
-       struct kvm_xen_hvm_attr lm = {
-               .type = KVM_XEN_ATTR_TYPE_LONG_MODE,
-               .u.long_mode = 1,
-       };
-       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
-
-       if (do_runstate_flag) {
-               struct kvm_xen_hvm_attr ruf = {
-                       .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG,
-                       .u.runstate_update_flag = 1,
-               };
-               vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf);
-
-               ruf.u.runstate_update_flag = 0;
-               vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf);
-               TEST_ASSERT(ruf.u.runstate_update_flag == 1,
-                           "Failed to read back RUNSTATE_UPDATE_FLAG attr");
-       }
-
-       struct kvm_xen_hvm_attr ha = {};
-
-       if (has_shinfo_hva) {
-               ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA;
-               ha.u.shared_info.hva = (unsigned long)shinfo;
-       } else {
-               ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO;
-               ha.u.shared_info.gfn = SHINFO_ADDR / PAGE_SIZE;
-       }
-
-       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
-
-       /*
-        * Test what happens when the HVA of the shinfo page is remapped after
-        * the kernel has a reference to it. But make sure we copy the clock
-        * info over since that's only set at setup time, and we test it later.
-        */
-       struct pvclock_wall_clock wc_copy = shinfo->wc;
-       void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
-       TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
-       shinfo->wc = wc_copy;
-
-       struct kvm_xen_vcpu_attr vi = {
-               .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
-               .u.gpa = VCPU_INFO_ADDR,
-       };
-       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi);
-
-       struct kvm_xen_vcpu_attr pvclock = {
-               .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
-               .u.gpa = PVTIME_ADDR,
-       };
-       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock);
-
-       struct kvm_xen_hvm_attr vec = {
-               .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
-               .u.vector = EVTCHN_VECTOR,
-       };
-       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);
-
-       vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);
-
-       if (do_runstate_tests) {
-               struct kvm_xen_vcpu_attr st = {
-                       .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
-                       .u.gpa = RUNSTATE_ADDR,
-               };
-               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
-       }
-
-       int irq_fd[2] = { -1, -1 };
-
-       if (do_eventfd_tests) {
-               irq_fd[0] = eventfd(0, 0);
-               irq_fd[1] = eventfd(0, 0);
-
-               /* Unexpected, but not a KVM failure */
-               if (irq_fd[0] == -1 || irq_fd[1] == -1)
-                       do_evtchn_tests = do_eventfd_tests = false;
-       }
-
-       if (do_eventfd_tests) {
-               irq_routes.info.nr = 2;
-
-               irq_routes.entries[0].gsi = 32;
-               irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
-               irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1;
-               irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id;
-               irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
-
-               irq_routes.entries[1].gsi = 33;
-               irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
-               irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2;
-               irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id;
-               irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
-
-               vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);
-
-               struct kvm_irqfd ifd = { };
-
-               ifd.fd = irq_fd[0];
-               ifd.gsi = 32;
-               vm_ioctl(vm, KVM_IRQFD, &ifd);
-
-               ifd.fd = irq_fd[1];
-               ifd.gsi = 33;
-               vm_ioctl(vm, KVM_IRQFD, &ifd);
-
-               struct sigaction sa = { };
-               sa.sa_handler = handle_alrm;
-               sigaction(SIGALRM, &sa, NULL);
-       }
-
-       struct kvm_xen_vcpu_attr tmr = {
-               .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
-               .u.timer.port = EVTCHN_TIMER,
-               .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
-               .u.timer.expires_ns = 0
-       };
-
-       if (do_evtchn_tests) {
-               struct kvm_xen_hvm_attr inj = {
-                       .type = KVM_XEN_ATTR_TYPE_EVTCHN,
-                       .u.evtchn.send_port = 127,
-                       .u.evtchn.type = EVTCHNSTAT_interdomain,
-                       .u.evtchn.flags = 0,
-                       .u.evtchn.deliver.port.port = EVTCHN_TEST1,
-                       .u.evtchn.deliver.port.vcpu = vcpu->id + 1,
-                       .u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
-               };
-               vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
-
-               /* Test migration to a different vCPU */
-               inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE;
-               inj.u.evtchn.deliver.port.vcpu = vcpu->id;
-               vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
-
-               inj.u.evtchn.send_port = 197;
-               inj.u.evtchn.deliver.eventfd.port = 0;
-               inj.u.evtchn.deliver.eventfd.fd = irq_fd[1];
-               inj.u.evtchn.flags = 0;
-               vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
-
-               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
-       }
-       vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
-       vinfo->evtchn_upcall_pending = 0;
-
-       struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
-       rs->state = 0x5a;
-
-       bool evtchn_irq_expected = false;
-
-       for (;;) {
-               struct ucall uc;
-
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC: {
-                       struct kvm_xen_vcpu_attr rst;
-                       long rundelay;
-
-                       if (do_runstate_tests)
-                               TEST_ASSERT(rs->state_entry_time == rs->time[0] +
-                                           rs->time[1] + rs->time[2] + rs->time[3],
-                                           "runstate times don't add up");
-
-                       switch (uc.args[1]) {
-                       case TEST_INJECT_VECTOR:
-                               if (verbose)
-                                       printf("Delivering evtchn upcall\n");
-                               evtchn_irq_expected = true;
-                               vinfo->evtchn_upcall_pending = 1;
-                               break;
-
-                       case TEST_RUNSTATE_runnable...TEST_RUNSTATE_offline:
-                               TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
-                               if (!do_runstate_tests)
-                                       goto done;
-                               if (verbose)
-                                       printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
-                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
-                               rst.u.runstate.state = uc.args[1] + RUNSTATE_runnable -
-                                       TEST_RUNSTATE_runnable;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
-                               break;
-
-                       case TEST_RUNSTATE_ADJUST:
-                               if (verbose)
-                                       printf("Testing RUNSTATE_ADJUST\n");
-                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
-                               memset(&rst.u, 0, sizeof(rst.u));
-                               rst.u.runstate.state = (uint64_t)-1;
-                               rst.u.runstate.time_blocked =
-                                       0x5a - rs->time[RUNSTATE_blocked];
-                               rst.u.runstate.time_offline =
-                                       0x6b6b - rs->time[RUNSTATE_offline];
-                               rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
-                                       rst.u.runstate.time_offline;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
-                               break;
-
-                       case TEST_RUNSTATE_DATA:
-                               if (verbose)
-                                       printf("Testing RUNSTATE_DATA\n");
-                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
-                               memset(&rst.u, 0, sizeof(rst.u));
-                               rst.u.runstate.state = RUNSTATE_running;
-                               rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
-                               rst.u.runstate.time_blocked = 0x6b6b;
-                               rst.u.runstate.time_offline = 0x5a;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
-                               break;
-
-                       case TEST_STEAL_TIME:
-                               if (verbose)
-                                       printf("Testing steal time\n");
-                               /* Yield until scheduler delay exceeds target */
-                               rundelay = get_run_delay() + MIN_STEAL_TIME;
-                               do {
-                                       sched_yield();
-                               } while (get_run_delay() < rundelay);
-                               break;
-
-                       case TEST_EVTCHN_MASKED:
-                               if (!do_eventfd_tests)
-                                       goto done;
-                               if (verbose)
-                                       printf("Testing masked event channel\n");
-                               shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1;
-                               eventfd_write(irq_fd[0], 1UL);
-                               alarm(1);
-                               break;
-
-                       case TEST_EVTCHN_UNMASKED:
-                               if (verbose)
-                                       printf("Testing unmasked event channel\n");
-                               /* Unmask that, but deliver the other one */
-                               shinfo->evtchn_pending[0] = 0;
-                               shinfo->evtchn_mask[0] = 0;
-                               eventfd_write(irq_fd[1], 1UL);
-                               evtchn_irq_expected = true;
-                               alarm(1);
-                               break;
-
-                       case TEST_EVTCHN_SLOWPATH:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-                               shinfo->evtchn_pending[1] = 0;
-                               if (verbose)
-                                       printf("Testing event channel after memslot change\n");
-                               vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-                                                           DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
-                               eventfd_write(irq_fd[0], 1UL);
-                               evtchn_irq_expected = true;
-                               alarm(1);
-                               break;
-
-                       case TEST_EVTCHN_SEND_IOCTL:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-                               if (!do_evtchn_tests)
-                                       goto done;
-
-                               shinfo->evtchn_pending[0] = 0;
-                               if (verbose)
-                                       printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n");
-
-                               struct kvm_irq_routing_xen_evtchn e;
-                               e.port = EVTCHN_TEST2;
-                               e.vcpu = vcpu->id;
-                               e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
-
-                               vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e);
-                               evtchn_irq_expected = true;
-                               alarm(1);
-                               break;
-
-                       case TEST_EVTCHN_HCALL:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-                               shinfo->evtchn_pending[1] = 0;
-
-                               if (verbose)
-                                       printf("Testing guest EVTCHNOP_send direct to evtchn\n");
-                               evtchn_irq_expected = true;
-                               alarm(1);
-                               break;
-
-                       case TEST_EVTCHN_HCALL_SLOWPATH:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-                               shinfo->evtchn_pending[0] = 0;
-
-                               if (verbose)
-                                       printf("Testing guest EVTCHNOP_send direct to evtchn after memslot change\n");
-                               vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-                                                           DUMMY_REGION_GPA_2, DUMMY_REGION_SLOT_2, 1, 0);
-                               evtchn_irq_expected = true;
-                               alarm(1);
-                               break;
-
-                       case TEST_EVTCHN_HCALL_EVENTFD:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-                               shinfo->evtchn_pending[0] = 0;
-
-                               if (verbose)
-                                       printf("Testing guest EVTCHNOP_send to eventfd\n");
-                               evtchn_irq_expected = true;
-                               alarm(1);
-                               break;
-
-                       case TEST_TIMER_SETUP:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-                               shinfo->evtchn_pending[1] = 0;
-
-                               if (verbose)
-                                       printf("Testing guest oneshot timer\n");
-                               break;
-
-                       case TEST_TIMER_WAIT:
-                               memset(&tmr, 0, sizeof(tmr));
-                               tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
-                               TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER,
-                                           "Timer port not returned");
-                               TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
-                                           "Timer priority not returned");
-                               TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time,
-                                           "Timer expiry not returned");
-                               evtchn_irq_expected = true;
-                               alarm(1);
-                               break;
-
-                       case TEST_TIMER_RESTORE:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-                               shinfo->evtchn_pending[0] = 0;
-
-                               if (verbose)
-                                       printf("Testing restored oneshot timer\n");
-
-                               tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
-                               evtchn_irq_expected = true;
-                               alarm(1);
-                               break;
-
-                       case TEST_POLL_READY:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-
-                               if (verbose)
-                                       printf("Testing SCHEDOP_poll with already pending event\n");
-                               shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER;
-                               alarm(1);
-                               break;
-
-                       case TEST_POLL_TIMEOUT:
-                               if (verbose)
-                                       printf("Testing SCHEDOP_poll timeout\n");
-                               shinfo->evtchn_pending[0] = 0;
-                               alarm(1);
-                               break;
-
-                       case TEST_POLL_MASKED:
-                               if (verbose)
-                                       printf("Testing SCHEDOP_poll wake on masked event\n");
-
-                               tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
-                               alarm(1);
-                               break;
-
-                       case TEST_POLL_WAKE:
-                               shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0;
-                               if (verbose)
-                                       printf("Testing SCHEDOP_poll wake on unmasked event\n");
-
-                               evtchn_irq_expected = true;
-                               tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
-
-                               /* Read it back and check the pending time is reported correctly */
-                               tmr.u.timer.expires_ns = 0;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
-                               TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000,
-                                           "Timer not reported pending");
-                               alarm(1);
-                               break;
-
-                       case SET_VCPU_INFO:
-                               if (has_shinfo_hva) {
-                                       struct kvm_xen_vcpu_attr vih = {
-                                               .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA,
-                                               .u.hva = (unsigned long)vinfo
-                                       };
-                                       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vih);
-                               }
-                               break;
-
-                       case TEST_TIMER_PAST:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-                               /* Read timer and check it is no longer pending */
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
-                               TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending");
-
-                               shinfo->evtchn_pending[0] = 0;
-                               if (verbose)
-                                       printf("Testing timer in the past\n");
-
-                               evtchn_irq_expected = true;
-                               tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
-                               alarm(1);
-                               break;
-
-                       case TEST_LOCKING_SEND_RACE:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-                               alarm(0);
-
-                               if (verbose)
-                                       printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n");
-
-                               ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm);
-                               TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret));
-
-                               struct kvm_irq_routing_xen_evtchn uxe = {
-                                       .port = 1,
-                                       .vcpu = vcpu->id,
-                                       .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL
-                               };
-
-                               evtchn_irq_expected = true;
-                               for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;)
-                                       __vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe);
-                               break;
-
-                       case TEST_LOCKING_POLL_RACE:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-
-                               if (verbose)
-                                       printf("Testing shinfo lock corruption (SCHEDOP_poll)\n");
-
-                               shinfo->evtchn_pending[0] = 1;
-
-                               evtchn_irq_expected = true;
-                               tmr.u.timer.expires_ns = rs->state_entry_time +
-                                                        SHINFO_RACE_TIMEOUT * 1000000000ULL;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
-                               break;
-
-                       case TEST_LOCKING_POLL_TIMEOUT:
-                               /*
-                                * Optional and possibly repeated sync point.
-                                * Injecting the timer IRQ may fail if the
-                                * shinfo is invalid when the timer expires.
-                                * If the timer has expired but the IRQ hasn't
-                                * been delivered, rearm the timer and retry.
-                                */
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
-
-                               /* Resume the guest if the timer is still pending. */
-                               if (tmr.u.timer.expires_ns)
-                                       break;
-
-                               /* All done if the IRQ was delivered. */
-                               if (!evtchn_irq_expected)
-                                       break;
-
-                               tmr.u.timer.expires_ns = rs->state_entry_time +
-                                                        SHINFO_RACE_TIMEOUT * 1000000000ULL;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
-                               break;
-                       case TEST_DONE:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-
-                               ret = pthread_cancel(thread);
-                               TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret));
-
-                               ret = pthread_join(thread, 0);
-                               TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret));
-                               goto done;
-
-                       case TEST_GUEST_SAW_IRQ:
-                               TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
-                               evtchn_irq_expected = false;
-                               break;
-                       }
-                       break;
-               }
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
-               }
-       }
-
- done:
-       evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN;
-       evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET;
-       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset);
-
-       alarm(0);
-
-       /*
-        * Just a *really* basic check that things are being put in the
-        * right place. The actual calculations are much the same for
-        * Xen as they are for the KVM variants, so no need to check.
-        */
-       struct pvclock_wall_clock *wc;
-       struct pvclock_vcpu_time_info *ti, *ti2;
-       struct kvm_clock_data kcdata;
-       long long delta;
-
-       wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
-       ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
-       ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
-
-       if (verbose) {
-               printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
-               printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
-                      ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
-                      ti->tsc_shift, ti->flags);
-               printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
-                      ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
-                      ti2->tsc_shift, ti2->flags);
-       }
-
-       TEST_ASSERT(wc->version && !(wc->version & 1),
-                   "Bad wallclock version %x", wc->version);
-
-       vm_ioctl(vm, KVM_GET_CLOCK, &kcdata);
-
-       if (kcdata.flags & KVM_CLOCK_REALTIME) {
-               if (verbose) {
-                       printf("KVM_GET_CLOCK clock: %lld.%09lld\n",
-                              kcdata.clock / NSEC_PER_SEC, kcdata.clock % NSEC_PER_SEC);
-                       printf("KVM_GET_CLOCK realtime: %lld.%09lld\n",
-                              kcdata.realtime / NSEC_PER_SEC, kcdata.realtime % NSEC_PER_SEC);
-               }
-
-               delta = (wc->sec * NSEC_PER_SEC + wc->nsec) - (kcdata.realtime - kcdata.clock);
-
-               /*
-                * KVM_GET_CLOCK gives CLOCK_REALTIME which jumps on leap seconds updates but
-                * unfortunately KVM doesn't currently offer a CLOCK_TAI alternative. Accept 1s
-                * delta as testing clock accuracy is not the goal here. The test just needs to
-                * check that the value in shinfo is somewhat sane.
-                */
-               TEST_ASSERT(llabs(delta) < NSEC_PER_SEC,
-                           "Guest's epoch from shinfo %d.%09d differs from KVM_GET_CLOCK %lld.%09lld",
-                           wc->sec, wc->nsec, (kcdata.realtime - kcdata.clock) / NSEC_PER_SEC,
-                           (kcdata.realtime - kcdata.clock) % NSEC_PER_SEC);
-       } else {
-               pr_info("Missing KVM_CLOCK_REALTIME, skipping shinfo epoch sanity check\n");
-       }
-
-       TEST_ASSERT(ti->version && !(ti->version & 1),
-                   "Bad time_info version %x", ti->version);
-       TEST_ASSERT(ti2->version && !(ti2->version & 1),
-                   "Bad time_info version %x", ti2->version);
-
-       if (do_runstate_tests) {
-               /*
-                * Fetch runstate and check sanity. Strictly speaking in the
-                * general case we might not expect the numbers to be identical
-                * but in this case we know we aren't running the vCPU any more.
-                */
-               struct kvm_xen_vcpu_attr rst = {
-                       .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
-               };
-               vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst);
-
-               if (verbose) {
-                       printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
-                              rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
-                              rs->state, rs->state_entry_time);
-                       for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
-                               printf("State %s: %" PRIu64 " ns\n",
-                                      runstate_names[i], rs->time[i]);
-                       }
-               }
-
-               /*
-                * Exercise runstate info at all points across the page boundary, in
-                * 32-bit and 64-bit mode. In particular, test the case where it is
-                * configured in 32-bit mode and then switched to 64-bit mode while
-                * active, which takes it onto the second page.
-                */
-               unsigned long runstate_addr;
-               struct compat_vcpu_runstate_info *crs;
-               for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4;
-                    runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) {
-
-                       rs = addr_gpa2hva(vm, runstate_addr);
-                       crs = (void *)rs;
-
-                       memset(rs, 0xa5, sizeof(*rs));
-
-                       /* Set to compatibility mode */
-                       lm.u.long_mode = 0;
-                       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
-
-                       /* Set runstate to new address (kernel will write it) */
-                       struct kvm_xen_vcpu_attr st = {
-                               .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
-                               .u.gpa = runstate_addr,
-                       };
-                       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
-
-                       if (verbose)
-                               printf("Compatibility runstate at %08lx\n", runstate_addr);
-
-                       TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch");
-                       TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time,
-                                   "State entry time mismatch");
-                       TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running,
-                                   "Running time mismatch");
-                       TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
-                                   "Runnable time mismatch");
-                       TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
-                                   "Blocked time mismatch");
-                       TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
-                                   "Offline time mismatch");
-                       TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
-                                   "Structure overrun");
-                       TEST_ASSERT(crs->state_entry_time == crs->time[0] +
-                                   crs->time[1] + crs->time[2] + crs->time[3],
-                                   "runstate times don't add up");
-
-
-                       /* Now switch to 64-bit mode */
-                       lm.u.long_mode = 1;
-                       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
-
-                       memset(rs, 0xa5, sizeof(*rs));
-
-                       /* Don't change the address, just trigger a write */
-                       struct kvm_xen_vcpu_attr adj = {
-                               .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST,
-                               .u.runstate.state = (uint64_t)-1
-                       };
-                       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj);
-
-                       if (verbose)
-                               printf("64-bit runstate at %08lx\n", runstate_addr);
-
-                       TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
-                       TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
-                                   "State entry time mismatch");
-                       TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
-                                   "Running time mismatch");
-                       TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
-                                   "Runnable time mismatch");
-                       TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
-                                   "Blocked time mismatch");
-                       TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
-                                   "Offline time mismatch");
-                       TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
-                                   "Structure overrun");
-
-                       TEST_ASSERT(rs->state_entry_time == rs->time[0] +
-                                   rs->time[1] + rs->time[2] + rs->time[3],
-                                   "runstate times don't add up");
-               }
-       }
-
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
deleted file mode 100644 (file)
index 2585087..0000000
+++ /dev/null
@@ -1,143 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * xen_vmcall_test
- *
- * Copyright © 2020 Amazon.com, Inc. or its affiliates.
- *
- * Userspace hypercall testing
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "hyperv.h"
-
-#define HCALL_REGION_GPA       0xc0000000ULL
-#define HCALL_REGION_SLOT      10
-
-#define INPUTVALUE 17
-#define ARGVALUE(x) (0xdeadbeef5a5a0000UL + x)
-#define RETVALUE 0xcafef00dfbfbffffUL
-
-#define XEN_HYPERCALL_MSR      0x40000200
-#define HV_GUEST_OS_ID_MSR     0x40000000
-#define HV_HYPERCALL_MSR       0x40000001
-
-#define HVCALL_SIGNAL_EVENT            0x005d
-#define HV_STATUS_INVALID_ALIGNMENT    4
-
-static void guest_code(void)
-{
-       unsigned long rax = INPUTVALUE;
-       unsigned long rdi = ARGVALUE(1);
-       unsigned long rsi = ARGVALUE(2);
-       unsigned long rdx = ARGVALUE(3);
-       unsigned long rcx;
-       register unsigned long r10 __asm__("r10") = ARGVALUE(4);
-       register unsigned long r8 __asm__("r8") = ARGVALUE(5);
-       register unsigned long r9 __asm__("r9") = ARGVALUE(6);
-
-       /* First a direct invocation of 'vmcall' */
-       __asm__ __volatile__("vmcall" :
-                            "=a"(rax) :
-                            "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
-                            "r"(r10), "r"(r8), "r"(r9));
-       GUEST_ASSERT(rax == RETVALUE);
-
-       /* Fill in the Xen hypercall page */
-       __asm__ __volatile__("wrmsr" : : "c" (XEN_HYPERCALL_MSR),
-                            "a" (HCALL_REGION_GPA & 0xffffffff),
-                            "d" (HCALL_REGION_GPA >> 32));
-
-       /* Set Hyper-V Guest OS ID */
-       __asm__ __volatile__("wrmsr" : : "c" (HV_GUEST_OS_ID_MSR),
-                            "a" (0x5a), "d" (0));
-
-       /* Hyper-V hypercall page */
-       u64 msrval = HCALL_REGION_GPA + PAGE_SIZE + 1;
-       __asm__ __volatile__("wrmsr" : : "c" (HV_HYPERCALL_MSR),
-                            "a" (msrval & 0xffffffff),
-                            "d" (msrval >> 32));
-
-       /* Invoke a Xen hypercall */
-       __asm__ __volatile__("call *%1" : "=a"(rax) :
-                            "r"(HCALL_REGION_GPA + INPUTVALUE * 32),
-                            "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
-                            "r"(r10), "r"(r8), "r"(r9));
-       GUEST_ASSERT(rax == RETVALUE);
-
-       /* Invoke a Hyper-V hypercall */
-       rax = 0;
-       rcx = HVCALL_SIGNAL_EVENT;      /* code */
-       rdx = 0x5a5a5a5a;               /* ingpa (badly aligned) */
-       __asm__ __volatile__("call *%1" : "=a"(rax) :
-                            "r"(HCALL_REGION_GPA + PAGE_SIZE),
-                            "a"(rax), "c"(rcx), "d"(rdx),
-                            "r"(r8));
-       GUEST_ASSERT(rax == HV_STATUS_INVALID_ALIGNMENT);
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int xen_caps;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
-       TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL);
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       vcpu_set_hv_cpuid(vcpu);
-
-       struct kvm_xen_hvm_config hvmc = {
-               .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
-               .msr = XEN_HYPERCALL_MSR,
-       };
-       vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
-
-       /* Map a region for the hypercall pages */
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-                                   HCALL_REGION_GPA, HCALL_REGION_SLOT, 2, 0);
-       virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2);
-
-       for (;;) {
-               volatile struct kvm_run *run = vcpu->run;
-               struct ucall uc;
-
-               vcpu_run(vcpu);
-
-               if (run->exit_reason == KVM_EXIT_XEN) {
-                       TEST_ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
-                       TEST_ASSERT_EQ(run->xen.u.hcall.cpl, 0);
-                       TEST_ASSERT_EQ(run->xen.u.hcall.longmode, 1);
-                       TEST_ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
-                       TEST_ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
-                       TEST_ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
-                       TEST_ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
-                       TEST_ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
-                       TEST_ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
-                       TEST_ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
-                       run->xen.u.hcall.result = RETVALUE;
-                       continue;
-               }
-
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
-               }
-       }
-done:
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
deleted file mode 100644 (file)
index f331a4e..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2019, Google LLC.
- *
- * Tests for the IA32_XSS MSR.
- */
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "vmx.h"
-
-#define MSR_BITS      64
-
-int main(int argc, char *argv[])
-{
-       bool xss_in_msr_list;
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu;
-       uint64_t xss_val;
-       int i, r;
-
-       /* Create VM */
-       vm = vm_create_with_one_vcpu(&vcpu, NULL);
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVES));
-
-       xss_val = vcpu_get_msr(vcpu, MSR_IA32_XSS);
-       TEST_ASSERT(xss_val == 0,
-                   "MSR_IA32_XSS should be initialized to zero");
-
-       vcpu_set_msr(vcpu, MSR_IA32_XSS, xss_val);
-
-       /*
-        * At present, KVM only supports a guest IA32_XSS value of 0. Verify
-        * that trying to set the guest IA32_XSS to an unsupported value fails.
-        * Also, in the future when a non-zero value succeeds check that
-        * IA32_XSS is in the list of MSRs to save/restore.
-        */
-       xss_in_msr_list = kvm_msr_is_in_save_restore_list(MSR_IA32_XSS);
-       for (i = 0; i < MSR_BITS; ++i) {
-               r = _vcpu_set_msr(vcpu, MSR_IA32_XSS, 1ull << i);
-
-               /*
-                * Setting a list of MSRs returns the entry that "faulted", or
-                * the last entry +1 if all MSRs were successfully written.
-                */
-               TEST_ASSERT(!r || r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r));
-               TEST_ASSERT(r != 1 || xss_in_msr_list,
-                           "IA32_XSS was able to be set, but was not in save/restore list");
-       }
-
-       kvm_vm_free(vm);
-}