F: arch/arm64/include/uapi/asm/kvm*
F: arch/arm64/kvm/
F: include/kvm/arm_*
-F: tools/testing/selftests/kvm/*/aarch64/
-F: tools/testing/selftests/kvm/aarch64/
+F: tools/testing/selftests/kvm/*/arm64/
+F: tools/testing/selftests/kvm/arm64/
KERNEL VIRTUAL MACHINE FOR LOONGARCH (KVM/LoongArch)
M: Tianrui Zhao <zhaotianrui@loongson.cn>
F: arch/s390/mm/gmap.c
F: drivers/s390/char/uvdevice.c
F: tools/testing/selftests/drivers/s390x/uvdevice/
-F: tools/testing/selftests/kvm/*/s390x/
-F: tools/testing/selftests/kvm/s390x/
+F: tools/testing/selftests/kvm/*/s390/
+F: tools/testing/selftests/kvm/s390/
KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86)
M: Sean Christopherson <seanjc@google.com>
F: arch/x86/include/uapi/asm/vmx.h
F: arch/x86/kvm/
F: arch/x86/kvm/*/
-F: tools/testing/selftests/kvm/*/x86_64/
-F: tools/testing/selftests/kvm/x86_64/
+F: tools/testing/selftests/kvm/*/x86/
+F: tools/testing/selftests/kvm/x86/
KERNFS
M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
ARCH ?= $(SUBARCH)
ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64))
-ifeq ($(ARCH),x86)
- ARCH_DIR := x86_64
-else ifeq ($(ARCH),arm64)
- ARCH_DIR := aarch64
-else ifeq ($(ARCH),s390)
- ARCH_DIR := s390x
+# Top-level selftests allows ARCH=x86_64 :-(
+ifeq ($(ARCH),x86_64)
+ ARCH_DIR := x86
else
ARCH_DIR := $(ARCH)
endif
-
include Makefile.kvm
else
# Empty targets for unsupported architectures
LIBKVM_STRING += lib/string_override.c
-LIBKVM_x86_64 += lib/x86_64/apic.c
-LIBKVM_x86_64 += lib/x86_64/handlers.S
-LIBKVM_x86_64 += lib/x86_64/hyperv.c
-LIBKVM_x86_64 += lib/x86_64/memstress.c
-LIBKVM_x86_64 += lib/x86_64/pmu.c
-LIBKVM_x86_64 += lib/x86_64/processor.c
-LIBKVM_x86_64 += lib/x86_64/sev.c
-LIBKVM_x86_64 += lib/x86_64/svm.c
-LIBKVM_x86_64 += lib/x86_64/ucall.c
-LIBKVM_x86_64 += lib/x86_64/vmx.c
-
-LIBKVM_aarch64 += lib/aarch64/gic.c
-LIBKVM_aarch64 += lib/aarch64/gic_v3.c
-LIBKVM_aarch64 += lib/aarch64/gic_v3_its.c
-LIBKVM_aarch64 += lib/aarch64/handlers.S
-LIBKVM_aarch64 += lib/aarch64/processor.c
-LIBKVM_aarch64 += lib/aarch64/spinlock.c
-LIBKVM_aarch64 += lib/aarch64/ucall.c
-LIBKVM_aarch64 += lib/aarch64/vgic.c
-
-LIBKVM_s390x += lib/s390x/diag318_test_handler.c
-LIBKVM_s390x += lib/s390x/processor.c
-LIBKVM_s390x += lib/s390x/ucall.c
-LIBKVM_s390x += lib/s390x/facility.c
+LIBKVM_x86 += lib/x86/apic.c
+LIBKVM_x86 += lib/x86/handlers.S
+LIBKVM_x86 += lib/x86/hyperv.c
+LIBKVM_x86 += lib/x86/memstress.c
+LIBKVM_x86 += lib/x86/pmu.c
+LIBKVM_x86 += lib/x86/processor.c
+LIBKVM_x86 += lib/x86/sev.c
+LIBKVM_x86 += lib/x86/svm.c
+LIBKVM_x86 += lib/x86/ucall.c
+LIBKVM_x86 += lib/x86/vmx.c
+
+LIBKVM_arm64 += lib/arm64/gic.c
+LIBKVM_arm64 += lib/arm64/gic_v3.c
+LIBKVM_arm64 += lib/arm64/gic_v3_its.c
+LIBKVM_arm64 += lib/arm64/handlers.S
+LIBKVM_arm64 += lib/arm64/processor.c
+LIBKVM_arm64 += lib/arm64/spinlock.c
+LIBKVM_arm64 += lib/arm64/ucall.c
+LIBKVM_arm64 += lib/arm64/vgic.c
+
+LIBKVM_s390 += lib/s390/diag318_test_handler.c
+LIBKVM_s390 += lib/s390/processor.c
+LIBKVM_s390 += lib/s390/ucall.c
+LIBKVM_s390 += lib/s390/facility.c
LIBKVM_riscv += lib/riscv/handlers.S
LIBKVM_riscv += lib/riscv/processor.c
LIBKVM_riscv += lib/riscv/ucall.c
# Non-compiled test targets
-TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh
+TEST_PROGS_x86 += x86/nx_huge_pages_test.sh
# Compiled test targets
-TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
-TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
-TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test
-TEST_GEN_PROGS_x86_64 += x86_64/feature_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test
-TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test
-TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_extended_hypercalls
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush
-TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test
-TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
-TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test
-TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test
-TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
-TEST_GEN_PROGS_x86_64 += x86_64/pmu_counters_test
-TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
-TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test
-TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test
-TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
-TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
-TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test
-TEST_GEN_PROGS_x86_64 += x86_64/smm_test
-TEST_GEN_PROGS_x86_64 += x86_64/state_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test
-TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync
-TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
-TEST_GEN_PROGS_x86_64 += x86_64/ucna_injection_test
-TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test
-TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
-TEST_GEN_PROGS_x86_64 += x86_64/apic_bus_clock_test
-TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test
-TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test
-TEST_GEN_PROGS_x86_64 += x86_64/xcr0_cpuid_test
-TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
-TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
-TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test
-TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
-TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
-TEST_GEN_PROGS_x86_64 += x86_64/sev_init2_tests
-TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
-TEST_GEN_PROGS_x86_64 += x86_64/sev_smoke_test
-TEST_GEN_PROGS_x86_64 += x86_64/amx_test
-TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test
-TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test
-TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test
-TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
-TEST_GEN_PROGS_x86_64 += coalesced_io_test
-TEST_GEN_PROGS_x86_64 += demand_paging_test
-TEST_GEN_PROGS_x86_64 += dirty_log_test
-TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
-TEST_GEN_PROGS_x86_64 += guest_memfd_test
-TEST_GEN_PROGS_x86_64 += guest_print_test
-TEST_GEN_PROGS_x86_64 += hardware_disable_test
-TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
-TEST_GEN_PROGS_x86_64 += kvm_page_table_test
-TEST_GEN_PROGS_x86_64 += mmu_stress_test
-TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
-TEST_GEN_PROGS_x86_64 += memslot_perf_test
-TEST_GEN_PROGS_x86_64 += rseq_test
-TEST_GEN_PROGS_x86_64 += set_memory_region_test
-TEST_GEN_PROGS_x86_64 += steal_time
-TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test
-TEST_GEN_PROGS_x86_64 += system_counter_offset_test
-TEST_GEN_PROGS_x86_64 += pre_fault_memory_test
+TEST_GEN_PROGS_x86 = x86/cpuid_test
+TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test
+TEST_GEN_PROGS_x86 += x86/feature_msrs_test
+TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test
+TEST_GEN_PROGS_x86 += x86/fix_hypercall_test
+TEST_GEN_PROGS_x86 += x86/hwcr_msr_test
+TEST_GEN_PROGS_x86 += x86/hyperv_clock
+TEST_GEN_PROGS_x86 += x86/hyperv_cpuid
+TEST_GEN_PROGS_x86 += x86/hyperv_evmcs
+TEST_GEN_PROGS_x86 += x86/hyperv_extended_hypercalls
+TEST_GEN_PROGS_x86 += x86/hyperv_features
+TEST_GEN_PROGS_x86 += x86/hyperv_ipi
+TEST_GEN_PROGS_x86 += x86/hyperv_svm_test
+TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush
+TEST_GEN_PROGS_x86 += x86/kvm_clock_test
+TEST_GEN_PROGS_x86 += x86/kvm_pv_test
+TEST_GEN_PROGS_x86 += x86/monitor_mwait_test
+TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
+TEST_GEN_PROGS_x86 += x86/platform_info_test
+TEST_GEN_PROGS_x86 += x86/pmu_counters_test
+TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test
+TEST_GEN_PROGS_x86 += x86/private_mem_conversions_test
+TEST_GEN_PROGS_x86 += x86/private_mem_kvm_exits_test
+TEST_GEN_PROGS_x86 += x86/set_boot_cpu_id
+TEST_GEN_PROGS_x86 += x86/set_sregs_test
+TEST_GEN_PROGS_x86 += x86/smaller_maxphyaddr_emulation_test
+TEST_GEN_PROGS_x86 += x86/smm_test
+TEST_GEN_PROGS_x86 += x86/state_test
+TEST_GEN_PROGS_x86 += x86/vmx_preemption_timer_test
+TEST_GEN_PROGS_x86 += x86/svm_vmcall_test
+TEST_GEN_PROGS_x86 += x86/svm_int_ctl_test
+TEST_GEN_PROGS_x86 += x86/svm_nested_shutdown_test
+TEST_GEN_PROGS_x86 += x86/svm_nested_soft_inject_test
+TEST_GEN_PROGS_x86 += x86/tsc_scaling_sync
+TEST_GEN_PROGS_x86 += x86/sync_regs_test
+TEST_GEN_PROGS_x86 += x86/ucna_injection_test
+TEST_GEN_PROGS_x86 += x86/userspace_io_test
+TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test
+TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test
+TEST_GEN_PROGS_x86 += x86/vmx_close_while_nested_test
+TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test
+TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state
+TEST_GEN_PROGS_x86 += x86/vmx_msrs_test
+TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state
+TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test
+TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86 += x86/vmx_nested_tsc_scaling_test
+TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test
+TEST_GEN_PROGS_x86 += x86/xapic_ipi_test
+TEST_GEN_PROGS_x86 += x86/xapic_state_test
+TEST_GEN_PROGS_x86 += x86/xcr0_cpuid_test
+TEST_GEN_PROGS_x86 += x86/xss_msr_test
+TEST_GEN_PROGS_x86 += x86/debug_regs
+TEST_GEN_PROGS_x86 += x86/tsc_msrs_test
+TEST_GEN_PROGS_x86 += x86/vmx_pmu_caps_test
+TEST_GEN_PROGS_x86 += x86/xen_shinfo_test
+TEST_GEN_PROGS_x86 += x86/xen_vmcall_test
+TEST_GEN_PROGS_x86 += x86/sev_init2_tests
+TEST_GEN_PROGS_x86 += x86/sev_migrate_tests
+TEST_GEN_PROGS_x86 += x86/sev_smoke_test
+TEST_GEN_PROGS_x86 += x86/amx_test
+TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test
+TEST_GEN_PROGS_x86 += x86/triple_fault_event_test
+TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test
+TEST_GEN_PROGS_x86 += access_tracking_perf_test
+TEST_GEN_PROGS_x86 += coalesced_io_test
+TEST_GEN_PROGS_x86 += demand_paging_test
+TEST_GEN_PROGS_x86 += dirty_log_test
+TEST_GEN_PROGS_x86 += dirty_log_perf_test
+TEST_GEN_PROGS_x86 += guest_memfd_test
+TEST_GEN_PROGS_x86 += guest_print_test
+TEST_GEN_PROGS_x86 += hardware_disable_test
+TEST_GEN_PROGS_x86 += kvm_create_max_vcpus
+TEST_GEN_PROGS_x86 += kvm_page_table_test
+TEST_GEN_PROGS_x86 += memslot_modification_stress_test
+TEST_GEN_PROGS_x86 += memslot_perf_test
+TEST_GEN_PROGS_x86 += mmu_stress_test
+TEST_GEN_PROGS_x86 += rseq_test
+TEST_GEN_PROGS_x86 += set_memory_region_test
+TEST_GEN_PROGS_x86 += steal_time
+TEST_GEN_PROGS_x86 += kvm_binary_stats_test
+TEST_GEN_PROGS_x86 += system_counter_offset_test
+TEST_GEN_PROGS_x86 += pre_fault_memory_test
# Compiled outputs used by test targets
-TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test
-
-TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs
-TEST_GEN_PROGS_aarch64 += aarch64/arch_timer_edge_cases
-TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
-TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
-TEST_GEN_PROGS_aarch64 += aarch64/mmio_abort
-TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
-TEST_GEN_PROGS_aarch64 += aarch64/psci_test
-TEST_GEN_PROGS_aarch64 += aarch64/set_id_regs
-TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter
-TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
-TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
-TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
-TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress
-TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access
-TEST_GEN_PROGS_aarch64 += aarch64/no-vgic-v3
-TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
-TEST_GEN_PROGS_aarch64 += arch_timer
-TEST_GEN_PROGS_aarch64 += coalesced_io_test
-TEST_GEN_PROGS_aarch64 += demand_paging_test
-TEST_GEN_PROGS_aarch64 += dirty_log_test
-TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
-TEST_GEN_PROGS_aarch64 += guest_print_test
-TEST_GEN_PROGS_aarch64 += get-reg-list
-TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
-TEST_GEN_PROGS_aarch64 += kvm_page_table_test
-TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test
-TEST_GEN_PROGS_aarch64 += memslot_perf_test
-TEST_GEN_PROGS_aarch64 += mmu_stress_test
-TEST_GEN_PROGS_aarch64 += rseq_test
-TEST_GEN_PROGS_aarch64 += set_memory_region_test
-TEST_GEN_PROGS_aarch64 += steal_time
-TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test
-
-TEST_GEN_PROGS_s390x = s390x/memop
-TEST_GEN_PROGS_s390x += s390x/resets
-TEST_GEN_PROGS_s390x += s390x/sync_regs_test
-TEST_GEN_PROGS_s390x += s390x/tprot
-TEST_GEN_PROGS_s390x += s390x/cmma_test
-TEST_GEN_PROGS_s390x += s390x/debug_test
-TEST_GEN_PROGS_s390x += s390x/cpumodel_subfuncs_test
-TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test
-TEST_GEN_PROGS_s390x += s390x/ucontrol_test
-TEST_GEN_PROGS_s390x += demand_paging_test
-TEST_GEN_PROGS_s390x += dirty_log_test
-TEST_GEN_PROGS_s390x += guest_print_test
-TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
-TEST_GEN_PROGS_s390x += kvm_page_table_test
-TEST_GEN_PROGS_s390x += rseq_test
-TEST_GEN_PROGS_s390x += set_memory_region_test
-TEST_GEN_PROGS_s390x += kvm_binary_stats_test
+TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test
+
+TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
+TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
+TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
+TEST_GEN_PROGS_arm64 += arm64/hypercalls
+TEST_GEN_PROGS_arm64 += arm64/mmio_abort
+TEST_GEN_PROGS_arm64 += arm64/page_fault_test
+TEST_GEN_PROGS_arm64 += arm64/psci_test
+TEST_GEN_PROGS_arm64 += arm64/set_id_regs
+TEST_GEN_PROGS_arm64 += arm64/smccc_filter
+TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config
+TEST_GEN_PROGS_arm64 += arm64/vgic_init
+TEST_GEN_PROGS_arm64 += arm64/vgic_irq
+TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress
+TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access
+TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3
+TEST_GEN_PROGS_arm64 += access_tracking_perf_test
+TEST_GEN_PROGS_arm64 += arch_timer
+TEST_GEN_PROGS_arm64 += coalesced_io_test
+TEST_GEN_PROGS_arm64 += demand_paging_test
+TEST_GEN_PROGS_arm64 += dirty_log_test
+TEST_GEN_PROGS_arm64 += dirty_log_perf_test
+TEST_GEN_PROGS_arm64 += guest_print_test
+TEST_GEN_PROGS_arm64 += get-reg-list
+TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_arm64 += kvm_page_table_test
+TEST_GEN_PROGS_arm64 += memslot_modification_stress_test
+TEST_GEN_PROGS_arm64 += memslot_perf_test
+TEST_GEN_PROGS_arm64 += mmu_stress_test
+TEST_GEN_PROGS_arm64 += rseq_test
+TEST_GEN_PROGS_arm64 += set_memory_region_test
+TEST_GEN_PROGS_arm64 += steal_time
+TEST_GEN_PROGS_arm64 += kvm_binary_stats_test
+
+TEST_GEN_PROGS_s390 = s390/memop
+TEST_GEN_PROGS_s390 += s390/resets
+TEST_GEN_PROGS_s390 += s390/sync_regs_test
+TEST_GEN_PROGS_s390 += s390/tprot
+TEST_GEN_PROGS_s390 += s390/cmma_test
+TEST_GEN_PROGS_s390 += s390/debug_test
+TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test
+TEST_GEN_PROGS_s390 += s390/shared_zeropage_test
+TEST_GEN_PROGS_s390 += s390/ucontrol_test
+TEST_GEN_PROGS_s390 += demand_paging_test
+TEST_GEN_PROGS_s390 += dirty_log_test
+TEST_GEN_PROGS_s390 += guest_print_test
+TEST_GEN_PROGS_s390 += kvm_create_max_vcpus
+TEST_GEN_PROGS_s390 += kvm_page_table_test
+TEST_GEN_PROGS_s390 += rseq_test
+TEST_GEN_PROGS_s390 += set_memory_region_test
+TEST_GEN_PROGS_s390 += kvm_binary_stats_test
TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
TEST_GEN_PROGS_riscv += riscv/ebreak_test
INSTALL_HDR_PATH = $(top_srcdir)/usr
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
-ifeq ($(ARCH),x86_64)
-LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include
-else
-LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
-endif
+LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH_DIR)/include
CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
-fno-builtin-memcmp -fno-builtin-memcpy \
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * aarch32_id_regs - Test for ID register behavior on AArch64-only systems
- *
- * Copyright (c) 2022 Google LLC.
- *
- * Test that KVM handles the AArch64 views of the AArch32 ID registers as RAZ
- * and WI from userspace.
- */
-
-#include <stdint.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "test_util.h"
-#include <linux/bitfield.h>
-
-#define BAD_ID_REG_VAL 0x1badc0deul
-
-#define GUEST_ASSERT_REG_RAZ(reg) GUEST_ASSERT_EQ(read_sysreg_s(reg), 0)
-
-static void guest_main(void)
-{
- GUEST_ASSERT_REG_RAZ(SYS_ID_PFR0_EL1);
- GUEST_ASSERT_REG_RAZ(SYS_ID_PFR1_EL1);
- GUEST_ASSERT_REG_RAZ(SYS_ID_DFR0_EL1);
- GUEST_ASSERT_REG_RAZ(SYS_ID_AFR0_EL1);
- GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR0_EL1);
- GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR1_EL1);
- GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR2_EL1);
- GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR3_EL1);
- GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR0_EL1);
- GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR1_EL1);
- GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR2_EL1);
- GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR3_EL1);
- GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR4_EL1);
- GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR5_EL1);
- GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR4_EL1);
- GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR6_EL1);
- GUEST_ASSERT_REG_RAZ(SYS_MVFR0_EL1);
- GUEST_ASSERT_REG_RAZ(SYS_MVFR1_EL1);
- GUEST_ASSERT_REG_RAZ(SYS_MVFR2_EL1);
- GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 3));
- GUEST_ASSERT_REG_RAZ(SYS_ID_PFR2_EL1);
- GUEST_ASSERT_REG_RAZ(SYS_ID_DFR1_EL1);
- GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR5_EL1);
- GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 7));
-
- GUEST_DONE();
-}
-
-static void test_guest_raz(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- vcpu_run(vcpu);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_DONE:
- break;
- default:
- TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
- }
-}
-
-static uint64_t raz_wi_reg_ids[] = {
- KVM_ARM64_SYS_REG(SYS_ID_PFR0_EL1),
- KVM_ARM64_SYS_REG(SYS_ID_PFR1_EL1),
- KVM_ARM64_SYS_REG(SYS_ID_DFR0_EL1),
- KVM_ARM64_SYS_REG(SYS_ID_MMFR0_EL1),
- KVM_ARM64_SYS_REG(SYS_ID_MMFR1_EL1),
- KVM_ARM64_SYS_REG(SYS_ID_MMFR2_EL1),
- KVM_ARM64_SYS_REG(SYS_ID_MMFR3_EL1),
- KVM_ARM64_SYS_REG(SYS_ID_ISAR0_EL1),
- KVM_ARM64_SYS_REG(SYS_ID_ISAR1_EL1),
- KVM_ARM64_SYS_REG(SYS_ID_ISAR2_EL1),
- KVM_ARM64_SYS_REG(SYS_ID_ISAR3_EL1),
- KVM_ARM64_SYS_REG(SYS_ID_ISAR4_EL1),
- KVM_ARM64_SYS_REG(SYS_ID_ISAR5_EL1),
- KVM_ARM64_SYS_REG(SYS_ID_MMFR4_EL1),
- KVM_ARM64_SYS_REG(SYS_ID_ISAR6_EL1),
- KVM_ARM64_SYS_REG(SYS_MVFR0_EL1),
- KVM_ARM64_SYS_REG(SYS_MVFR1_EL1),
- KVM_ARM64_SYS_REG(SYS_MVFR2_EL1),
- KVM_ARM64_SYS_REG(SYS_ID_PFR2_EL1),
- KVM_ARM64_SYS_REG(SYS_ID_MMFR5_EL1),
-};
-
-static void test_user_raz_wi(struct kvm_vcpu *vcpu)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(raz_wi_reg_ids); i++) {
- uint64_t reg_id = raz_wi_reg_ids[i];
- uint64_t val;
-
- val = vcpu_get_reg(vcpu, reg_id);
- TEST_ASSERT_EQ(val, 0);
-
- /*
- * Expect the ioctl to succeed with no effect on the register
- * value.
- */
- vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
-
- val = vcpu_get_reg(vcpu, reg_id);
- TEST_ASSERT_EQ(val, 0);
- }
-}
-
-static uint64_t raz_invariant_reg_ids[] = {
- KVM_ARM64_SYS_REG(SYS_ID_AFR0_EL1),
- KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 3)),
- KVM_ARM64_SYS_REG(SYS_ID_DFR1_EL1),
- KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 7)),
-};
-
-static void test_user_raz_invariant(struct kvm_vcpu *vcpu)
-{
- int i, r;
-
- for (i = 0; i < ARRAY_SIZE(raz_invariant_reg_ids); i++) {
- uint64_t reg_id = raz_invariant_reg_ids[i];
- uint64_t val;
-
- val = vcpu_get_reg(vcpu, reg_id);
- TEST_ASSERT_EQ(val, 0);
-
- r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
- TEST_ASSERT(r < 0 && errno == EINVAL,
- "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
-
- val = vcpu_get_reg(vcpu, reg_id);
- TEST_ASSERT_EQ(val, 0);
- }
-}
-
-static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu)
-{
- uint64_t val, el0;
-
- val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
-
- el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
- return el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY;
-}
-
-int main(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_main);
-
- TEST_REQUIRE(vcpu_aarch64_only(vcpu));
-
- test_user_raz_wi(vcpu);
- test_user_raz_invariant(vcpu);
- test_guest_raz(vcpu);
-
- kvm_vm_free(vm);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * The test validates both the virtual and physical timer IRQs using
- * CVAL and TVAL registers.
- *
- * Copyright (c) 2021, Google LLC.
- */
-#include "arch_timer.h"
-#include "delay.h"
-#include "gic.h"
-#include "processor.h"
-#include "timer_test.h"
-#include "ucall_common.h"
-#include "vgic.h"
-
-enum guest_stage {
- GUEST_STAGE_VTIMER_CVAL = 1,
- GUEST_STAGE_VTIMER_TVAL,
- GUEST_STAGE_PTIMER_CVAL,
- GUEST_STAGE_PTIMER_TVAL,
- GUEST_STAGE_MAX,
-};
-
-static int vtimer_irq, ptimer_irq;
-
-static void
-guest_configure_timer_action(struct test_vcpu_shared_data *shared_data)
-{
- switch (shared_data->guest_stage) {
- case GUEST_STAGE_VTIMER_CVAL:
- timer_set_next_cval_ms(VIRTUAL, test_args.timer_period_ms);
- shared_data->xcnt = timer_get_cntct(VIRTUAL);
- timer_set_ctl(VIRTUAL, CTL_ENABLE);
- break;
- case GUEST_STAGE_VTIMER_TVAL:
- timer_set_next_tval_ms(VIRTUAL, test_args.timer_period_ms);
- shared_data->xcnt = timer_get_cntct(VIRTUAL);
- timer_set_ctl(VIRTUAL, CTL_ENABLE);
- break;
- case GUEST_STAGE_PTIMER_CVAL:
- timer_set_next_cval_ms(PHYSICAL, test_args.timer_period_ms);
- shared_data->xcnt = timer_get_cntct(PHYSICAL);
- timer_set_ctl(PHYSICAL, CTL_ENABLE);
- break;
- case GUEST_STAGE_PTIMER_TVAL:
- timer_set_next_tval_ms(PHYSICAL, test_args.timer_period_ms);
- shared_data->xcnt = timer_get_cntct(PHYSICAL);
- timer_set_ctl(PHYSICAL, CTL_ENABLE);
- break;
- default:
- GUEST_ASSERT(0);
- }
-}
-
-static void guest_validate_irq(unsigned int intid,
- struct test_vcpu_shared_data *shared_data)
-{
- enum guest_stage stage = shared_data->guest_stage;
- uint64_t xcnt = 0, xcnt_diff_us, cval = 0;
- unsigned long xctl = 0;
- unsigned int timer_irq = 0;
- unsigned int accessor;
-
- if (intid == IAR_SPURIOUS)
- return;
-
- switch (stage) {
- case GUEST_STAGE_VTIMER_CVAL:
- case GUEST_STAGE_VTIMER_TVAL:
- accessor = VIRTUAL;
- timer_irq = vtimer_irq;
- break;
- case GUEST_STAGE_PTIMER_CVAL:
- case GUEST_STAGE_PTIMER_TVAL:
- accessor = PHYSICAL;
- timer_irq = ptimer_irq;
- break;
- default:
- GUEST_ASSERT(0);
- return;
- }
-
- xctl = timer_get_ctl(accessor);
- if ((xctl & CTL_IMASK) || !(xctl & CTL_ENABLE))
- return;
-
- timer_set_ctl(accessor, CTL_IMASK);
- xcnt = timer_get_cntct(accessor);
- cval = timer_get_cval(accessor);
-
- xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
-
- /* Make sure we are dealing with the correct timer IRQ */
- GUEST_ASSERT_EQ(intid, timer_irq);
-
- /* Basic 'timer condition met' check */
- __GUEST_ASSERT(xcnt >= cval,
- "xcnt = 0x%lx, cval = 0x%lx, xcnt_diff_us = 0x%lx",
- xcnt, cval, xcnt_diff_us);
- __GUEST_ASSERT(xctl & CTL_ISTATUS, "xctl = 0x%lx", xctl);
-
- WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
-}
-
-static void guest_irq_handler(struct ex_regs *regs)
-{
- unsigned int intid = gic_get_and_ack_irq();
- uint32_t cpu = guest_get_vcpuid();
- struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
-
- guest_validate_irq(intid, shared_data);
-
- gic_set_eoi(intid);
-}
-
-static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
- enum guest_stage stage)
-{
- uint32_t irq_iter, config_iter;
-
- shared_data->guest_stage = stage;
- shared_data->nr_iter = 0;
-
- for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
- /* Setup the next interrupt */
- guest_configure_timer_action(shared_data);
-
- /* Setup a timeout for the interrupt to arrive */
- udelay(msecs_to_usecs(test_args.timer_period_ms) +
- test_args.timer_err_margin_us);
-
- irq_iter = READ_ONCE(shared_data->nr_iter);
- __GUEST_ASSERT(config_iter + 1 == irq_iter,
- "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
- " Guest timer interrupt was not triggered within the specified\n"
- " interval, try to increase the error margin by [-e] option.\n",
- config_iter + 1, irq_iter);
- }
-}
-
-static void guest_code(void)
-{
- uint32_t cpu = guest_get_vcpuid();
- struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
-
- local_irq_disable();
-
- gic_init(GIC_V3, test_args.nr_vcpus);
-
- timer_set_ctl(VIRTUAL, CTL_IMASK);
- timer_set_ctl(PHYSICAL, CTL_IMASK);
-
- gic_irq_enable(vtimer_irq);
- gic_irq_enable(ptimer_irq);
- local_irq_enable();
-
- guest_run_stage(shared_data, GUEST_STAGE_VTIMER_CVAL);
- guest_run_stage(shared_data, GUEST_STAGE_VTIMER_TVAL);
- guest_run_stage(shared_data, GUEST_STAGE_PTIMER_CVAL);
- guest_run_stage(shared_data, GUEST_STAGE_PTIMER_TVAL);
-
- GUEST_DONE();
-}
-
-static void test_init_timer_irq(struct kvm_vm *vm)
-{
-	/* Timer intid should be the same for all vCPUs, so query only vCPU-0 */
- vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
- KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
- vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
- KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
-
- sync_global_to_guest(vm, ptimer_irq);
- sync_global_to_guest(vm, vtimer_irq);
-
- pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
-}
-
-static int gic_fd;
-
-struct kvm_vm *test_vm_create(void)
-{
- struct kvm_vm *vm;
- unsigned int i;
- int nr_vcpus = test_args.nr_vcpus;
-
- vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
-
- vm_init_descriptor_tables(vm);
- vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
-
- if (!test_args.reserved) {
- if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET)) {
- struct kvm_arm_counter_offset offset = {
- .counter_offset = test_args.counter_offset,
- .reserved = 0,
- };
- vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &offset);
- } else
- TEST_FAIL("no support for global offset");
- }
-
- for (i = 0; i < nr_vcpus; i++)
- vcpu_init_descriptor_tables(vcpus[i]);
-
- test_init_timer_irq(vm);
- gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);
- __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3");
-
- /* Make all the test's cmdline args visible to the guest */
- sync_global_to_guest(vm, test_args);
-
- return vm;
-}
-
-void test_vm_cleanup(struct kvm_vm *vm)
-{
- close(gic_fd);
- kvm_vm_free(vm);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * arch_timer_edge_cases.c - Tests the aarch64 timer IRQ functionality.
- *
- * The test validates some edge cases related to the arch-timer:
- * - timers above the max TVAL value.
- * - timers in the past.
- * - moving counters ahead and behind pending timers.
- * - reprogramming timers.
- * - timers fired multiple times.
- * - masking/unmasking using the timer control mask.
- *
- * Copyright (c) 2021, Google LLC.
- */
-
-#define _GNU_SOURCE
-
-#include <pthread.h>
-#include <sys/sysinfo.h>
-
-#include "arch_timer.h"
-#include "gic.h"
-#include "vgic.h"
-
-static const uint64_t CVAL_MAX = ~0ULL;
-/* tval is a signed 32-bit int. */
-static const int32_t TVAL_MAX = INT32_MAX;
-static const int32_t TVAL_MIN = INT32_MIN;
-
-/* Time to wait before concluding that no IRQ will arrive. */
-static const uint32_t TIMEOUT_NO_IRQ_US = 50000;
-
-/* A nice counter value to use as the starting one for most tests. */
-static const uint64_t DEF_CNT = (CVAL_MAX / 2);
-
-/* Number of runs. */
-static const uint32_t NR_TEST_ITERS_DEF = 5;
-
-/* Default wait test time in ms. */
-static const uint32_t WAIT_TEST_MS = 10;
-
-/* Default "long" wait test time in ms. */
-static const uint32_t LONG_WAIT_TEST_MS = 100;
-
-/* Shared with IRQ handler. */
-struct test_vcpu_shared_data {
- atomic_t handled;
- atomic_t spurious;
-} shared_data;
-
-struct test_args {
- /* Virtual or physical timer and counter tests. */
- enum arch_timer timer;
- /* Delay used for most timer tests. */
- uint64_t wait_ms;
- /* Delay used in the test_long_timer_delays test. */
- uint64_t long_wait_ms;
- /* Number of iterations. */
- int iterations;
- /* Whether to test the physical timer. */
- bool test_physical;
- /* Whether to test the virtual timer. */
- bool test_virtual;
-};
-
-struct test_args test_args = {
- .wait_ms = WAIT_TEST_MS,
- .long_wait_ms = LONG_WAIT_TEST_MS,
- .iterations = NR_TEST_ITERS_DEF,
- .test_physical = true,
- .test_virtual = true,
-};
-
-static int vtimer_irq, ptimer_irq;
-
-enum sync_cmd {
- SET_COUNTER_VALUE,
- USERSPACE_USLEEP,
- USERSPACE_SCHED_YIELD,
- USERSPACE_MIGRATE_SELF,
- NO_USERSPACE_CMD,
-};
-
-typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec);
-
-static void sleep_poll(enum arch_timer timer, uint64_t usec);
-static void sleep_sched_poll(enum arch_timer timer, uint64_t usec);
-static void sleep_in_userspace(enum arch_timer timer, uint64_t usec);
-static void sleep_migrate(enum arch_timer timer, uint64_t usec);
-
-sleep_method_t sleep_method[] = {
- sleep_poll,
- sleep_sched_poll,
- sleep_migrate,
- sleep_in_userspace,
-};
-
-typedef void (*irq_wait_method_t)(void);
-
-static void wait_for_non_spurious_irq(void);
-static void wait_poll_for_irq(void);
-static void wait_sched_poll_for_irq(void);
-static void wait_migrate_poll_for_irq(void);
-
-irq_wait_method_t irq_wait_method[] = {
- wait_for_non_spurious_irq,
- wait_poll_for_irq,
- wait_sched_poll_for_irq,
- wait_migrate_poll_for_irq,
-};
-
-enum timer_view {
- TIMER_CVAL,
- TIMER_TVAL,
-};
-
-static void assert_irqs_handled(uint32_t n)
-{
- int h = atomic_read(&shared_data.handled);
-
-	__GUEST_ASSERT(h == n, "Handled %d IRQs but expected %d", h, n);
-}
-
-static void userspace_cmd(uint64_t cmd)
-{
- GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0);
-}
-
-static void userspace_migrate_vcpu(void)
-{
- userspace_cmd(USERSPACE_MIGRATE_SELF);
-}
-
-static void userspace_sleep(uint64_t usecs)
-{
- GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0);
-}
-
-static void set_counter(enum arch_timer timer, uint64_t counter)
-{
- GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0);
-}
-
-static void guest_irq_handler(struct ex_regs *regs)
-{
- unsigned int intid = gic_get_and_ack_irq();
- enum arch_timer timer;
- uint64_t cnt, cval;
- uint32_t ctl;
- bool timer_condition, istatus;
-
- if (intid == IAR_SPURIOUS) {
- atomic_inc(&shared_data.spurious);
- goto out;
- }
-
- if (intid == ptimer_irq)
- timer = PHYSICAL;
- else if (intid == vtimer_irq)
- timer = VIRTUAL;
- else
- goto out;
-
- ctl = timer_get_ctl(timer);
- cval = timer_get_cval(timer);
- cnt = timer_get_cntct(timer);
- timer_condition = cnt >= cval;
- istatus = (ctl & CTL_ISTATUS) && (ctl & CTL_ENABLE);
- GUEST_ASSERT_EQ(timer_condition, istatus);
-
- /* Disable and mask the timer. */
- timer_set_ctl(timer, CTL_IMASK);
-
- atomic_inc(&shared_data.handled);
-
-out:
- gic_set_eoi(intid);
-}
-
-static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles,
- uint32_t ctl)
-{
- atomic_set(&shared_data.handled, 0);
- atomic_set(&shared_data.spurious, 0);
- timer_set_cval(timer, cval_cycles);
- timer_set_ctl(timer, ctl);
-}
-
-static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles,
- uint32_t ctl)
-{
- atomic_set(&shared_data.handled, 0);
- atomic_set(&shared_data.spurious, 0);
- timer_set_ctl(timer, ctl);
- timer_set_tval(timer, tval_cycles);
-}
-
-static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl,
- enum timer_view tv)
-{
- switch (tv) {
- case TIMER_CVAL:
- set_cval_irq(timer, xval, ctl);
- break;
- case TIMER_TVAL:
- set_tval_irq(timer, xval, ctl);
- break;
- default:
- GUEST_FAIL("Could not get timer %d", timer);
- }
-}
-
-/*
- * Note that this can theoretically hang forever, so we rely on having
- * a timeout mechanism in the "runner", like:
- * tools/testing/selftests/kselftest/runner.sh.
- */
-static void wait_for_non_spurious_irq(void)
-{
- int h;
-
- local_irq_disable();
-
- for (h = atomic_read(&shared_data.handled); h == atomic_read(&shared_data.handled);) {
- wfi();
- local_irq_enable();
- isb(); /* handle IRQ */
- local_irq_disable();
- }
-}
-
-/*
- * Wait for a non-spurious IRQ by polling in the guest or in
- * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD).
- *
- * Note that this can theoretically hang forever, so we rely on having
- * a timeout mechanism in the "runner", like:
- * tools/testing/selftests/kselftest/runner.sh.
- */
-static void poll_for_non_spurious_irq(enum sync_cmd usp_cmd)
-{
- int h;
-
- local_irq_disable();
-
- h = atomic_read(&shared_data.handled);
-
- local_irq_enable();
- while (h == atomic_read(&shared_data.handled)) {
- if (usp_cmd == NO_USERSPACE_CMD)
- cpu_relax();
- else
- userspace_cmd(usp_cmd);
- }
- local_irq_disable();
-}
-
-static void wait_poll_for_irq(void)
-{
- poll_for_non_spurious_irq(NO_USERSPACE_CMD);
-}
-
-static void wait_sched_poll_for_irq(void)
-{
- poll_for_non_spurious_irq(USERSPACE_SCHED_YIELD);
-}
-
-static void wait_migrate_poll_for_irq(void)
-{
- poll_for_non_spurious_irq(USERSPACE_MIGRATE_SELF);
-}
-
-/*
- * Sleep for usec microseconds by polling in the guest or in
- * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD).
- */
-static void guest_poll(enum arch_timer test_timer, uint64_t usec,
- enum sync_cmd usp_cmd)
-{
- uint64_t cycles = usec_to_cycles(usec);
- /* Whichever timer we are testing with, sleep with the other. */
- enum arch_timer sleep_timer = 1 - test_timer;
- uint64_t start = timer_get_cntct(sleep_timer);
-
- while ((timer_get_cntct(sleep_timer) - start) < cycles) {
- if (usp_cmd == NO_USERSPACE_CMD)
- cpu_relax();
- else
- userspace_cmd(usp_cmd);
- }
-}
-
-static void sleep_poll(enum arch_timer timer, uint64_t usec)
-{
- guest_poll(timer, usec, NO_USERSPACE_CMD);
-}
-
-static void sleep_sched_poll(enum arch_timer timer, uint64_t usec)
-{
- guest_poll(timer, usec, USERSPACE_SCHED_YIELD);
-}
-
-static void sleep_migrate(enum arch_timer timer, uint64_t usec)
-{
- guest_poll(timer, usec, USERSPACE_MIGRATE_SELF);
-}
-
-static void sleep_in_userspace(enum arch_timer timer, uint64_t usec)
-{
- userspace_sleep(usec);
-}
-
-/*
- * Reset the timer state to some nice values like the counter not being close
- * to the edge, and the control register masked and disabled.
- */
-static void reset_timer_state(enum arch_timer timer, uint64_t cnt)
-{
- set_counter(timer, cnt);
- timer_set_ctl(timer, CTL_IMASK);
-}
-
-static void test_timer_xval(enum arch_timer timer, uint64_t xval,
- enum timer_view tv, irq_wait_method_t wm, bool reset_state,
- uint64_t reset_cnt)
-{
- local_irq_disable();
-
- if (reset_state)
- reset_timer_state(timer, reset_cnt);
-
- set_xval_irq(timer, xval, CTL_ENABLE, tv);
-
- /* This method re-enables IRQs to handle the one we're looking for. */
- wm();
-
- assert_irqs_handled(1);
- local_irq_enable();
-}
-
-/*
- * The test_timer_* functions will program the timer, wait for it, and assert
- * the firing of the correct IRQ.
- *
- * These functions don't have a timeout and return as soon as they receive an
- * IRQ. They can hang (forever), so we rely on having a timeout mechanism in
- * the "runner", like: tools/testing/selftests/kselftest/runner.sh.
- */
-
-static void test_timer_cval(enum arch_timer timer, uint64_t cval,
- irq_wait_method_t wm, bool reset_state,
- uint64_t reset_cnt)
-{
- test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt);
-}
-
-static void test_timer_tval(enum arch_timer timer, int32_t tval,
- irq_wait_method_t wm, bool reset_state,
- uint64_t reset_cnt)
-{
- test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state,
- reset_cnt);
-}
-
-static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval,
- uint64_t usec, enum timer_view timer_view,
- sleep_method_t guest_sleep)
-{
- local_irq_disable();
-
- set_xval_irq(timer, xval, CTL_ENABLE | CTL_IMASK, timer_view);
- guest_sleep(timer, usec);
-
- local_irq_enable();
- isb();
-
- /* Assume success (no IRQ) after waiting usec microseconds */
- assert_irqs_handled(0);
-}
-
-static void test_cval_no_irq(enum arch_timer timer, uint64_t cval,
- uint64_t usec, sleep_method_t wm)
-{
- test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm);
-}
-
-static void test_tval_no_irq(enum arch_timer timer, int32_t tval, uint64_t usec,
- sleep_method_t wm)
-{
- /* tval will be cast to an int32_t in test_xval_check_no_irq */
- test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm);
-}
-
-/* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */
-static void test_timer_control_mask_then_unmask(enum arch_timer timer)
-{
- reset_timer_state(timer, DEF_CNT);
- set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
-
- /* Unmask the timer, and then get an IRQ. */
- local_irq_disable();
- timer_set_ctl(timer, CTL_ENABLE);
- /* This method re-enables IRQs to handle the one we're looking for. */
- wait_for_non_spurious_irq();
-
- assert_irqs_handled(1);
- local_irq_enable();
-}
-
-/* Check that timer control masks actually mask a timer being fired. */
-static void test_timer_control_masks(enum arch_timer timer)
-{
- reset_timer_state(timer, DEF_CNT);
-
- /* Local IRQs are not masked at this point. */
-
- set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
-
- /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
- sleep_poll(timer, TIMEOUT_NO_IRQ_US);
-
- assert_irqs_handled(0);
- timer_set_ctl(timer, CTL_IMASK);
-}
-
-static void test_fire_a_timer_multiple_times(enum arch_timer timer,
- irq_wait_method_t wm, int num)
-{
- int i;
-
- local_irq_disable();
- reset_timer_state(timer, DEF_CNT);
-
- set_tval_irq(timer, 0, CTL_ENABLE);
-
- for (i = 1; i <= num; i++) {
- /* This method re-enables IRQs to handle the one we're looking for. */
- wm();
-
-		/*
-		 * The IRQ handler masked and disabled the timer.
-		 * Enable and unmask it again.
-		 */
- timer_set_ctl(timer, CTL_ENABLE);
-
- assert_irqs_handled(i);
- }
-
- local_irq_enable();
-}
-
-static void test_timers_fired_multiple_times(enum arch_timer timer)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++)
- test_fire_a_timer_multiple_times(timer, irq_wait_method[i], 10);
-}
-
-/*
- * Set a timer for tval=delta_1_ms then reprogram it to
- * tval=delta_2_ms. Check that we get the timer fired. There is no
- * timeout for the wait: we use the wfi instruction.
- */
-static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm,
- int32_t delta_1_ms, int32_t delta_2_ms)
-{
- local_irq_disable();
- reset_timer_state(timer, DEF_CNT);
-
- /* Program the timer to DEF_CNT + delta_1_ms. */
- set_tval_irq(timer, msec_to_cycles(delta_1_ms), CTL_ENABLE);
-
- /* Reprogram the timer to DEF_CNT + delta_2_ms. */
- timer_set_tval(timer, msec_to_cycles(delta_2_ms));
-
- /* This method re-enables IRQs to handle the one we're looking for. */
- wm();
-
- /* The IRQ should arrive at DEF_CNT + delta_2_ms (or after). */
- GUEST_ASSERT(timer_get_cntct(timer) >=
- DEF_CNT + msec_to_cycles(delta_2_ms));
-
- local_irq_enable();
- assert_irqs_handled(1);
-};
-
-static void test_reprogram_timers(enum arch_timer timer)
-{
- int i;
- uint64_t base_wait = test_args.wait_ms;
-
- for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
- /*
- * Ensure reprogramming works whether going from a
- * longer time to a shorter or vice versa.
- */
- test_reprogramming_timer(timer, irq_wait_method[i], 2 * base_wait,
- base_wait);
- test_reprogramming_timer(timer, irq_wait_method[i], base_wait,
- 2 * base_wait);
- }
-}
-
-static void test_basic_functionality(enum arch_timer timer)
-{
- int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms);
- uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms);
- int i;
-
- for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
- irq_wait_method_t wm = irq_wait_method[i];
-
- test_timer_cval(timer, cval, wm, true, DEF_CNT);
- test_timer_tval(timer, tval, wm, true, DEF_CNT);
- }
-}
-
-/*
- * This test checks basic timer behavior without actually firing timers, things
- * like: the relationship between cval and tval, tval down-counting.
- */
-static void timers_sanity_checks(enum arch_timer timer, bool use_sched)
-{
- reset_timer_state(timer, DEF_CNT);
-
- local_irq_disable();
-
- /* cval in the past */
- timer_set_cval(timer,
- timer_get_cntct(timer) -
- msec_to_cycles(test_args.wait_ms));
- if (use_sched)
- userspace_migrate_vcpu();
- GUEST_ASSERT(timer_get_tval(timer) < 0);
-
- /* tval in the past */
- timer_set_tval(timer, -1);
- if (use_sched)
- userspace_migrate_vcpu();
- GUEST_ASSERT(timer_get_cval(timer) < timer_get_cntct(timer));
-
-	/*
-	 * tval larger than TVAL_MAX. This requires programming with
-	 * timer_set_cval instead so the value is expressible.
-	 */
- timer_set_cval(timer,
- timer_get_cntct(timer) + TVAL_MAX +
- msec_to_cycles(test_args.wait_ms));
- if (use_sched)
- userspace_migrate_vcpu();
- GUEST_ASSERT(timer_get_tval(timer) <= 0);
-
- /*
- * tval larger than 2 * TVAL_MAX.
-	 * Adding 2 * TVAL_MAX wraps the 32-bit TVAL view all the way around.
- */
- timer_set_cval(timer,
- timer_get_cntct(timer) + 2ULL * TVAL_MAX +
- msec_to_cycles(test_args.wait_ms));
- if (use_sched)
- userspace_migrate_vcpu();
- GUEST_ASSERT(timer_get_tval(timer) <=
- msec_to_cycles(test_args.wait_ms));
-
-	/* negative tval that rolls cval over below 0. */
- set_counter(timer, msec_to_cycles(1));
- timer_set_tval(timer, -1 * msec_to_cycles(test_args.wait_ms));
- if (use_sched)
- userspace_migrate_vcpu();
- GUEST_ASSERT(timer_get_cval(timer) >= (CVAL_MAX - msec_to_cycles(test_args.wait_ms)));
-
- /* tval should keep down-counting from 0 to -1. */
- timer_set_tval(timer, 0);
- sleep_poll(timer, 1);
- GUEST_ASSERT(timer_get_tval(timer) < 0);
-
- local_irq_enable();
-
- /* Mask and disable any pending timer. */
- timer_set_ctl(timer, CTL_IMASK);
-}
-
-static void test_timers_sanity_checks(enum arch_timer timer)
-{
- timers_sanity_checks(timer, false);
- /* Check how KVM saves/restores these edge-case values. */
- timers_sanity_checks(timer, true);
-}
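The sanity checks above lean on the architectural relationship between the two timer views: TVAL reads back as the signed low 32 bits of CVAL minus the current counter. A minimal stand-alone sketch of that relationship (illustration only, not part of the selftest; the counter value and frequency below are assumed):

#include <stdint.h>
#include <stdio.h>

/* Model of the architected view: TVAL = (int32_t)(CVAL - CNT). */
static int32_t tval_view(uint64_t cval, uint64_t cnt)
{
	return (int32_t)(cval - cnt);
}

int main(void)
{
	uint64_t cnt = 0x123456789ULL;	/* arbitrary counter value (assumed) */
	uint64_t wait = 240000;		/* ~10 ms at an assumed 24 MHz counter */

	/* A cval in the past reads back as a negative tval. */
	printf("past cval  -> tval %d\n", tval_view(cnt - wait, cnt));

	/* cval = cnt + 2 * INT32_MAX + wait wraps to a small positive tval. */
	printf("2*TVAL_MAX -> tval %d\n",
	       tval_view(cnt + 2ULL * INT32_MAX + wait, cnt));
	return 0;
}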
-
-static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t wm)
-{
- local_irq_disable();
- reset_timer_state(timer, DEF_CNT);
-
- set_cval_irq(timer,
- (uint64_t) TVAL_MAX +
- msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE);
-
- set_counter(timer, TVAL_MAX);
-
- /* This method re-enables IRQs to handle the one we're looking for. */
- wm();
-
- assert_irqs_handled(1);
- local_irq_enable();
-}
-
-/* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */
-static void test_timers_above_tval_max(enum arch_timer timer)
-{
- uint64_t cval;
- int i;
-
- /*
- * Test that the system is not implementing cval in terms of
- * tval. If that were the case, setting a cval to "cval = now
- * + TVAL_MAX + wait_ms" would wrap to "cval = now +
- * wait_ms", and the timer would fire immediately. Test that it
- * doesn't.
- */
- for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
- reset_timer_state(timer, DEF_CNT);
- cval = timer_get_cntct(timer) + TVAL_MAX +
- msec_to_cycles(test_args.wait_ms);
- test_cval_no_irq(timer, cval,
- msecs_to_usecs(test_args.wait_ms) +
- TIMEOUT_NO_IRQ_US, sleep_method[i]);
- }
-
- for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
- /* Get the IRQ by moving the counter forward. */
- test_set_cnt_after_tval_max(timer, irq_wait_method[i]);
- }
-}
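As a worked illustration of the wrap the comment above warns about (assumed values, not selftest code): if a deadline of now + TVAL_MAX + wait were squeezed into a signed 32-bit tval, the offset would come out negative, i.e. a deadline that appears to have already passed.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t wait = 240000;	/* ~10 ms at an assumed 24 MHz counter */
	uint64_t offset = (uint64_t)INT32_MAX + wait;

	/* Correct model: the deadline stays far in the future. */
	printf("cval - now = %llu cycles\n", (unsigned long long)offset);

	/*
	 * Broken model (cval implemented in terms of a 32-bit tval):
	 * the offset wraps negative, so the timer would fire at once.
	 */
	printf("as int32 tval = %d cycles\n", (int32_t)offset);
	return 0;
}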
-
-/*
- * Template function to be used by the test_move_counter_ahead_* tests. It
- * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and
- * then waits for an IRQ.
- */
-static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1,
- uint64_t xval, uint64_t cnt_2,
- irq_wait_method_t wm, enum timer_view tv)
-{
- local_irq_disable();
-
- set_counter(timer, cnt_1);
- timer_set_ctl(timer, CTL_IMASK);
-
- set_xval_irq(timer, xval, CTL_ENABLE, tv);
- set_counter(timer, cnt_2);
- /* This method re-enables IRQs to handle the one we're looking for. */
- wm();
-
- assert_irqs_handled(1);
- local_irq_enable();
-}
-
-/*
- * Template function to be used by the test_move_counter_ahead_* tests. It
- * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and
- * then sleeps and asserts that no IRQ was delivered.
- */
-static void test_set_cnt_after_xval_no_irq(enum arch_timer timer,
- uint64_t cnt_1, uint64_t xval,
- uint64_t cnt_2,
- sleep_method_t guest_sleep,
- enum timer_view tv)
-{
- local_irq_disable();
-
- set_counter(timer, cnt_1);
- timer_set_ctl(timer, CTL_IMASK);
-
- set_xval_irq(timer, xval, CTL_ENABLE, tv);
- set_counter(timer, cnt_2);
- guest_sleep(timer, TIMEOUT_NO_IRQ_US);
-
- local_irq_enable();
- isb();
-
- /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
- assert_irqs_handled(0);
- timer_set_ctl(timer, CTL_IMASK);
-}
-
-static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1,
- int32_t tval, uint64_t cnt_2,
- irq_wait_method_t wm)
-{
- test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL);
-}
-
-static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1,
- uint64_t cval, uint64_t cnt_2,
- irq_wait_method_t wm)
-{
- test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL);
-}
-
-static void test_set_cnt_after_tval_no_irq(enum arch_timer timer,
- uint64_t cnt_1, int32_t tval,
- uint64_t cnt_2, sleep_method_t wm)
-{
- test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm,
- TIMER_TVAL);
-}
-
-static void test_set_cnt_after_cval_no_irq(enum arch_timer timer,
- uint64_t cnt_1, uint64_t cval,
- uint64_t cnt_2, sleep_method_t wm)
-{
- test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm,
- TIMER_CVAL);
-}
-
-/* Set a timer and then move the counter ahead of it. */
-static void test_move_counters_ahead_of_timers(enum arch_timer timer)
-{
- int i;
- int32_t tval;
-
- for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
- irq_wait_method_t wm = irq_wait_method[i];
-
- test_set_cnt_after_cval(timer, 0, DEF_CNT, DEF_CNT + 1, wm);
- test_set_cnt_after_cval(timer, CVAL_MAX, 1, 2, wm);
-
- /* Move counter ahead of negative tval. */
- test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm);
- test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm);
- tval = TVAL_MAX;
- test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1,
- wm);
- }
-
- for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
- sleep_method_t sm = sleep_method[i];
-
- test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm);
- }
-}
-
-/*
- * Program a timer, mask it, and then change the tval or counter to cancel it.
- * Unmask it and check that nothing fires.
- */
-static void test_move_counters_behind_timers(enum arch_timer timer)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
- sleep_method_t sm = sleep_method[i];
-
- test_set_cnt_after_cval_no_irq(timer, DEF_CNT, DEF_CNT - 1, 0,
- sm);
- test_set_cnt_after_tval_no_irq(timer, DEF_CNT, -1, 0, sm);
- }
-}
-
-static void test_timers_in_the_past(enum arch_timer timer)
-{
- int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms);
- uint64_t cval;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
- irq_wait_method_t wm = irq_wait_method[i];
-
-		/* Set a timer wait_ms in the past. */
- cval = DEF_CNT - msec_to_cycles(test_args.wait_ms);
- test_timer_cval(timer, cval, wm, true, DEF_CNT);
- test_timer_tval(timer, tval, wm, true, DEF_CNT);
-
- /* Set a timer to counter=0 (in the past) */
- test_timer_cval(timer, 0, wm, true, DEF_CNT);
-
-		/* Set a timer for tval=0 (now) */
- test_timer_tval(timer, 0, wm, true, DEF_CNT);
-
- /* Set a timer to as far in the past as possible */
- test_timer_tval(timer, TVAL_MIN, wm, true, DEF_CNT);
- }
-
- /*
- * Set the counter to wait_ms, and a tval to -wait_ms. There should be no
- * IRQ as that tval means cval=CVAL_MAX-wait_ms.
- */
- for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
- sleep_method_t sm = sleep_method[i];
-
- set_counter(timer, msec_to_cycles(test_args.wait_ms));
- test_tval_no_irq(timer, tval, TIMEOUT_NO_IRQ_US, sm);
- }
-}
-
-static void test_long_timer_delays(enum arch_timer timer)
-{
- int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms);
- uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms);
- int i;
-
- for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
- irq_wait_method_t wm = irq_wait_method[i];
-
- test_timer_cval(timer, cval, wm, true, DEF_CNT);
- test_timer_tval(timer, tval, wm, true, DEF_CNT);
- }
-}
-
-static void guest_run_iteration(enum arch_timer timer)
-{
- test_basic_functionality(timer);
- test_timers_sanity_checks(timer);
-
- test_timers_above_tval_max(timer);
- test_timers_in_the_past(timer);
-
- test_move_counters_ahead_of_timers(timer);
- test_move_counters_behind_timers(timer);
- test_reprogram_timers(timer);
-
- test_timers_fired_multiple_times(timer);
-
- test_timer_control_mask_then_unmask(timer);
- test_timer_control_masks(timer);
-}
-
-static void guest_code(enum arch_timer timer)
-{
- int i;
-
- local_irq_disable();
-
- gic_init(GIC_V3, 1);
-
- timer_set_ctl(VIRTUAL, CTL_IMASK);
- timer_set_ctl(PHYSICAL, CTL_IMASK);
-
- gic_irq_enable(vtimer_irq);
- gic_irq_enable(ptimer_irq);
- local_irq_enable();
-
- for (i = 0; i < test_args.iterations; i++) {
- GUEST_SYNC(i);
- guest_run_iteration(timer);
- }
-
- test_long_timer_delays(timer);
- GUEST_DONE();
-}
-
-static uint32_t next_pcpu(void)
-{
- uint32_t max = get_nprocs();
- uint32_t cur = sched_getcpu();
- uint32_t next = cur;
- cpu_set_t cpuset;
-
- TEST_ASSERT(max > 1, "Need at least two physical cpus");
-
- sched_getaffinity(0, sizeof(cpuset), &cpuset);
-
- do {
- next = (next + 1) % CPU_SETSIZE;
- } while (!CPU_ISSET(next, &cpuset));
-
- return next;
-}
-
-static void migrate_self(uint32_t new_pcpu)
-{
- int ret;
- cpu_set_t cpuset;
- pthread_t thread;
-
- thread = pthread_self();
-
- CPU_ZERO(&cpuset);
- CPU_SET(new_pcpu, &cpuset);
-
- pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu);
-
- ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
-
- TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n",
- new_pcpu, ret);
-}
-
-static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt,
- enum arch_timer timer)
-{
- if (timer == PHYSICAL)
- vcpu_set_reg(vcpu, KVM_REG_ARM_PTIMER_CNT, cnt);
- else
- vcpu_set_reg(vcpu, KVM_REG_ARM_TIMER_CNT, cnt);
-}
-
-static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
-{
- enum sync_cmd cmd = uc->args[1];
- uint64_t val = uc->args[2];
- enum arch_timer timer = uc->args[3];
-
- switch (cmd) {
- case SET_COUNTER_VALUE:
- kvm_set_cntxct(vcpu, val, timer);
- break;
- case USERSPACE_USLEEP:
- usleep(val);
- break;
- case USERSPACE_SCHED_YIELD:
- sched_yield();
- break;
- case USERSPACE_MIGRATE_SELF:
- migrate_self(next_pcpu());
- break;
- default:
- break;
- }
-}
-
-static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- /* Start on CPU 0 */
- migrate_self(0);
-
- while (true) {
- vcpu_run(vcpu);
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- handle_sync(vcpu, &uc);
- break;
- case UCALL_DONE:
- goto out;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- goto out;
- default:
- TEST_FAIL("Unexpected guest exit\n");
- }
- }
-
- out:
- return;
-}
-
-static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
-{
- vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
- KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
- vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
- KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
-
- sync_global_to_guest(vm, ptimer_irq);
- sync_global_to_guest(vm, vtimer_irq);
-
- pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
-}
-
-static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
- enum arch_timer timer)
-{
- *vm = vm_create_with_one_vcpu(vcpu, guest_code);
- TEST_ASSERT(*vm, "Failed to create the test VM\n");
-
- vm_init_descriptor_tables(*vm);
- vm_install_exception_handler(*vm, VECTOR_IRQ_CURRENT,
- guest_irq_handler);
-
- vcpu_init_descriptor_tables(*vcpu);
- vcpu_args_set(*vcpu, 1, timer);
-
- test_init_timer_irq(*vm, *vcpu);
- vgic_v3_setup(*vm, 1, 64);
- sync_global_to_guest(*vm, test_args);
-}
-
-static void test_print_help(char *name)
-{
-	pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v] [-w wait_ms]\n"
- , name);
- pr_info("\t-i: Number of iterations (default: %u)\n",
- NR_TEST_ITERS_DEF);
- pr_info("\t-b: Test both physical and virtual timers (default: true)\n");
- pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n",
- LONG_WAIT_TEST_MS);
-	pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n",
- WAIT_TEST_MS);
- pr_info("\t-p: Test physical timer (default: true)\n");
- pr_info("\t-v: Test virtual timer (default: true)\n");
- pr_info("\t-h: Print this help message\n");
-}
-
-static bool parse_args(int argc, char *argv[])
-{
- int opt;
-
- while ((opt = getopt(argc, argv, "bhi:l:pvw:")) != -1) {
- switch (opt) {
- case 'b':
- test_args.test_physical = true;
- test_args.test_virtual = true;
- break;
- case 'i':
- test_args.iterations =
- atoi_positive("Number of iterations", optarg);
- break;
- case 'l':
- test_args.long_wait_ms =
- atoi_positive("Long wait time", optarg);
- break;
- case 'p':
- test_args.test_physical = true;
- test_args.test_virtual = false;
- break;
- case 'v':
- test_args.test_virtual = true;
- test_args.test_physical = false;
- break;
- case 'w':
- test_args.wait_ms = atoi_positive("Wait time", optarg);
- break;
- case 'h':
- default:
- goto err;
- }
- }
-
- return true;
-
- err:
- test_print_help(argv[0]);
- return false;
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
-
- if (!parse_args(argc, argv))
- exit(KSFT_SKIP);
-
- if (test_args.test_virtual) {
- test_vm_create(&vm, &vcpu, VIRTUAL);
- test_run(vm, vcpu);
- kvm_vm_free(vm);
- }
-
- if (test_args.test_physical) {
- test_vm_create(&vm, &vcpu, PHYSICAL);
- test_run(vm, vcpu);
- kvm_vm_free(vm);
- }
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-#include <test_util.h>
-#include <kvm_util.h>
-#include <processor.h>
-#include <linux/bitfield.h>
-
-#define MDSCR_KDE (1 << 13)
-#define MDSCR_MDE (1 << 15)
-#define MDSCR_SS (1 << 0)
-
-#define DBGBCR_LEN8 (0xff << 5)
-#define DBGBCR_EXEC (0x0 << 3)
-#define DBGBCR_EL1 (0x1 << 1)
-#define DBGBCR_E (0x1 << 0)
-#define DBGBCR_LBN_SHIFT 16
-#define DBGBCR_BT_SHIFT 20
-#define DBGBCR_BT_ADDR_LINK_CTX (0x1 << DBGBCR_BT_SHIFT)
-#define DBGBCR_BT_CTX_LINK (0x3 << DBGBCR_BT_SHIFT)
-
-#define DBGWCR_LEN8 (0xff << 5)
-#define DBGWCR_RD (0x1 << 3)
-#define DBGWCR_WR (0x2 << 3)
-#define DBGWCR_EL1 (0x1 << 1)
-#define DBGWCR_E (0x1 << 0)
-#define DBGWCR_LBN_SHIFT 16
-#define DBGWCR_WT_SHIFT 20
-#define DBGWCR_WT_LINK (0x1 << DBGWCR_WT_SHIFT)
-
-#define SPSR_D (1 << 9)
-#define SPSR_SS (1 << 21)
-
-extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start, hw_bp_ctx;
-extern unsigned char iter_ss_begin, iter_ss_end;
-static volatile uint64_t sw_bp_addr, hw_bp_addr;
-static volatile uint64_t wp_addr, wp_data_addr;
-static volatile uint64_t svc_addr;
-static volatile uint64_t ss_addr[4], ss_idx;
-#define PC(v) ((uint64_t)&(v))
-
-#define GEN_DEBUG_WRITE_REG(reg_name) \
-static void write_##reg_name(int num, uint64_t val) \
-{ \
- switch (num) { \
- case 0: \
- write_sysreg(val, reg_name##0_el1); \
- break; \
- case 1: \
- write_sysreg(val, reg_name##1_el1); \
- break; \
- case 2: \
- write_sysreg(val, reg_name##2_el1); \
- break; \
- case 3: \
- write_sysreg(val, reg_name##3_el1); \
- break; \
- case 4: \
- write_sysreg(val, reg_name##4_el1); \
- break; \
- case 5: \
- write_sysreg(val, reg_name##5_el1); \
- break; \
- case 6: \
- write_sysreg(val, reg_name##6_el1); \
- break; \
- case 7: \
- write_sysreg(val, reg_name##7_el1); \
- break; \
- case 8: \
- write_sysreg(val, reg_name##8_el1); \
- break; \
- case 9: \
- write_sysreg(val, reg_name##9_el1); \
- break; \
- case 10: \
- write_sysreg(val, reg_name##10_el1); \
- break; \
- case 11: \
- write_sysreg(val, reg_name##11_el1); \
- break; \
- case 12: \
- write_sysreg(val, reg_name##12_el1); \
- break; \
- case 13: \
- write_sysreg(val, reg_name##13_el1); \
- break; \
- case 14: \
- write_sysreg(val, reg_name##14_el1); \
- break; \
- case 15: \
- write_sysreg(val, reg_name##15_el1); \
- break; \
- default: \
- GUEST_ASSERT(0); \
- } \
-}
-
-/* Define write_dbgbcr()/write_dbgbvr()/write_dbgwcr()/write_dbgwvr() */
-GEN_DEBUG_WRITE_REG(dbgbcr)
-GEN_DEBUG_WRITE_REG(dbgbvr)
-GEN_DEBUG_WRITE_REG(dbgwcr)
-GEN_DEBUG_WRITE_REG(dbgwvr)
-
-static void reset_debug_state(void)
-{
- uint8_t brps, wrps, i;
- uint64_t dfr0;
-
- asm volatile("msr daifset, #8");
-
- write_sysreg(0, osdlr_el1);
- write_sysreg(0, oslar_el1);
- isb();
-
- write_sysreg(0, mdscr_el1);
- write_sysreg(0, contextidr_el1);
-
- /* Reset all bcr/bvr/wcr/wvr registers */
- dfr0 = read_sysreg(id_aa64dfr0_el1);
- brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0);
- for (i = 0; i <= brps; i++) {
- write_dbgbcr(i, 0);
- write_dbgbvr(i, 0);
- }
- wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0);
- for (i = 0; i <= wrps; i++) {
- write_dbgwcr(i, 0);
- write_dbgwvr(i, 0);
- }
-
- isb();
-}
-
-static void enable_os_lock(void)
-{
- write_sysreg(1, oslar_el1);
- isb();
-
- GUEST_ASSERT(read_sysreg(oslsr_el1) & 2);
-}
-
-static void enable_monitor_debug_exceptions(void)
-{
- uint32_t mdscr;
-
- asm volatile("msr daifclr, #8");
-
- mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
- write_sysreg(mdscr, mdscr_el1);
- isb();
-}
-
-static void install_wp(uint8_t wpn, uint64_t addr)
-{
- uint32_t wcr;
-
- wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
- write_dbgwcr(wpn, wcr);
- write_dbgwvr(wpn, addr);
-
- isb();
-
- enable_monitor_debug_exceptions();
-}
-
-static void install_hw_bp(uint8_t bpn, uint64_t addr)
-{
- uint32_t bcr;
-
- bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
- write_dbgbcr(bpn, bcr);
- write_dbgbvr(bpn, addr);
- isb();
-
- enable_monitor_debug_exceptions();
-}
-
-static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr,
- uint64_t ctx)
-{
- uint32_t wcr;
- uint64_t ctx_bcr;
-
- /* Setup a context-aware breakpoint for Linked Context ID Match */
- ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
- DBGBCR_BT_CTX_LINK;
- write_dbgbcr(ctx_bp, ctx_bcr);
- write_dbgbvr(ctx_bp, ctx);
-
- /* Setup a linked watchpoint (linked to the context-aware breakpoint) */
- wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E |
- DBGWCR_WT_LINK | ((uint32_t)ctx_bp << DBGWCR_LBN_SHIFT);
- write_dbgwcr(addr_wp, wcr);
- write_dbgwvr(addr_wp, addr);
- isb();
-
- enable_monitor_debug_exceptions();
-}
-
-void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr,
- uint64_t ctx)
-{
- uint32_t addr_bcr, ctx_bcr;
-
- /* Setup a context-aware breakpoint for Linked Context ID Match */
- ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
- DBGBCR_BT_CTX_LINK;
- write_dbgbcr(ctx_bp, ctx_bcr);
- write_dbgbvr(ctx_bp, ctx);
-
- /*
- * Setup a normal breakpoint for Linked Address Match, and link it
- * to the context-aware breakpoint.
- */
- addr_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
- DBGBCR_BT_ADDR_LINK_CTX |
- ((uint32_t)ctx_bp << DBGBCR_LBN_SHIFT);
- write_dbgbcr(addr_bp, addr_bcr);
- write_dbgbvr(addr_bp, addr);
- isb();
-
- enable_monitor_debug_exceptions();
-}
-
-static void install_ss(void)
-{
- uint32_t mdscr;
-
- asm volatile("msr daifclr, #8");
-
- mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS;
- write_sysreg(mdscr, mdscr_el1);
- isb();
-}
-
-static volatile char write_data;
-
-static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
-{
- uint64_t ctx = 0xabcdef; /* a random context number */
-
- /* Software-breakpoint */
- reset_debug_state();
- asm volatile("sw_bp: brk #0");
- GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp));
-
- /* Hardware-breakpoint */
- reset_debug_state();
- install_hw_bp(bpn, PC(hw_bp));
- asm volatile("hw_bp: nop");
- GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp));
-
- /* Hardware-breakpoint + svc */
- reset_debug_state();
- install_hw_bp(bpn, PC(bp_svc));
- asm volatile("bp_svc: svc #0");
- GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc));
- GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4);
-
- /* Hardware-breakpoint + software-breakpoint */
- reset_debug_state();
- install_hw_bp(bpn, PC(bp_brk));
- asm volatile("bp_brk: brk #0");
- GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk));
- GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk));
-
- /* Watchpoint */
- reset_debug_state();
- install_wp(wpn, PC(write_data));
- write_data = 'x';
- GUEST_ASSERT_EQ(write_data, 'x');
- GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
-
- /* Single-step */
- reset_debug_state();
- install_ss();
- ss_idx = 0;
- asm volatile("ss_start:\n"
- "mrs x0, esr_el1\n"
- "add x0, x0, #1\n"
- "msr daifset, #8\n"
- : : : "x0");
- GUEST_ASSERT_EQ(ss_addr[0], PC(ss_start));
- GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4);
- GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8);
-
- /* OS Lock does not block software-breakpoint */
- reset_debug_state();
- enable_os_lock();
- sw_bp_addr = 0;
- asm volatile("sw_bp2: brk #0");
- GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp2));
-
- /* OS Lock blocking hardware-breakpoint */
- reset_debug_state();
- enable_os_lock();
- install_hw_bp(bpn, PC(hw_bp2));
- hw_bp_addr = 0;
- asm volatile("hw_bp2: nop");
- GUEST_ASSERT_EQ(hw_bp_addr, 0);
-
- /* OS Lock blocking watchpoint */
- reset_debug_state();
- enable_os_lock();
- write_data = '\0';
- wp_data_addr = 0;
- install_wp(wpn, PC(write_data));
- write_data = 'x';
- GUEST_ASSERT_EQ(write_data, 'x');
- GUEST_ASSERT_EQ(wp_data_addr, 0);
-
- /* OS Lock blocking single-step */
- reset_debug_state();
- enable_os_lock();
- ss_addr[0] = 0;
- install_ss();
- ss_idx = 0;
- asm volatile("mrs x0, esr_el1\n\t"
- "add x0, x0, #1\n\t"
- "msr daifset, #8\n\t"
- : : : "x0");
- GUEST_ASSERT_EQ(ss_addr[0], 0);
-
- /* Linked hardware-breakpoint */
- hw_bp_addr = 0;
- reset_debug_state();
- install_hw_bp_ctx(bpn, ctx_bpn, PC(hw_bp_ctx), ctx);
- /* Set context id */
- write_sysreg(ctx, contextidr_el1);
- isb();
- asm volatile("hw_bp_ctx: nop");
- write_sysreg(0, contextidr_el1);
- GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp_ctx));
-
- /* Linked watchpoint */
- reset_debug_state();
- install_wp_ctx(wpn, ctx_bpn, PC(write_data), ctx);
- /* Set context id */
- write_sysreg(ctx, contextidr_el1);
- isb();
- write_data = 'x';
- GUEST_ASSERT_EQ(write_data, 'x');
- GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
-
- GUEST_DONE();
-}
-
-static void guest_sw_bp_handler(struct ex_regs *regs)
-{
- sw_bp_addr = regs->pc;
- regs->pc += 4;
-}
-
-static void guest_hw_bp_handler(struct ex_regs *regs)
-{
- hw_bp_addr = regs->pc;
- regs->pstate |= SPSR_D;
-}
-
-static void guest_wp_handler(struct ex_regs *regs)
-{
- wp_data_addr = read_sysreg(far_el1);
- wp_addr = regs->pc;
- regs->pstate |= SPSR_D;
-}
-
-static void guest_ss_handler(struct ex_regs *regs)
-{
- __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%lu'", ss_idx);
- ss_addr[ss_idx++] = regs->pc;
- regs->pstate |= SPSR_SS;
-}
-
-static void guest_svc_handler(struct ex_regs *regs)
-{
- svc_addr = regs->pc;
-}
-
-static void guest_code_ss(int test_cnt)
-{
- uint64_t i;
- uint64_t bvr, wvr, w_bvr, w_wvr;
-
- for (i = 0; i < test_cnt; i++) {
- /* Bits [1:0] of dbg{b,w}vr are RES0 */
- w_bvr = i << 2;
- w_wvr = i << 2;
-
- /*
- * Enable Single Step execution. Note! This _must_ be a bare
- * ucall as the ucall() path uses atomic operations to manage
- * the ucall structures, and the built-in "atomics" are usually
-		 * implemented via exclusive access instructions. The exclusive
- * monitor is cleared on ERET, and so taking debug exceptions
- * during a LDREX=>STREX sequence will prevent forward progress
- * and hang the guest/test.
- */
- GUEST_UCALL_NONE();
-
- /*
- * The userspace will verify that the pc is as expected during
- * single step execution between iter_ss_begin and iter_ss_end.
- */
- asm volatile("iter_ss_begin:nop\n");
-
- write_sysreg(w_bvr, dbgbvr0_el1);
- write_sysreg(w_wvr, dbgwvr0_el1);
- bvr = read_sysreg(dbgbvr0_el1);
- wvr = read_sysreg(dbgwvr0_el1);
-
- /* Userspace disables Single Step when the end is nigh. */
- asm volatile("iter_ss_end:\n");
-
- GUEST_ASSERT_EQ(bvr, w_bvr);
- GUEST_ASSERT_EQ(wvr, w_wvr);
- }
- GUEST_DONE();
-}
-
-static int debug_version(uint64_t id_aa64dfr0)
-{
- return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0);
-}
-
-static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct ucall uc;
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
- vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
- ESR_ELx_EC_BRK64, guest_sw_bp_handler);
- vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
- ESR_ELx_EC_BREAKPT_CUR, guest_hw_bp_handler);
- vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
- ESR_ELx_EC_WATCHPT_CUR, guest_wp_handler);
- vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
- ESR_ELx_EC_SOFTSTP_CUR, guest_ss_handler);
- vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
- ESR_ELx_EC_SVC64, guest_svc_handler);
-
- /* Specify bpn/wpn/ctx_bpn to be tested */
- vcpu_args_set(vcpu, 3, bpn, wpn, ctx_bpn);
- pr_debug("Use bpn#%d, wpn#%d and ctx_bpn#%d\n", bpn, wpn, ctx_bpn);
-
- vcpu_run(vcpu);
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_DONE:
- goto done;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
-
-done:
- kvm_vm_free(vm);
-}
-
-void test_single_step_from_userspace(int test_cnt)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct ucall uc;
- struct kvm_run *run;
- uint64_t pc, cmd;
- uint64_t test_pc = 0;
- bool ss_enable = false;
- struct kvm_guest_debug debug = {};
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code_ss);
- run = vcpu->run;
- vcpu_args_set(vcpu, 1, test_cnt);
-
- while (1) {
- vcpu_run(vcpu);
- if (run->exit_reason != KVM_EXIT_DEBUG) {
- cmd = get_ucall(vcpu, &uc);
- if (cmd == UCALL_ABORT) {
- REPORT_GUEST_ASSERT(uc);
- /* NOT REACHED */
- } else if (cmd == UCALL_DONE) {
- break;
- }
-
- TEST_ASSERT(cmd == UCALL_NONE,
- "Unexpected ucall cmd 0x%lx", cmd);
-
- debug.control = KVM_GUESTDBG_ENABLE |
- KVM_GUESTDBG_SINGLESTEP;
- ss_enable = true;
- vcpu_guest_debug_set(vcpu, &debug);
- continue;
- }
-
- TEST_ASSERT(ss_enable, "Unexpected KVM_EXIT_DEBUG");
-
- /* Check if the current pc is expected. */
- pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
- TEST_ASSERT(!test_pc || pc == test_pc,
- "Unexpected pc 0x%lx (expected 0x%lx)",
- pc, test_pc);
-
- if ((pc + 4) == (uint64_t)&iter_ss_end) {
- test_pc = 0;
- debug.control = KVM_GUESTDBG_ENABLE;
- ss_enable = false;
- vcpu_guest_debug_set(vcpu, &debug);
- continue;
- }
-
- /*
-		 * If the current pc is between iter_ss_begin and
- * iter_ss_end, the pc for the next KVM_EXIT_DEBUG should
- * be the current pc + 4.
- */
- if ((pc >= (uint64_t)&iter_ss_begin) &&
- (pc < (uint64_t)&iter_ss_end))
- test_pc = pc + 4;
- else
- test_pc = 0;
- }
-
- kvm_vm_free(vm);
-}
-
-/*
- * Run debug testing using the various breakpoint#, watchpoint# and
- * context-aware breakpoint# with the given ID_AA64DFR0_EL1 configuration.
- */
-void test_guest_debug_exceptions_all(uint64_t aa64dfr0)
-{
- uint8_t brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base;
- int b, w, c;
-
- /* Number of breakpoints */
- brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1;
- __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required");
-
- /* Number of watchpoints */
- wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1;
-
- /* Number of context aware breakpoints */
- ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1;
-
- pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__,
- brp_num, wrp_num, ctx_brp_num);
-
- /* Number of normal (non-context aware) breakpoints */
- normal_brp_num = brp_num - ctx_brp_num;
-
- /* Lowest context aware breakpoint number */
- ctx_brp_base = normal_brp_num;
-
- /* Run tests with all supported breakpoints/watchpoints */
- for (c = ctx_brp_base; c < ctx_brp_base + ctx_brp_num; c++) {
- for (b = 0; b < normal_brp_num; b++) {
- for (w = 0; w < wrp_num; w++)
- test_guest_debug_exceptions(b, w, c);
- }
- }
-}
-
-static void help(char *name)
-{
- puts("");
- printf("Usage: %s [-h] [-i iterations of the single step test]\n", name);
- puts("");
- exit(0);
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- int opt;
- int ss_iteration = 10000;
- uint64_t aa64dfr0;
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- aa64dfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
- __TEST_REQUIRE(debug_version(aa64dfr0) >= 6,
- "Armv8 debug architecture not supported.");
- kvm_vm_free(vm);
-
- while ((opt = getopt(argc, argv, "i:")) != -1) {
- switch (opt) {
- case 'i':
- ss_iteration = atoi_positive("Number of iterations", optarg);
- break;
- case 'h':
- default:
- help(argv[0]);
- break;
- }
- }
-
- test_guest_debug_exceptions_all(aa64dfr0);
- test_single_step_from_userspace(ss_iteration);
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Check for KVM_GET_REG_LIST regressions.
- *
- * Copyright (C) 2020, Red Hat, Inc.
- *
- * While the blessed list should be created from the oldest possible
- * kernel, we can't go older than v5.2, because that's the first
- * release which includes df205b5c6328 ("KVM: arm64: Filter out invalid
- * core register IDs in KVM_GET_REG_LIST"). Without that commit the core
- * registers won't match expectations.
- */
-#include <stdio.h>
-#include "kvm_util.h"
-#include "test_util.h"
-#include "processor.h"
-
-struct feature_id_reg {
- __u64 reg;
- __u64 id_reg;
- __u64 feat_shift;
- __u64 feat_min;
-};
-
-static struct feature_id_reg feat_id_regs[] = {
- {
- ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 0,
- 1
- },
- {
- ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 8,
- 1
- },
- {
- ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 8,
- 1
- },
- {
- ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 16,
- 1
- },
- {
- ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 16,
- 1
- }
-};
-
-bool filter_reg(__u64 reg)
-{
- /*
- * DEMUX register presence depends on the host's CLIDR_EL1.
- * This means there's no set of them that we can bless.
- */
- if ((reg & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
- return true;
-
- return false;
-}
-
-static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg)
-{
- int i, ret;
- __u64 data, feat_val;
-
- for (i = 0; i < ARRAY_SIZE(feat_id_regs); i++) {
- if (feat_id_regs[i].reg == reg) {
- ret = __vcpu_get_reg(vcpu, feat_id_regs[i].id_reg, &data);
- if (ret < 0)
- return false;
-
- feat_val = ((data >> feat_id_regs[i].feat_shift) & 0xf);
- return feat_val >= feat_id_regs[i].feat_min;
- }
- }
-
- return true;
-}
-
-bool check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg)
-{
- return check_supported_feat_reg(vcpu, reg);
-}
-
-bool check_reject_set(int err)
-{
- return err == EPERM;
-}
-
-void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
-{
- struct vcpu_reg_sublist *s;
- int feature;
-
- for_each_sublist(c, s) {
- if (s->finalize) {
- feature = s->feature;
- vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature);
- }
- }
-}
-
-#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
-
-#define CORE_REGS_XX_NR_WORDS 2
-#define CORE_SPSR_XX_NR_WORDS 2
-#define CORE_FPREGS_XX_NR_WORDS 4
-
-static const char *core_id_to_str(const char *prefix, __u64 id)
-{
- __u64 core_off = id & ~REG_MASK, idx;
-
- /*
- * core_off is the offset into struct kvm_regs
- */
- switch (core_off) {
- case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
- KVM_REG_ARM_CORE_REG(regs.regs[30]):
- idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
- TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", prefix, idx);
- return strdup_printf("KVM_REG_ARM_CORE_REG(regs.regs[%lld])", idx);
- case KVM_REG_ARM_CORE_REG(regs.sp):
- return "KVM_REG_ARM_CORE_REG(regs.sp)";
- case KVM_REG_ARM_CORE_REG(regs.pc):
- return "KVM_REG_ARM_CORE_REG(regs.pc)";
- case KVM_REG_ARM_CORE_REG(regs.pstate):
- return "KVM_REG_ARM_CORE_REG(regs.pstate)";
- case KVM_REG_ARM_CORE_REG(sp_el1):
- return "KVM_REG_ARM_CORE_REG(sp_el1)";
- case KVM_REG_ARM_CORE_REG(elr_el1):
- return "KVM_REG_ARM_CORE_REG(elr_el1)";
- case KVM_REG_ARM_CORE_REG(spsr[0]) ...
- KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
- idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
- TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", prefix, idx);
- return strdup_printf("KVM_REG_ARM_CORE_REG(spsr[%lld])", idx);
- case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
- KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
- idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
- TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", prefix, idx);
- return strdup_printf("KVM_REG_ARM_CORE_REG(fp_regs.vregs[%lld])", idx);
- case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
- return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
- case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
- return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
- }
-
- TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id);
- return NULL;
-}
-
-static const char *sve_id_to_str(const char *prefix, __u64 id)
-{
- __u64 sve_off, n, i;
-
- if (id == KVM_REG_ARM64_SVE_VLS)
- return "KVM_REG_ARM64_SVE_VLS";
-
- sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
- i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);
-
- TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", prefix, id);
-
- switch (sve_off) {
- case KVM_REG_ARM64_SVE_ZREG_BASE ...
- KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
- n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
- TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
- "%s: Unexpected bits set in SVE ZREG id: 0x%llx", prefix, id);
- return strdup_printf("KVM_REG_ARM64_SVE_ZREG(%lld, 0)", n);
- case KVM_REG_ARM64_SVE_PREG_BASE ...
- KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
- n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
- TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
- "%s: Unexpected bits set in SVE PREG id: 0x%llx", prefix, id);
- return strdup_printf("KVM_REG_ARM64_SVE_PREG(%lld, 0)", n);
- case KVM_REG_ARM64_SVE_FFR_BASE:
- TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
- "%s: Unexpected bits set in SVE FFR id: 0x%llx", prefix, id);
- return "KVM_REG_ARM64_SVE_FFR(0)";
- }
-
- return NULL;
-}
-
-void print_reg(const char *prefix, __u64 id)
-{
- unsigned op0, op1, crn, crm, op2;
- const char *reg_size = NULL;
-
- TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
- "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", prefix, id);
-
- switch (id & KVM_REG_SIZE_MASK) {
- case KVM_REG_SIZE_U8:
- reg_size = "KVM_REG_SIZE_U8";
- break;
- case KVM_REG_SIZE_U16:
- reg_size = "KVM_REG_SIZE_U16";
- break;
- case KVM_REG_SIZE_U32:
- reg_size = "KVM_REG_SIZE_U32";
- break;
- case KVM_REG_SIZE_U64:
- reg_size = "KVM_REG_SIZE_U64";
- break;
- case KVM_REG_SIZE_U128:
- reg_size = "KVM_REG_SIZE_U128";
- break;
- case KVM_REG_SIZE_U256:
- reg_size = "KVM_REG_SIZE_U256";
- break;
- case KVM_REG_SIZE_U512:
- reg_size = "KVM_REG_SIZE_U512";
- break;
- case KVM_REG_SIZE_U1024:
- reg_size = "KVM_REG_SIZE_U1024";
- break;
- case KVM_REG_SIZE_U2048:
- reg_size = "KVM_REG_SIZE_U2048";
- break;
- default:
- TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
- prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
- }
-
- switch (id & KVM_REG_ARM_COPROC_MASK) {
- case KVM_REG_ARM_CORE:
- printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(prefix, id));
- break;
- case KVM_REG_ARM_DEMUX:
- TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
- "%s: Unexpected bits set in DEMUX reg id: 0x%llx", prefix, id);
- printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
- reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
- break;
- case KVM_REG_ARM64_SYSREG:
- op0 = (id & KVM_REG_ARM64_SYSREG_OP0_MASK) >> KVM_REG_ARM64_SYSREG_OP0_SHIFT;
- op1 = (id & KVM_REG_ARM64_SYSREG_OP1_MASK) >> KVM_REG_ARM64_SYSREG_OP1_SHIFT;
- crn = (id & KVM_REG_ARM64_SYSREG_CRN_MASK) >> KVM_REG_ARM64_SYSREG_CRN_SHIFT;
- crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
- op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
- TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
- "%s: Unexpected bits set in SYSREG reg id: 0x%llx", prefix, id);
- printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
- break;
- case KVM_REG_ARM_FW:
- TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
- "%s: Unexpected bits set in FW reg id: 0x%llx", prefix, id);
- printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
- break;
- case KVM_REG_ARM_FW_FEAT_BMAP:
- TEST_ASSERT(id == KVM_REG_ARM_FW_FEAT_BMAP_REG(id & 0xffff),
- "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", prefix, id);
- printf("\tKVM_REG_ARM_FW_FEAT_BMAP_REG(%lld),\n", id & 0xffff);
- break;
- case KVM_REG_ARM64_SVE:
- printf("\t%s,\n", sve_id_to_str(prefix, id));
- break;
- default:
- TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx",
- prefix, (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
- }
-}
-
-/*
- * The original blessed list was primed with the output of kernel version
- * v4.15 with --core-reg-fixup and then later updated with new registers.
- * (The --core-reg-fixup option and its fixup function have been removed
- * from the test, as this type of test is unlikely to be used on a kernel
- * older than v5.2.)
- *
- * The blessed list is up to date with kernel version v6.4 (or so we hope)
- */
-static __u64 base_regs[] = {
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[3]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[4]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[5]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[6]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[7]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[8]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[9]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[10]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[11]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[12]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[13]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[14]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[15]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[16]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[17]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[18]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[19]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[20]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[21]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[22]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[23]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[24]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[25]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[26]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[27]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[28]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[29]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[30]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.sp),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pstate),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(elr_el1),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[0]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[1]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr),
- KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr),
- KVM_REG_ARM_FW_REG(0), /* KVM_REG_ARM_PSCI_VERSION */
- KVM_REG_ARM_FW_REG(1), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 */
- KVM_REG_ARM_FW_REG(2), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 */
- KVM_REG_ARM_FW_REG(3), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 */
- KVM_REG_ARM_FW_FEAT_BMAP_REG(0), /* KVM_REG_ARM_STD_BMAP */
- KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */
- KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */
- ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */
- ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */
- ARM64_SYS_REG(3, 3, 14, 0, 2),
- ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */
- ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */
- ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */
- ARM64_SYS_REG(3, 1, 0, 0, 7), /* AIDR_EL1 */
- ARM64_SYS_REG(3, 3, 0, 0, 1), /* CTR_EL0 */
- ARM64_SYS_REG(2, 0, 0, 0, 4),
- ARM64_SYS_REG(2, 0, 0, 0, 5),
- ARM64_SYS_REG(2, 0, 0, 0, 6),
- ARM64_SYS_REG(2, 0, 0, 0, 7),
- ARM64_SYS_REG(2, 0, 0, 1, 4),
- ARM64_SYS_REG(2, 0, 0, 1, 5),
- ARM64_SYS_REG(2, 0, 0, 1, 6),
- ARM64_SYS_REG(2, 0, 0, 1, 7),
- ARM64_SYS_REG(2, 0, 0, 2, 0), /* MDCCINT_EL1 */
- ARM64_SYS_REG(2, 0, 0, 2, 2), /* MDSCR_EL1 */
- ARM64_SYS_REG(2, 0, 0, 2, 4),
- ARM64_SYS_REG(2, 0, 0, 2, 5),
- ARM64_SYS_REG(2, 0, 0, 2, 6),
- ARM64_SYS_REG(2, 0, 0, 2, 7),
- ARM64_SYS_REG(2, 0, 0, 3, 4),
- ARM64_SYS_REG(2, 0, 0, 3, 5),
- ARM64_SYS_REG(2, 0, 0, 3, 6),
- ARM64_SYS_REG(2, 0, 0, 3, 7),
- ARM64_SYS_REG(2, 0, 0, 4, 4),
- ARM64_SYS_REG(2, 0, 0, 4, 5),
- ARM64_SYS_REG(2, 0, 0, 4, 6),
- ARM64_SYS_REG(2, 0, 0, 4, 7),
- ARM64_SYS_REG(2, 0, 0, 5, 4),
- ARM64_SYS_REG(2, 0, 0, 5, 5),
- ARM64_SYS_REG(2, 0, 0, 5, 6),
- ARM64_SYS_REG(2, 0, 0, 5, 7),
- ARM64_SYS_REG(2, 0, 0, 6, 4),
- ARM64_SYS_REG(2, 0, 0, 6, 5),
- ARM64_SYS_REG(2, 0, 0, 6, 6),
- ARM64_SYS_REG(2, 0, 0, 6, 7),
- ARM64_SYS_REG(2, 0, 0, 7, 4),
- ARM64_SYS_REG(2, 0, 0, 7, 5),
- ARM64_SYS_REG(2, 0, 0, 7, 6),
- ARM64_SYS_REG(2, 0, 0, 7, 7),
- ARM64_SYS_REG(2, 0, 0, 8, 4),
- ARM64_SYS_REG(2, 0, 0, 8, 5),
- ARM64_SYS_REG(2, 0, 0, 8, 6),
- ARM64_SYS_REG(2, 0, 0, 8, 7),
- ARM64_SYS_REG(2, 0, 0, 9, 4),
- ARM64_SYS_REG(2, 0, 0, 9, 5),
- ARM64_SYS_REG(2, 0, 0, 9, 6),
- ARM64_SYS_REG(2, 0, 0, 9, 7),
- ARM64_SYS_REG(2, 0, 0, 10, 4),
- ARM64_SYS_REG(2, 0, 0, 10, 5),
- ARM64_SYS_REG(2, 0, 0, 10, 6),
- ARM64_SYS_REG(2, 0, 0, 10, 7),
- ARM64_SYS_REG(2, 0, 0, 11, 4),
- ARM64_SYS_REG(2, 0, 0, 11, 5),
- ARM64_SYS_REG(2, 0, 0, 11, 6),
- ARM64_SYS_REG(2, 0, 0, 11, 7),
- ARM64_SYS_REG(2, 0, 0, 12, 4),
- ARM64_SYS_REG(2, 0, 0, 12, 5),
- ARM64_SYS_REG(2, 0, 0, 12, 6),
- ARM64_SYS_REG(2, 0, 0, 12, 7),
- ARM64_SYS_REG(2, 0, 0, 13, 4),
- ARM64_SYS_REG(2, 0, 0, 13, 5),
- ARM64_SYS_REG(2, 0, 0, 13, 6),
- ARM64_SYS_REG(2, 0, 0, 13, 7),
- ARM64_SYS_REG(2, 0, 0, 14, 4),
- ARM64_SYS_REG(2, 0, 0, 14, 5),
- ARM64_SYS_REG(2, 0, 0, 14, 6),
- ARM64_SYS_REG(2, 0, 0, 14, 7),
- ARM64_SYS_REG(2, 0, 0, 15, 4),
- ARM64_SYS_REG(2, 0, 0, 15, 5),
- ARM64_SYS_REG(2, 0, 0, 15, 6),
- ARM64_SYS_REG(2, 0, 0, 15, 7),
- ARM64_SYS_REG(2, 0, 1, 1, 4), /* OSLSR_EL1 */
- ARM64_SYS_REG(2, 4, 0, 7, 0), /* DBGVCR32_EL2 */
- ARM64_SYS_REG(3, 0, 0, 0, 5), /* MPIDR_EL1 */
- ARM64_SYS_REG(3, 0, 0, 1, 0), /* ID_PFR0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 1, 1), /* ID_PFR1_EL1 */
- ARM64_SYS_REG(3, 0, 0, 1, 2), /* ID_DFR0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 1, 3), /* ID_AFR0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 1, 4), /* ID_MMFR0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 1, 5), /* ID_MMFR1_EL1 */
- ARM64_SYS_REG(3, 0, 0, 1, 6), /* ID_MMFR2_EL1 */
- ARM64_SYS_REG(3, 0, 0, 1, 7), /* ID_MMFR3_EL1 */
- ARM64_SYS_REG(3, 0, 0, 2, 0), /* ID_ISAR0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 2, 1), /* ID_ISAR1_EL1 */
- ARM64_SYS_REG(3, 0, 0, 2, 2), /* ID_ISAR2_EL1 */
- ARM64_SYS_REG(3, 0, 0, 2, 3), /* ID_ISAR3_EL1 */
- ARM64_SYS_REG(3, 0, 0, 2, 4), /* ID_ISAR4_EL1 */
- ARM64_SYS_REG(3, 0, 0, 2, 5), /* ID_ISAR5_EL1 */
- ARM64_SYS_REG(3, 0, 0, 2, 6), /* ID_MMFR4_EL1 */
- ARM64_SYS_REG(3, 0, 0, 2, 7), /* ID_ISAR6_EL1 */
- ARM64_SYS_REG(3, 0, 0, 3, 0), /* MVFR0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 3, 1), /* MVFR1_EL1 */
- ARM64_SYS_REG(3, 0, 0, 3, 2), /* MVFR2_EL1 */
- ARM64_SYS_REG(3, 0, 0, 3, 3),
- ARM64_SYS_REG(3, 0, 0, 3, 4), /* ID_PFR2_EL1 */
- ARM64_SYS_REG(3, 0, 0, 3, 5), /* ID_DFR1_EL1 */
- ARM64_SYS_REG(3, 0, 0, 3, 6), /* ID_MMFR5_EL1 */
- ARM64_SYS_REG(3, 0, 0, 3, 7),
- ARM64_SYS_REG(3, 0, 0, 4, 0), /* ID_AA64PFR0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 4, 1), /* ID_AA64PFR1_EL1 */
- ARM64_SYS_REG(3, 0, 0, 4, 2), /* ID_AA64PFR2_EL1 */
- ARM64_SYS_REG(3, 0, 0, 4, 3),
- ARM64_SYS_REG(3, 0, 0, 4, 4), /* ID_AA64ZFR0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 4, 5), /* ID_AA64SMFR0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 4, 6),
- ARM64_SYS_REG(3, 0, 0, 4, 7),
- ARM64_SYS_REG(3, 0, 0, 5, 0), /* ID_AA64DFR0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 5, 1), /* ID_AA64DFR1_EL1 */
- ARM64_SYS_REG(3, 0, 0, 5, 2),
- ARM64_SYS_REG(3, 0, 0, 5, 3),
- ARM64_SYS_REG(3, 0, 0, 5, 4), /* ID_AA64AFR0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 5, 5), /* ID_AA64AFR1_EL1 */
- ARM64_SYS_REG(3, 0, 0, 5, 6),
- ARM64_SYS_REG(3, 0, 0, 5, 7),
- ARM64_SYS_REG(3, 0, 0, 6, 0), /* ID_AA64ISAR0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 6, 1), /* ID_AA64ISAR1_EL1 */
- ARM64_SYS_REG(3, 0, 0, 6, 2), /* ID_AA64ISAR2_EL1 */
- ARM64_SYS_REG(3, 0, 0, 6, 3),
- ARM64_SYS_REG(3, 0, 0, 6, 4),
- ARM64_SYS_REG(3, 0, 0, 6, 5),
- ARM64_SYS_REG(3, 0, 0, 6, 6),
- ARM64_SYS_REG(3, 0, 0, 6, 7),
- ARM64_SYS_REG(3, 0, 0, 7, 0), /* ID_AA64MMFR0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 1), /* ID_AA64MMFR1_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 2), /* ID_AA64MMFR2_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 4), /* ID_AA64MMFR4_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 5),
- ARM64_SYS_REG(3, 0, 0, 7, 6),
- ARM64_SYS_REG(3, 0, 0, 7, 7),
- ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */
- ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */
- ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */
- ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */
- ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */
- ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */
- ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */
- ARM64_SYS_REG(3, 0, 5, 1, 0), /* AFSR0_EL1 */
- ARM64_SYS_REG(3, 0, 5, 1, 1), /* AFSR1_EL1 */
- ARM64_SYS_REG(3, 0, 5, 2, 0), /* ESR_EL1 */
- ARM64_SYS_REG(3, 0, 6, 0, 0), /* FAR_EL1 */
- ARM64_SYS_REG(3, 0, 7, 4, 0), /* PAR_EL1 */
- ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */
- ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */
- ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */
- ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */
- ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */
- ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */
- ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */
- ARM64_SYS_REG(3, 0, 13, 0, 1), /* CONTEXTIDR_EL1 */
- ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */
- ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */
- ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */
- ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */
- ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */
- ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */
- ARM64_SYS_REG(3, 3, 14, 0, 1), /* CNTPCT_EL0 */
- ARM64_SYS_REG(3, 3, 14, 2, 1), /* CNTP_CTL_EL0 */
- ARM64_SYS_REG(3, 3, 14, 2, 2), /* CNTP_CVAL_EL0 */
- ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */
- ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */
- ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */
-};
-
-static __u64 pmu_regs[] = {
- ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */
- ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */
- ARM64_SYS_REG(3, 3, 9, 12, 0), /* PMCR_EL0 */
- ARM64_SYS_REG(3, 3, 9, 12, 1), /* PMCNTENSET_EL0 */
- ARM64_SYS_REG(3, 3, 9, 12, 2), /* PMCNTENCLR_EL0 */
- ARM64_SYS_REG(3, 3, 9, 12, 3), /* PMOVSCLR_EL0 */
- ARM64_SYS_REG(3, 3, 9, 12, 4), /* PMSWINC_EL0 */
- ARM64_SYS_REG(3, 3, 9, 12, 5), /* PMSELR_EL0 */
- ARM64_SYS_REG(3, 3, 9, 13, 0), /* PMCCNTR_EL0 */
- ARM64_SYS_REG(3, 3, 9, 14, 0), /* PMUSERENR_EL0 */
- ARM64_SYS_REG(3, 3, 9, 14, 3), /* PMOVSSET_EL0 */
- ARM64_SYS_REG(3, 3, 14, 8, 0),
- ARM64_SYS_REG(3, 3, 14, 8, 1),
- ARM64_SYS_REG(3, 3, 14, 8, 2),
- ARM64_SYS_REG(3, 3, 14, 8, 3),
- ARM64_SYS_REG(3, 3, 14, 8, 4),
- ARM64_SYS_REG(3, 3, 14, 8, 5),
- ARM64_SYS_REG(3, 3, 14, 8, 6),
- ARM64_SYS_REG(3, 3, 14, 8, 7),
- ARM64_SYS_REG(3, 3, 14, 9, 0),
- ARM64_SYS_REG(3, 3, 14, 9, 1),
- ARM64_SYS_REG(3, 3, 14, 9, 2),
- ARM64_SYS_REG(3, 3, 14, 9, 3),
- ARM64_SYS_REG(3, 3, 14, 9, 4),
- ARM64_SYS_REG(3, 3, 14, 9, 5),
- ARM64_SYS_REG(3, 3, 14, 9, 6),
- ARM64_SYS_REG(3, 3, 14, 9, 7),
- ARM64_SYS_REG(3, 3, 14, 10, 0),
- ARM64_SYS_REG(3, 3, 14, 10, 1),
- ARM64_SYS_REG(3, 3, 14, 10, 2),
- ARM64_SYS_REG(3, 3, 14, 10, 3),
- ARM64_SYS_REG(3, 3, 14, 10, 4),
- ARM64_SYS_REG(3, 3, 14, 10, 5),
- ARM64_SYS_REG(3, 3, 14, 10, 6),
- ARM64_SYS_REG(3, 3, 14, 10, 7),
- ARM64_SYS_REG(3, 3, 14, 11, 0),
- ARM64_SYS_REG(3, 3, 14, 11, 1),
- ARM64_SYS_REG(3, 3, 14, 11, 2),
- ARM64_SYS_REG(3, 3, 14, 11, 3),
- ARM64_SYS_REG(3, 3, 14, 11, 4),
- ARM64_SYS_REG(3, 3, 14, 11, 5),
- ARM64_SYS_REG(3, 3, 14, 11, 6),
- ARM64_SYS_REG(3, 3, 14, 12, 0),
- ARM64_SYS_REG(3, 3, 14, 12, 1),
- ARM64_SYS_REG(3, 3, 14, 12, 2),
- ARM64_SYS_REG(3, 3, 14, 12, 3),
- ARM64_SYS_REG(3, 3, 14, 12, 4),
- ARM64_SYS_REG(3, 3, 14, 12, 5),
- ARM64_SYS_REG(3, 3, 14, 12, 6),
- ARM64_SYS_REG(3, 3, 14, 12, 7),
- ARM64_SYS_REG(3, 3, 14, 13, 0),
- ARM64_SYS_REG(3, 3, 14, 13, 1),
- ARM64_SYS_REG(3, 3, 14, 13, 2),
- ARM64_SYS_REG(3, 3, 14, 13, 3),
- ARM64_SYS_REG(3, 3, 14, 13, 4),
- ARM64_SYS_REG(3, 3, 14, 13, 5),
- ARM64_SYS_REG(3, 3, 14, 13, 6),
- ARM64_SYS_REG(3, 3, 14, 13, 7),
- ARM64_SYS_REG(3, 3, 14, 14, 0),
- ARM64_SYS_REG(3, 3, 14, 14, 1),
- ARM64_SYS_REG(3, 3, 14, 14, 2),
- ARM64_SYS_REG(3, 3, 14, 14, 3),
- ARM64_SYS_REG(3, 3, 14, 14, 4),
- ARM64_SYS_REG(3, 3, 14, 14, 5),
- ARM64_SYS_REG(3, 3, 14, 14, 6),
- ARM64_SYS_REG(3, 3, 14, 14, 7),
- ARM64_SYS_REG(3, 3, 14, 15, 0),
- ARM64_SYS_REG(3, 3, 14, 15, 1),
- ARM64_SYS_REG(3, 3, 14, 15, 2),
- ARM64_SYS_REG(3, 3, 14, 15, 3),
- ARM64_SYS_REG(3, 3, 14, 15, 4),
- ARM64_SYS_REG(3, 3, 14, 15, 5),
- ARM64_SYS_REG(3, 3, 14, 15, 6),
- ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */
-};
-
-static __u64 vregs[] = {
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]),
- KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]),
-};
-
-static __u64 sve_regs[] = {
- KVM_REG_ARM64_SVE_VLS,
- KVM_REG_ARM64_SVE_ZREG(0, 0),
- KVM_REG_ARM64_SVE_ZREG(1, 0),
- KVM_REG_ARM64_SVE_ZREG(2, 0),
- KVM_REG_ARM64_SVE_ZREG(3, 0),
- KVM_REG_ARM64_SVE_ZREG(4, 0),
- KVM_REG_ARM64_SVE_ZREG(5, 0),
- KVM_REG_ARM64_SVE_ZREG(6, 0),
- KVM_REG_ARM64_SVE_ZREG(7, 0),
- KVM_REG_ARM64_SVE_ZREG(8, 0),
- KVM_REG_ARM64_SVE_ZREG(9, 0),
- KVM_REG_ARM64_SVE_ZREG(10, 0),
- KVM_REG_ARM64_SVE_ZREG(11, 0),
- KVM_REG_ARM64_SVE_ZREG(12, 0),
- KVM_REG_ARM64_SVE_ZREG(13, 0),
- KVM_REG_ARM64_SVE_ZREG(14, 0),
- KVM_REG_ARM64_SVE_ZREG(15, 0),
- KVM_REG_ARM64_SVE_ZREG(16, 0),
- KVM_REG_ARM64_SVE_ZREG(17, 0),
- KVM_REG_ARM64_SVE_ZREG(18, 0),
- KVM_REG_ARM64_SVE_ZREG(19, 0),
- KVM_REG_ARM64_SVE_ZREG(20, 0),
- KVM_REG_ARM64_SVE_ZREG(21, 0),
- KVM_REG_ARM64_SVE_ZREG(22, 0),
- KVM_REG_ARM64_SVE_ZREG(23, 0),
- KVM_REG_ARM64_SVE_ZREG(24, 0),
- KVM_REG_ARM64_SVE_ZREG(25, 0),
- KVM_REG_ARM64_SVE_ZREG(26, 0),
- KVM_REG_ARM64_SVE_ZREG(27, 0),
- KVM_REG_ARM64_SVE_ZREG(28, 0),
- KVM_REG_ARM64_SVE_ZREG(29, 0),
- KVM_REG_ARM64_SVE_ZREG(30, 0),
- KVM_REG_ARM64_SVE_ZREG(31, 0),
- KVM_REG_ARM64_SVE_PREG(0, 0),
- KVM_REG_ARM64_SVE_PREG(1, 0),
- KVM_REG_ARM64_SVE_PREG(2, 0),
- KVM_REG_ARM64_SVE_PREG(3, 0),
- KVM_REG_ARM64_SVE_PREG(4, 0),
- KVM_REG_ARM64_SVE_PREG(5, 0),
- KVM_REG_ARM64_SVE_PREG(6, 0),
- KVM_REG_ARM64_SVE_PREG(7, 0),
- KVM_REG_ARM64_SVE_PREG(8, 0),
- KVM_REG_ARM64_SVE_PREG(9, 0),
- KVM_REG_ARM64_SVE_PREG(10, 0),
- KVM_REG_ARM64_SVE_PREG(11, 0),
- KVM_REG_ARM64_SVE_PREG(12, 0),
- KVM_REG_ARM64_SVE_PREG(13, 0),
- KVM_REG_ARM64_SVE_PREG(14, 0),
- KVM_REG_ARM64_SVE_PREG(15, 0),
- KVM_REG_ARM64_SVE_FFR(0),
- ARM64_SYS_REG(3, 0, 1, 2, 0), /* ZCR_EL1 */
-};
-
-static __u64 sve_rejects_set[] = {
- KVM_REG_ARM64_SVE_VLS,
-};
-
-static __u64 pauth_addr_regs[] = {
- ARM64_SYS_REG(3, 0, 2, 1, 0), /* APIAKEYLO_EL1 */
- ARM64_SYS_REG(3, 0, 2, 1, 1), /* APIAKEYHI_EL1 */
- ARM64_SYS_REG(3, 0, 2, 1, 2), /* APIBKEYLO_EL1 */
- ARM64_SYS_REG(3, 0, 2, 1, 3), /* APIBKEYHI_EL1 */
- ARM64_SYS_REG(3, 0, 2, 2, 0), /* APDAKEYLO_EL1 */
- ARM64_SYS_REG(3, 0, 2, 2, 1), /* APDAKEYHI_EL1 */
- ARM64_SYS_REG(3, 0, 2, 2, 2), /* APDBKEYLO_EL1 */
- ARM64_SYS_REG(3, 0, 2, 2, 3) /* APDBKEYHI_EL1 */
-};
-
-static __u64 pauth_generic_regs[] = {
- ARM64_SYS_REG(3, 0, 2, 3, 0), /* APGAKEYLO_EL1 */
- ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */
-};
-
-#define BASE_SUBLIST \
- { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
-#define VREGS_SUBLIST \
- { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), }
-#define PMU_SUBLIST \
- { "pmu", .capability = KVM_CAP_ARM_PMU_V3, .feature = KVM_ARM_VCPU_PMU_V3, \
- .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
-#define SVE_SUBLIST \
- { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
- .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \
- .rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), }
-#define PAUTH_SUBLIST \
- { \
- .name = "pauth_address", \
- .capability = KVM_CAP_ARM_PTRAUTH_ADDRESS, \
- .feature = KVM_ARM_VCPU_PTRAUTH_ADDRESS, \
- .regs = pauth_addr_regs, \
- .regs_n = ARRAY_SIZE(pauth_addr_regs), \
- }, \
- { \
- .name = "pauth_generic", \
- .capability = KVM_CAP_ARM_PTRAUTH_GENERIC, \
- .feature = KVM_ARM_VCPU_PTRAUTH_GENERIC, \
- .regs = pauth_generic_regs, \
- .regs_n = ARRAY_SIZE(pauth_generic_regs), \
- }
-
-static struct vcpu_reg_list vregs_config = {
- .sublists = {
- BASE_SUBLIST,
- VREGS_SUBLIST,
- {0},
- },
-};
-static struct vcpu_reg_list vregs_pmu_config = {
- .sublists = {
- BASE_SUBLIST,
- VREGS_SUBLIST,
- PMU_SUBLIST,
- {0},
- },
-};
-static struct vcpu_reg_list sve_config = {
- .sublists = {
- BASE_SUBLIST,
- SVE_SUBLIST,
- {0},
- },
-};
-static struct vcpu_reg_list sve_pmu_config = {
- .sublists = {
- BASE_SUBLIST,
- SVE_SUBLIST,
- PMU_SUBLIST,
- {0},
- },
-};
-static struct vcpu_reg_list pauth_config = {
- .sublists = {
- BASE_SUBLIST,
- VREGS_SUBLIST,
- PAUTH_SUBLIST,
- {0},
- },
-};
-static struct vcpu_reg_list pauth_pmu_config = {
- .sublists = {
- BASE_SUBLIST,
- VREGS_SUBLIST,
- PAUTH_SUBLIST,
- PMU_SUBLIST,
- {0},
- },
-};
-
-struct vcpu_reg_list *vcpu_configs[] = {
- &vregs_config,
- &vregs_pmu_config,
- &sve_config,
- &sve_pmu_config,
- &pauth_config,
- &pauth_pmu_config,
-};
-int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-
-/* hypercalls: Check the ARM64 pseudo-firmware bitmap register interface.
- *
- * The test validates the basic hypercall functionalities that are exposed
- * via the pseudo-firmware bitmap register. This includes the registers'
- * read/write behavior before and after the VM has started, and if the
- * hypercalls are properly masked or unmasked to the guest when disabled or
- * enabled from the KVM userspace, respectively.
- */
-#include <errno.h>
-#include <linux/arm-smccc.h>
-#include <asm/kvm.h>
-#include <kvm_util.h>
-
-#include "processor.h"
-
-#define FW_REG_ULIMIT_VAL(max_feat_bit) (GENMASK(max_feat_bit, 0))
-
-/* Last valid bits of the bitmapped firmware registers */
-#define KVM_REG_ARM_STD_BMAP_BIT_MAX 0
-#define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX 0
-#define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX 1
-
-struct kvm_fw_reg_info {
- uint64_t reg; /* Register definition */
- uint64_t max_feat_bit; /* Bit that represents the upper limit of the feature-map */
-};
-
-#define FW_REG_INFO(r) \
- { \
- .reg = r, \
- .max_feat_bit = r##_BIT_MAX, \
- }
-
-static const struct kvm_fw_reg_info fw_reg_info[] = {
- FW_REG_INFO(KVM_REG_ARM_STD_BMAP),
- FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP),
- FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP),
-};
-
-enum test_stage {
- TEST_STAGE_REG_IFACE,
- TEST_STAGE_HVC_IFACE_FEAT_DISABLED,
- TEST_STAGE_HVC_IFACE_FEAT_ENABLED,
- TEST_STAGE_HVC_IFACE_FALSE_INFO,
- TEST_STAGE_END,
-};
-
-static int stage = TEST_STAGE_REG_IFACE;
-
-struct test_hvc_info {
- uint32_t func_id;
- uint64_t arg1;
-};
-
-#define TEST_HVC_INFO(f, a1) \
- { \
- .func_id = f, \
- .arg1 = a1, \
- }
-
-static const struct test_hvc_info hvc_info[] = {
- /* KVM_REG_ARM_STD_BMAP */
- TEST_HVC_INFO(ARM_SMCCC_TRNG_VERSION, 0),
- TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_TRNG_RND64),
- TEST_HVC_INFO(ARM_SMCCC_TRNG_GET_UUID, 0),
- TEST_HVC_INFO(ARM_SMCCC_TRNG_RND32, 0),
- TEST_HVC_INFO(ARM_SMCCC_TRNG_RND64, 0),
-
- /* KVM_REG_ARM_STD_HYP_BMAP */
- TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_HV_PV_TIME_FEATURES),
- TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_HV_PV_TIME_ST),
- TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_ST, 0),
-
- /* KVM_REG_ARM_VENDOR_HYP_BMAP */
- TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID,
- ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID),
- TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0),
- TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID, KVM_PTP_VIRT_COUNTER),
-};
-
-/* Feed false hypercall info to test the KVM behavior */
-static const struct test_hvc_info false_hvc_info[] = {
- /* Feature support check against a different family of hypercalls */
- TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID),
- TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_TRNG_RND64),
- TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_TRNG_RND64),
-};
-
-static void guest_test_hvc(const struct test_hvc_info *hc_info)
-{
- unsigned int i;
- struct arm_smccc_res res;
- unsigned int hvc_info_arr_sz;
-
- hvc_info_arr_sz =
- hc_info == hvc_info ? ARRAY_SIZE(hvc_info) : ARRAY_SIZE(false_hvc_info);
-
- for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) {
- memset(&res, 0, sizeof(res));
- smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res);
-
- switch (stage) {
- case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
- case TEST_STAGE_HVC_IFACE_FALSE_INFO:
- __GUEST_ASSERT(res.a0 == SMCCC_RET_NOT_SUPPORTED,
- "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u",
- res.a0, hc_info->func_id, hc_info->arg1, stage);
- break;
- case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
- __GUEST_ASSERT(res.a0 != SMCCC_RET_NOT_SUPPORTED,
- "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u",
- res.a0, hc_info->func_id, hc_info->arg1, stage);
- break;
- default:
- GUEST_FAIL("Unexpected stage = %u", stage);
- }
- }
-}
-
-static void guest_code(void)
-{
- while (stage != TEST_STAGE_END) {
- switch (stage) {
- case TEST_STAGE_REG_IFACE:
- break;
- case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
- case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
- guest_test_hvc(hvc_info);
- break;
- case TEST_STAGE_HVC_IFACE_FALSE_INFO:
- guest_test_hvc(false_hvc_info);
- break;
- default:
- GUEST_FAIL("Unexpected stage = %u", stage);
- }
-
- GUEST_SYNC(stage);
- }
-
- GUEST_DONE();
-}
-
-struct st_time {
- uint32_t rev;
- uint32_t attr;
- uint64_t st_time;
-};
-
-#define STEAL_TIME_SIZE ((sizeof(struct st_time) + 63) & ~63)
-#define ST_GPA_BASE (1 << 30)
-
-static void steal_time_init(struct kvm_vcpu *vcpu)
-{
- uint64_t st_ipa = (ulong)ST_GPA_BASE;
- unsigned int gpages;
-
- gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE);
- vm_userspace_mem_region_add(vcpu->vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
-
- vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PVTIME_CTRL,
- KVM_ARM_VCPU_PVTIME_IPA, &st_ipa);
-}
-
-static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu)
-{
- uint64_t val;
- unsigned int i;
- int ret;
-
- for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
- const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
-
- /* First 'read' should be an upper limit of the features supported */
- val = vcpu_get_reg(vcpu, reg_info->reg);
- TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit),
- "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx",
- reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val);
-
- /* Test a 'write' by disabling all the features of the register map */
- ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
- TEST_ASSERT(ret == 0,
- "Failed to clear all the features of reg: 0x%lx; ret: %d",
- reg_info->reg, errno);
-
- val = vcpu_get_reg(vcpu, reg_info->reg);
- TEST_ASSERT(val == 0,
- "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg);
-
- /*
- * Test enabling a feature that's not supported.
- * Avoid this check if all the bits are occupied.
- */
- if (reg_info->max_feat_bit < 63) {
- ret = __vcpu_set_reg(vcpu, reg_info->reg, BIT(reg_info->max_feat_bit + 1));
- TEST_ASSERT(ret != 0 && errno == EINVAL,
- "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx",
- errno, reg_info->reg);
- }
- }
-}
-
-static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu)
-{
- uint64_t val;
- unsigned int i;
- int ret;
-
- for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
- const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
-
- /*
- * Before starting the VM, the test clears all the bits.
- * Check if that's still the case.
- */
- val = vcpu_get_reg(vcpu, reg_info->reg);
- TEST_ASSERT(val == 0,
- "Expected all the features to be cleared for reg: 0x%lx",
- reg_info->reg);
-
- /*
- * Since the VM has run at least once, KVM shouldn't allow modification of
- * the registers and should return EBUSY. Set the registers and check for
- * the expected errno.
- */
- ret = __vcpu_set_reg(vcpu, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit));
- TEST_ASSERT(ret != 0 && errno == EBUSY,
- "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx",
- errno, reg_info->reg);
- }
-}
-
-static struct kvm_vm *test_vm_create(struct kvm_vcpu **vcpu)
-{
- struct kvm_vm *vm;
-
- vm = vm_create_with_one_vcpu(vcpu, guest_code);
-
- steal_time_init(*vcpu);
-
- return vm;
-}
-
-static void test_guest_stage(struct kvm_vm **vm, struct kvm_vcpu **vcpu)
-{
- int prev_stage = stage;
-
- pr_debug("Stage: %d\n", prev_stage);
-
-	/* Sync the stage early; the VM might be freed below. */
- stage++;
- sync_global_to_guest(*vm, stage);
-
- switch (prev_stage) {
- case TEST_STAGE_REG_IFACE:
- test_fw_regs_after_vm_start(*vcpu);
- break;
- case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
- /* Start a new VM so that all the features are now enabled by default */
- kvm_vm_free(*vm);
- *vm = test_vm_create(vcpu);
- break;
- case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
- case TEST_STAGE_HVC_IFACE_FALSE_INFO:
- break;
- default:
- TEST_FAIL("Unknown test stage: %d", prev_stage);
- }
-}
-
-static void test_run(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct ucall uc;
- bool guest_done = false;
-
- vm = test_vm_create(&vcpu);
-
- test_fw_regs_before_vm_start(vcpu);
-
- while (!guest_done) {
- vcpu_run(vcpu);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- test_guest_stage(&vm, &vcpu);
- break;
- case UCALL_DONE:
- guest_done = true;
- break;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- default:
- TEST_FAIL("Unexpected guest exit");
- }
- }
-
- kvm_vm_free(vm);
-}
-
-int main(void)
-{
- test_run();
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * mmio_abort - Tests for userspace MMIO abort injection
- *
- * Copyright (c) 2024 Google LLC
- */
-#include "processor.h"
-#include "test_util.h"
-
-#define MMIO_ADDR 0x8000000ULL
-
-static u64 expected_abort_pc;
-
-static void expect_sea_handler(struct ex_regs *regs)
-{
- u64 esr = read_sysreg(esr_el1);
-
- GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
- GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
- GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
-
- GUEST_DONE();
-}
-
-static void unexpected_dabt_handler(struct ex_regs *regs)
-{
- GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc);
-}
-
-static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code,
- handler_fn dabt_handler)
-{
- struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code);
-
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(*vcpu);
- vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler);
-
- virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1);
-
- return vm;
-}
-
-static void vcpu_inject_extabt(struct kvm_vcpu *vcpu)
-{
- struct kvm_vcpu_events events = {};
-
- events.exception.ext_dabt_pending = true;
- vcpu_events_set(vcpu, &events);
-}
-
-static void vcpu_run_expect_done(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- vcpu_run(vcpu);
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_DONE:
- break;
- default:
- TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
- }
-}
-
-extern char test_mmio_abort_insn;
-
-static void test_mmio_abort_guest(void)
-{
- WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn);
-
- asm volatile("test_mmio_abort_insn:\n\t"
- "ldr x0, [%0]\n\t"
- : : "r" (MMIO_ADDR) : "x0", "memory");
-
- GUEST_FAIL("MMIO instruction should not retire");
-}
-
-/*
- * Test that KVM doesn't complete MMIO emulation when userspace has made an
- * external abort pending for the instruction.
- */
-static void test_mmio_abort(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest,
- expect_sea_handler);
- struct kvm_run *run = vcpu->run;
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
- TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
- TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
- TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
-
- vcpu_inject_extabt(vcpu);
- vcpu_run_expect_done(vcpu);
- kvm_vm_free(vm);
-}
-
-extern char test_mmio_nisv_insn;
-
-static void test_mmio_nisv_guest(void)
-{
- WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn);
-
- asm volatile("test_mmio_nisv_insn:\n\t"
- "ldr x0, [%0], #8\n\t"
- : : "r" (MMIO_ADDR) : "x0", "memory");
-
- GUEST_FAIL("MMIO instruction should not retire");
-}
-
-/*
- * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace
- * hasn't enabled KVM_CAP_ARM_NISV_TO_USER.
- */
-static void test_mmio_nisv(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
- unexpected_dabt_handler);
-
- TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN");
- TEST_ASSERT_EQ(errno, ENOSYS);
-
- kvm_vm_free(vm);
-}
-
-/*
- * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA
- * reaches the guest.
- */
-static void test_mmio_nisv_abort(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
- expect_sea_handler);
- struct kvm_run *run = vcpu->run;
-
- vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1);
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV);
- TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR);
-
- vcpu_inject_extabt(vcpu);
- vcpu_run_expect_done(vcpu);
- kvm_vm_free(vm);
-}
-
-int main(void)
-{
- test_mmio_abort();
- test_mmio_nisv();
- test_mmio_nisv_abort();
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-
-// Check that, on a GICv3 system, not configuring GICv3 correctly
-// results in all of the sysregs generating an UNDEF exception.
-
-#include <test_util.h>
-#include <kvm_util.h>
-#include <processor.h>
-
-static volatile bool handled;
-
-#define __check_sr_read(r) \
- ({ \
- uint64_t val; \
- \
- handled = false; \
- dsb(sy); \
- val = read_sysreg_s(SYS_ ## r); \
- val; \
- })
-
-#define __check_sr_write(r) \
- do { \
- handled = false; \
- dsb(sy); \
- write_sysreg_s(0, SYS_ ## r); \
- isb(); \
- } while(0)
-
-/* Fatal checks */
-#define check_sr_read(r) \
- do { \
- __check_sr_read(r); \
- __GUEST_ASSERT(handled, #r " no read trap"); \
- } while(0)
-
-#define check_sr_write(r) \
- do { \
- __check_sr_write(r); \
- __GUEST_ASSERT(handled, #r " no write trap"); \
- } while(0)
-
-#define check_sr_rw(r) \
- do { \
- check_sr_read(r); \
- check_sr_write(r); \
- } while(0)
-
-static void guest_code(void)
-{
- uint64_t val;
-
- /*
- * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having
- * hidden the feature at runtime without any other userspace action.
- */
- __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC),
- read_sysreg(id_aa64pfr0_el1)) == 0,
- "GICv3 wrongly advertised");
-
- /*
- * Access all GICv3 registers, and fail if we don't get an UNDEF.
- * Note that we happily access all the APxRn registers without
- * checking their existence, as all we want to see is a failure.
- */
- check_sr_rw(ICC_PMR_EL1);
- check_sr_read(ICC_IAR0_EL1);
- check_sr_write(ICC_EOIR0_EL1);
- check_sr_rw(ICC_HPPIR0_EL1);
- check_sr_rw(ICC_BPR0_EL1);
- check_sr_rw(ICC_AP0R0_EL1);
- check_sr_rw(ICC_AP0R1_EL1);
- check_sr_rw(ICC_AP0R2_EL1);
- check_sr_rw(ICC_AP0R3_EL1);
- check_sr_rw(ICC_AP1R0_EL1);
- check_sr_rw(ICC_AP1R1_EL1);
- check_sr_rw(ICC_AP1R2_EL1);
- check_sr_rw(ICC_AP1R3_EL1);
- check_sr_write(ICC_DIR_EL1);
- check_sr_read(ICC_RPR_EL1);
- check_sr_write(ICC_SGI1R_EL1);
- check_sr_write(ICC_ASGI1R_EL1);
- check_sr_write(ICC_SGI0R_EL1);
- check_sr_read(ICC_IAR1_EL1);
- check_sr_write(ICC_EOIR1_EL1);
- check_sr_rw(ICC_HPPIR1_EL1);
- check_sr_rw(ICC_BPR1_EL1);
- check_sr_rw(ICC_CTLR_EL1);
- check_sr_rw(ICC_IGRPEN0_EL1);
- check_sr_rw(ICC_IGRPEN1_EL1);
-
- /*
- * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can
- * be RAO/WI. Engage in non-fatal accesses, starting with a
- * write of 0 to try and disable SRE, and let's see if it
- * sticks.
- */
- __check_sr_write(ICC_SRE_EL1);
- if (!handled)
- GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n");
-
- val = __check_sr_read(ICC_SRE_EL1);
- if (!handled) {
- __GUEST_ASSERT((val & BIT(0)),
- "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n");
- GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n");
- }
-
- GUEST_DONE();
-}
-
-static void guest_undef_handler(struct ex_regs *regs)
-{
- /* Success, we've gracefully exploded! */
- handled = true;
- regs->pc += 4;
-}
-
-static void test_run_vcpu(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- do {
- vcpu_run(vcpu);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_PRINTF:
- printf("%s", uc.buffer);
- break;
- case UCALL_DONE:
- break;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
- } while (uc.cmd != UCALL_DONE);
-}
-
-static void test_guest_no_gicv3(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- /* Create a VM without a GICv3 */
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
- vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
- ESR_ELx_EC_UNKNOWN, guest_undef_handler);
-
- test_run_vcpu(vcpu);
-
- kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- uint64_t pfr0;
-
- vm = vm_create_with_one_vcpu(&vcpu, NULL);
- pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
- __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0),
- "GICv3 not supported.");
- kvm_vm_free(vm);
-
- test_guest_no_gicv3();
-
- return 0;
-}
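The ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC) extraction used above reduces to reading bits [27:24] of ID_AA64PFR0_EL1. A standalone illustration with a made-up register value; the macro names below are local to the example, not selftest-library symbols.

#include <stdint.h>
#include <stdio.h>

#define PFR0_GIC_SHIFT	24
#define PFR0_GIC_MASK	(0xfull << PFR0_GIC_SHIFT)

int main(void)
{
	uint64_t pfr0 = 0x1011011;	/* made-up ID_AA64PFR0_EL1 value */
	unsigned int gic = (pfr0 & PFR0_GIC_MASK) >> PFR0_GIC_SHIFT;

	/* GIC == 0 means no system register interface is advertised. */
	printf("ID_AA64PFR0_EL1.GIC = %u (%s)\n", gic,
	       gic ? "GIC sysreg interface advertised" : "hidden");
	return 0;
}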
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * page_fault_test.c - Test stage 2 faults.
- *
- * This test tries different combinations of guest accesses (e.g., write,
- * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on
- * hugetlbfs with a hole). It checks that the expected handling method is
- * called (e.g., uffd faults with the right address and write/read flag).
- */
-#include <linux/bitmap.h>
-#include <fcntl.h>
-#include <test_util.h>
-#include <kvm_util.h>
-#include <processor.h>
-#include <asm/sysreg.h>
-#include <linux/bitfield.h>
-#include "guest_modes.h"
-#include "userfaultfd_util.h"
-
-/* Guest virtual addresses that point to the test page and its PTE. */
-#define TEST_GVA 0xc0000000
-#define TEST_EXEC_GVA (TEST_GVA + 0x8)
-#define TEST_PTE_GVA 0xb0000000
-#define TEST_DATA 0x0123456789ABCDEF
-
-static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA;
-
-#define CMD_NONE (0)
-#define CMD_SKIP_TEST (1ULL << 1)
-#define CMD_HOLE_PT (1ULL << 2)
-#define CMD_HOLE_DATA (1ULL << 3)
-#define CMD_CHECK_WRITE_IN_DIRTY_LOG (1ULL << 4)
-#define CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG (1ULL << 5)
-#define CMD_CHECK_NO_WRITE_IN_DIRTY_LOG (1ULL << 6)
-#define CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG (1ULL << 7)
-#define CMD_SET_PTE_AF (1ULL << 8)
-
-#define PREPARE_FN_NR 10
-#define CHECK_FN_NR 10
-
-static struct event_cnt {
- int mmio_exits;
- int fail_vcpu_runs;
- int uffd_faults;
- /* uffd_faults is incremented from multiple threads. */
- pthread_mutex_t uffd_faults_mutex;
-} events;
-
-struct test_desc {
- const char *name;
- uint64_t mem_mark_cmd;
- /* Skip the test if any prepare function returns false */
- bool (*guest_prepare[PREPARE_FN_NR])(void);
- void (*guest_test)(void);
- void (*guest_test_check[CHECK_FN_NR])(void);
- uffd_handler_t uffd_pt_handler;
- uffd_handler_t uffd_data_handler;
- void (*dabt_handler)(struct ex_regs *regs);
- void (*iabt_handler)(struct ex_regs *regs);
- void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run);
- void (*fail_vcpu_run_handler)(int ret);
- uint32_t pt_memslot_flags;
- uint32_t data_memslot_flags;
- bool skip;
- struct event_cnt expected_events;
-};
-
-struct test_params {
- enum vm_mem_backing_src_type src_type;
- struct test_desc *test_desc;
-};
-
-static inline void flush_tlb_page(uint64_t vaddr)
-{
- uint64_t page = vaddr >> 12;
-
- dsb(ishst);
- asm volatile("tlbi vaae1is, %0" :: "r" (page));
- dsb(ish);
- isb();
-}
-
-static void guest_write64(void)
-{
- uint64_t val;
-
- WRITE_ONCE(*guest_test_memory, TEST_DATA);
- val = READ_ONCE(*guest_test_memory);
- GUEST_ASSERT_EQ(val, TEST_DATA);
-}
-
-/* Check the system for atomic instructions. */
-static bool guest_check_lse(void)
-{
- uint64_t isar0 = read_sysreg(id_aa64isar0_el1);
- uint64_t atomic;
-
- atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0);
- return atomic >= 2;
-}
-
-static bool guest_check_dc_zva(void)
-{
- uint64_t dczid = read_sysreg(dczid_el0);
- uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid);
-
- return dzp == 0;
-}
-
-/* Compare and swap instruction. */
-static void guest_cas(void)
-{
- uint64_t val;
-
- GUEST_ASSERT(guest_check_lse());
- asm volatile(".arch_extension lse\n"
- "casal %0, %1, [%2]\n"
- :: "r" (0ul), "r" (TEST_DATA), "r" (guest_test_memory));
- val = READ_ONCE(*guest_test_memory);
- GUEST_ASSERT_EQ(val, TEST_DATA);
-}
-
-static void guest_read64(void)
-{
- uint64_t val;
-
- val = READ_ONCE(*guest_test_memory);
- GUEST_ASSERT_EQ(val, 0);
-}
-
-/* Address translation instruction */
-static void guest_at(void)
-{
- uint64_t par;
-
- asm volatile("at s1e1r, %0" :: "r" (guest_test_memory));
- isb();
- par = read_sysreg(par_el1);
-
- /* PAR_EL1.F (bit 0) indicates whether the AT was successful */
- GUEST_ASSERT_EQ(par & 1, 0);
-}
-
-/*
- * The size of the block written by "dc zva" is guaranteed to be between (2 <<
- * 0) and (2 << 9), which is safe in our case as we need the write to happen
- * for at least a word, and not more than a page.
- */
-static void guest_dc_zva(void)
-{
- uint16_t val;
-
- asm volatile("dc zva, %0" :: "r" (guest_test_memory));
- dsb(ish);
- val = READ_ONCE(*guest_test_memory);
- GUEST_ASSERT_EQ(val, 0);
-}
-
-/*
- * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0).
- * KVM must take special care with those: they should still count as accesses
- * for dirty logging and userfaultfd handling, but must be handled differently
- * on MMIO.
- */
-static void guest_ld_preidx(void)
-{
- uint64_t val;
- uint64_t addr = TEST_GVA - 8;
-
- /*
- * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is
- * in a gap between memslots that isn't backed by anything.
- */
- asm volatile("ldr %0, [%1, #8]!"
- : "=r" (val), "+r" (addr));
- GUEST_ASSERT_EQ(val, 0);
- GUEST_ASSERT_EQ(addr, TEST_GVA);
-}
-
-static void guest_st_preidx(void)
-{
- uint64_t val = TEST_DATA;
- uint64_t addr = TEST_GVA - 8;
-
- asm volatile("str %0, [%1, #8]!"
- : "+r" (val), "+r" (addr));
-
- GUEST_ASSERT_EQ(addr, TEST_GVA);
- val = READ_ONCE(*guest_test_memory);
-}
-
-static bool guest_set_ha(void)
-{
- uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1);
- uint64_t hadbs, tcr;
-
- /* Skip if HA is not supported. */
- hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1);
- if (hadbs == 0)
- return false;
-
- tcr = read_sysreg(tcr_el1) | TCR_EL1_HA;
- write_sysreg(tcr, tcr_el1);
- isb();
-
- return true;
-}
-
-static bool guest_clear_pte_af(void)
-{
- *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF;
- flush_tlb_page(TEST_GVA);
-
- return true;
-}
-
-static void guest_check_pte_af(void)
-{
- dsb(ish);
- GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF);
-}
-
-static void guest_check_write_in_dirty_log(void)
-{
- GUEST_SYNC(CMD_CHECK_WRITE_IN_DIRTY_LOG);
-}
-
-static void guest_check_no_write_in_dirty_log(void)
-{
- GUEST_SYNC(CMD_CHECK_NO_WRITE_IN_DIRTY_LOG);
-}
-
-static void guest_check_s1ptw_wr_in_dirty_log(void)
-{
- GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG);
-}
-
-static void guest_check_no_s1ptw_wr_in_dirty_log(void)
-{
- GUEST_SYNC(CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG);
-}
-
-static void guest_exec(void)
-{
- int (*code)(void) = (int (*)(void))TEST_EXEC_GVA;
- int ret;
-
- ret = code();
- GUEST_ASSERT_EQ(ret, 0x77);
-}
-
-static bool guest_prepare(struct test_desc *test)
-{
- bool (*prepare_fn)(void);
- int i;
-
- for (i = 0; i < PREPARE_FN_NR; i++) {
- prepare_fn = test->guest_prepare[i];
- if (prepare_fn && !prepare_fn())
- return false;
- }
-
- return true;
-}
-
-static void guest_test_check(struct test_desc *test)
-{
- void (*check_fn)(void);
- int i;
-
- for (i = 0; i < CHECK_FN_NR; i++) {
- check_fn = test->guest_test_check[i];
- if (check_fn)
- check_fn();
- }
-}
-
-static void guest_code(struct test_desc *test)
-{
- if (!guest_prepare(test))
- GUEST_SYNC(CMD_SKIP_TEST);
-
- GUEST_SYNC(test->mem_mark_cmd);
-
- if (test->guest_test)
- test->guest_test();
-
- guest_test_check(test);
- GUEST_DONE();
-}
-
-static void no_dabt_handler(struct ex_regs *regs)
-{
- GUEST_FAIL("Unexpected dabt, far_el1 = 0x%lx", read_sysreg(far_el1));
-}
-
-static void no_iabt_handler(struct ex_regs *regs)
-{
- GUEST_FAIL("Unexpected iabt, pc = 0x%lx", regs->pc);
-}
-
-static struct uffd_args {
- char *copy;
- void *hva;
- uint64_t paging_size;
-} pt_args, data_args;
-
-/* Returns 0 on success, or a negative value if the UFFDIO_COPY failed. */
-static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg,
- struct uffd_args *args)
-{
- uint64_t addr = msg->arg.pagefault.address;
- uint64_t flags = msg->arg.pagefault.flags;
- struct uffdio_copy copy;
- int ret;
-
- TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING,
- "The only expected UFFD mode is MISSING");
- TEST_ASSERT_EQ(addr, (uint64_t)args->hva);
-
- pr_debug("uffd fault: addr=%p write=%d\n",
- (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE));
-
- copy.src = (uint64_t)args->copy;
- copy.dst = addr;
- copy.len = args->paging_size;
- copy.mode = 0;
-
- ret = ioctl(uffd, UFFDIO_COPY, &copy);
- if (ret == -1) {
- pr_info("Failed UFFDIO_COPY in 0x%lx with errno: %d\n",
- addr, errno);
- return ret;
- }
-
- pthread_mutex_lock(&events.uffd_faults_mutex);
- events.uffd_faults += 1;
- pthread_mutex_unlock(&events.uffd_faults_mutex);
- return 0;
-}
-
-static int uffd_pt_handler(int mode, int uffd, struct uffd_msg *msg)
-{
- return uffd_generic_handler(mode, uffd, msg, &pt_args);
-}
-
-static int uffd_data_handler(int mode, int uffd, struct uffd_msg *msg)
-{
- return uffd_generic_handler(mode, uffd, msg, &data_args);
-}
-
-static void setup_uffd_args(struct userspace_mem_region *region,
- struct uffd_args *args)
-{
- args->hva = (void *)region->region.userspace_addr;
- args->paging_size = region->region.memory_size;
-
- args->copy = malloc(args->paging_size);
- TEST_ASSERT(args->copy, "Failed to allocate data copy.");
- memcpy(args->copy, args->hva, args->paging_size);
-}
-
-static void setup_uffd(struct kvm_vm *vm, struct test_params *p,
- struct uffd_desc **pt_uffd, struct uffd_desc **data_uffd)
-{
- struct test_desc *test = p->test_desc;
- int uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
-
- setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_PT), &pt_args);
- setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_TEST_DATA), &data_args);
-
- *pt_uffd = NULL;
- if (test->uffd_pt_handler)
- *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0,
- pt_args.hva,
- pt_args.paging_size,
- 1, test->uffd_pt_handler);
-
- *data_uffd = NULL;
- if (test->uffd_data_handler)
- *data_uffd = uffd_setup_demand_paging(uffd_mode, 0,
- data_args.hva,
- data_args.paging_size,
- 1, test->uffd_data_handler);
-}
-
-static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd,
- struct uffd_desc *data_uffd)
-{
- if (test->uffd_pt_handler)
- uffd_stop_demand_paging(pt_uffd);
- if (test->uffd_data_handler)
- uffd_stop_demand_paging(data_uffd);
-
- free(pt_args.copy);
- free(data_args.copy);
-}
-
-static int uffd_no_handler(int mode, int uffd, struct uffd_msg *msg)
-{
- TEST_FAIL("There was no UFFD fault expected.");
- return -1;
-}
-
-/* Returns false if the test should be skipped. */
-static bool punch_hole_in_backing_store(struct kvm_vm *vm,
- struct userspace_mem_region *region)
-{
- void *hva = (void *)region->region.userspace_addr;
- uint64_t paging_size = region->region.memory_size;
- int ret, fd = region->fd;
-
- if (fd != -1) {
- ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- 0, paging_size);
- TEST_ASSERT(ret == 0, "fallocate failed");
- } else {
- ret = madvise(hva, paging_size, MADV_DONTNEED);
- TEST_ASSERT(ret == 0, "madvise failed");
- }
-
- return true;
-}
-
-static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run)
-{
- struct userspace_mem_region *region;
- void *hva;
-
- region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
- hva = (void *)region->region.userspace_addr;
-
- TEST_ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr);
-
- memcpy(hva, run->mmio.data, run->mmio.len);
- events.mmio_exits += 1;
-}
-
-static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run)
-{
- uint64_t data;
-
- memcpy(&data, run->mmio.data, sizeof(data));
- pr_debug("addr=%lld len=%d w=%d data=%lx\n",
- run->mmio.phys_addr, run->mmio.len,
- run->mmio.is_write, data);
- TEST_FAIL("There was no MMIO exit expected.");
-}
-
-static bool check_write_in_dirty_log(struct kvm_vm *vm,
- struct userspace_mem_region *region,
- uint64_t host_pg_nr)
-{
- unsigned long *bmap;
- bool first_page_dirty;
- uint64_t size = region->region.memory_size;
-
- /* getpagesize() is not always equal to vm->page_size */
- bmap = bitmap_zalloc(size / getpagesize());
- kvm_vm_get_dirty_log(vm, region->region.slot, bmap);
- first_page_dirty = test_bit(host_pg_nr, bmap);
- free(bmap);
- return first_page_dirty;
-}
-
-/* Returns true to continue the test, and false if it should be skipped. */
-static bool handle_cmd(struct kvm_vm *vm, int cmd)
-{
- struct userspace_mem_region *data_region, *pt_region;
- bool continue_test = true;
- uint64_t pte_gpa, pte_pg;
-
- data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
- pt_region = vm_get_mem_region(vm, MEM_REGION_PT);
- pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
- pte_pg = (pte_gpa - pt_region->region.guest_phys_addr) / getpagesize();
-
- if (cmd == CMD_SKIP_TEST)
- continue_test = false;
-
- if (cmd & CMD_HOLE_PT)
- continue_test = punch_hole_in_backing_store(vm, pt_region);
- if (cmd & CMD_HOLE_DATA)
- continue_test = punch_hole_in_backing_store(vm, data_region);
- if (cmd & CMD_CHECK_WRITE_IN_DIRTY_LOG)
- TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0),
- "Missing write in dirty log");
- if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG)
- TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, pte_pg),
- "Missing s1ptw write in dirty log");
- if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG)
- TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0),
- "Unexpected write in dirty log");
- if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG)
- TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, pte_pg),
- "Unexpected s1ptw write in dirty log");
-
- return continue_test;
-}
-
-void fail_vcpu_run_no_handler(int ret)
-{
- TEST_FAIL("Unexpected vcpu run failure");
-}
-
-void fail_vcpu_run_mmio_no_syndrome_handler(int ret)
-{
- TEST_ASSERT(errno == ENOSYS,
- "The mmio handler should have returned not implemented.");
- events.fail_vcpu_runs += 1;
-}
-
-typedef uint32_t aarch64_insn_t;
-extern aarch64_insn_t __exec_test[2];
-
-noinline void __return_0x77(void)
-{
- asm volatile("__exec_test: mov x0, #0x77\n"
- "ret\n");
-}
-
-/*
- * Note that this function runs on the host before the test VM starts: there's
- * no need to sync the D$ and I$ caches.
- */
-static void load_exec_code_for_test(struct kvm_vm *vm)
-{
- uint64_t *code;
- struct userspace_mem_region *region;
- void *hva;
-
- region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
- hva = (void *)region->region.userspace_addr;
-
- assert(TEST_EXEC_GVA > TEST_GVA);
- code = hva + TEST_EXEC_GVA - TEST_GVA;
- memcpy(code, __exec_test, sizeof(__exec_test));
-}
-
-static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
- struct test_desc *test)
-{
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
- vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
- ESR_ELx_EC_DABT_CUR, no_dabt_handler);
- vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
- ESR_ELx_EC_IABT_CUR, no_iabt_handler);
-}
-
-static void setup_gva_maps(struct kvm_vm *vm)
-{
- struct userspace_mem_region *region;
- uint64_t pte_gpa;
-
- region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
- /* Map TEST_GVA first. This will install a new PTE. */
- virt_pg_map(vm, TEST_GVA, region->region.guest_phys_addr);
- /* Then map TEST_PTE_GVA to the above PTE. */
- pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
- virt_pg_map(vm, TEST_PTE_GVA, pte_gpa);
-}
-
-enum pf_test_memslots {
- CODE_AND_DATA_MEMSLOT,
- PAGE_TABLE_MEMSLOT,
- TEST_DATA_MEMSLOT,
-};
-
-/*
- * Create a memslot for code and data at pfn=0, and test-data and PT ones
- * at max_gfn.
- */
-static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
-{
- uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type);
- uint64_t guest_page_size = vm->page_size;
- uint64_t max_gfn = vm_compute_max_gfn(vm);
- /* Enough for 2M of code when using 4K guest pages. */
- uint64_t code_npages = 512;
- uint64_t pt_size, data_size, data_gpa;
-
- /*
- * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using
- * VM_MODE_P48V48_4K. Note that the .text takes ~1.6MB. That's 13
- * pages. VM_MODE_P48V48_4K is the mode with most PT pages; let's use
- * twice that just in case.
- */
- pt_size = 26 * guest_page_size;
-
- /* memslot sizes and gpa's must be aligned to the backing page size */
- pt_size = align_up(pt_size, backing_src_pagesz);
- data_size = align_up(guest_page_size, backing_src_pagesz);
- data_gpa = (max_gfn * guest_page_size) - data_size;
- data_gpa = align_down(data_gpa, backing_src_pagesz);
-
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0,
- CODE_AND_DATA_MEMSLOT, code_npages, 0);
- vm->memslots[MEM_REGION_CODE] = CODE_AND_DATA_MEMSLOT;
- vm->memslots[MEM_REGION_DATA] = CODE_AND_DATA_MEMSLOT;
-
- vm_userspace_mem_region_add(vm, p->src_type, data_gpa - pt_size,
- PAGE_TABLE_MEMSLOT, pt_size / guest_page_size,
- p->test_desc->pt_memslot_flags);
- vm->memslots[MEM_REGION_PT] = PAGE_TABLE_MEMSLOT;
-
- vm_userspace_mem_region_add(vm, p->src_type, data_gpa, TEST_DATA_MEMSLOT,
- data_size / guest_page_size,
- p->test_desc->data_memslot_flags);
- vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
-}
-
-static void setup_ucall(struct kvm_vm *vm)
-{
- struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
-
- ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size);
-}
-
-static void setup_default_handlers(struct test_desc *test)
-{
- if (!test->mmio_handler)
- test->mmio_handler = mmio_no_handler;
-
- if (!test->fail_vcpu_run_handler)
- test->fail_vcpu_run_handler = fail_vcpu_run_no_handler;
-}
-
-static void check_event_counts(struct test_desc *test)
-{
- TEST_ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);
- TEST_ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits);
- TEST_ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs);
-}
-
-static void print_test_banner(enum vm_guest_mode mode, struct test_params *p)
-{
- struct test_desc *test = p->test_desc;
-
- pr_debug("Test: %s\n", test->name);
- pr_debug("Testing guest mode: %s\n", vm_guest_mode_string(mode));
- pr_debug("Testing memory backing src type: %s\n",
- vm_mem_backing_src_alias(p->src_type)->name);
-}
-
-static void reset_event_counts(void)
-{
- memset(&events, 0, sizeof(events));
-}
-
-/*
- * This function either succeeds, skips the test (after setting test->skip), or
- * fails with a TEST_FAIL that aborts all tests.
- */
-static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
- struct test_desc *test)
-{
- struct kvm_run *run;
- struct ucall uc;
- int ret;
-
- run = vcpu->run;
-
- for (;;) {
- ret = _vcpu_run(vcpu);
- if (ret) {
- test->fail_vcpu_run_handler(ret);
- goto done;
- }
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- if (!handle_cmd(vm, uc.args[1])) {
- test->skip = true;
- goto done;
- }
- break;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_DONE:
- goto done;
- case UCALL_NONE:
- if (run->exit_reason == KVM_EXIT_MMIO)
- test->mmio_handler(vm, run);
- break;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
- }
-
-done:
- pr_debug(test->skip ? "Skipped.\n" : "Done.\n");
-}
-
-static void run_test(enum vm_guest_mode mode, void *arg)
-{
- struct test_params *p = (struct test_params *)arg;
- struct test_desc *test = p->test_desc;
- struct kvm_vm *vm;
- struct kvm_vcpu *vcpu;
- struct uffd_desc *pt_uffd, *data_uffd;
-
- print_test_banner(mode, p);
-
- vm = ____vm_create(VM_SHAPE(mode));
- setup_memslots(vm, p);
- kvm_vm_elf_load(vm, program_invocation_name);
- setup_ucall(vm);
- vcpu = vm_vcpu_add(vm, 0, guest_code);
-
- setup_gva_maps(vm);
-
- reset_event_counts();
-
- /*
- * Set some code in the data memslot for the guest to execute (only
- * applicable to the EXEC tests). This has to be done before
- * setup_uffd() as that function copies the memslot data for the uffd
- * handler.
- */
- load_exec_code_for_test(vm);
- setup_uffd(vm, p, &pt_uffd, &data_uffd);
- setup_abort_handlers(vm, vcpu, test);
- setup_default_handlers(test);
- vcpu_args_set(vcpu, 1, test);
-
- vcpu_run_loop(vm, vcpu, test);
-
- kvm_vm_free(vm);
- free_uffd(test, pt_uffd, data_uffd);
-
- /*
- * Make sure we check the events after the uffd threads have exited,
- * which means they updated their respective event counters.
- */
- if (!test->skip)
- check_event_counts(test);
-}
-
-static void help(char *name)
-{
- puts("");
- printf("usage: %s [-h] [-s mem-type]\n", name);
- puts("");
- guest_modes_help();
- backing_src_help("-s");
- puts("");
-}
-
-#define SNAME(s) #s
-#define SCAT2(a, b) SNAME(a ## _ ## b)
-#define SCAT3(a, b, c) SCAT2(a, SCAT2(b, c))
-#define SCAT4(a, b, c, d) SCAT2(a, SCAT3(b, c, d))
-
-#define _CHECK(_test) _CHECK_##_test
-#define _PREPARE(_test) _PREPARE_##_test
-#define _PREPARE_guest_read64 NULL
-#define _PREPARE_guest_ld_preidx NULL
-#define _PREPARE_guest_write64 NULL
-#define _PREPARE_guest_st_preidx NULL
-#define _PREPARE_guest_exec NULL
-#define _PREPARE_guest_at NULL
-#define _PREPARE_guest_dc_zva guest_check_dc_zva
-#define _PREPARE_guest_cas guest_check_lse
-
-/* With or without access flag checks */
-#define _PREPARE_with_af guest_set_ha, guest_clear_pte_af
-#define _PREPARE_no_af NULL
-#define _CHECK_with_af guest_check_pte_af
-#define _CHECK_no_af NULL
-
-/* Performs an access and checks that no faults were triggered. */
-#define TEST_ACCESS(_access, _with_af, _mark_cmd) \
-{ \
- .name = SCAT3(_access, _with_af, #_mark_cmd), \
- .guest_prepare = { _PREPARE(_with_af), \
- _PREPARE(_access) }, \
- .mem_mark_cmd = _mark_cmd, \
- .guest_test = _access, \
- .guest_test_check = { _CHECK(_with_af) }, \
- .expected_events = { 0 }, \
-}
-
-#define TEST_UFFD(_access, _with_af, _mark_cmd, \
- _uffd_data_handler, _uffd_pt_handler, _uffd_faults) \
-{ \
- .name = SCAT4(uffd, _access, _with_af, #_mark_cmd), \
- .guest_prepare = { _PREPARE(_with_af), \
- _PREPARE(_access) }, \
- .guest_test = _access, \
- .mem_mark_cmd = _mark_cmd, \
- .guest_test_check = { _CHECK(_with_af) }, \
- .uffd_data_handler = _uffd_data_handler, \
- .uffd_pt_handler = _uffd_pt_handler, \
- .expected_events = { .uffd_faults = _uffd_faults, }, \
-}
-
-#define TEST_DIRTY_LOG(_access, _with_af, _test_check, _pt_check) \
-{ \
- .name = SCAT3(dirty_log, _access, _with_af), \
- .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
- .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
- .guest_prepare = { _PREPARE(_with_af), \
- _PREPARE(_access) }, \
- .guest_test = _access, \
- .guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \
- .expected_events = { 0 }, \
-}
-
-#define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler, \
- _uffd_faults, _test_check, _pt_check) \
-{ \
- .name = SCAT3(uffd_and_dirty_log, _access, _with_af), \
- .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
- .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
- .guest_prepare = { _PREPARE(_with_af), \
- _PREPARE(_access) }, \
- .guest_test = _access, \
- .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
- .guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \
- .uffd_data_handler = _uffd_data_handler, \
- .uffd_pt_handler = uffd_pt_handler, \
- .expected_events = { .uffd_faults = _uffd_faults, }, \
-}
-
-#define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits) \
-{ \
- .name = SCAT2(ro_memslot, _access), \
- .data_memslot_flags = KVM_MEM_READONLY, \
- .pt_memslot_flags = KVM_MEM_READONLY, \
- .guest_prepare = { _PREPARE(_access) }, \
- .guest_test = _access, \
- .mmio_handler = _mmio_handler, \
- .expected_events = { .mmio_exits = _mmio_exits }, \
-}
-
-#define TEST_RO_MEMSLOT_NO_SYNDROME(_access) \
-{ \
- .name = SCAT2(ro_memslot_no_syndrome, _access), \
- .data_memslot_flags = KVM_MEM_READONLY, \
- .pt_memslot_flags = KVM_MEM_READONLY, \
- .guest_prepare = { _PREPARE(_access) }, \
- .guest_test = _access, \
- .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
- .expected_events = { .fail_vcpu_runs = 1 }, \
-}
-
-#define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits, \
- _test_check) \
-{ \
- .name = SCAT2(ro_memslot, _access), \
- .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
- .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
- .guest_prepare = { _PREPARE(_access) }, \
- .guest_test = _access, \
- .guest_test_check = { _test_check }, \
- .mmio_handler = _mmio_handler, \
- .expected_events = { .mmio_exits = _mmio_exits}, \
-}
-
-#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(_access, _test_check) \
-{ \
- .name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \
- .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
- .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
- .guest_prepare = { _PREPARE(_access) }, \
- .guest_test = _access, \
- .guest_test_check = { _test_check }, \
- .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
- .expected_events = { .fail_vcpu_runs = 1 }, \
-}
-
-#define TEST_RO_MEMSLOT_AND_UFFD(_access, _mmio_handler, _mmio_exits, \
- _uffd_data_handler, _uffd_faults) \
-{ \
- .name = SCAT2(ro_memslot_uffd, _access), \
- .data_memslot_flags = KVM_MEM_READONLY, \
- .pt_memslot_flags = KVM_MEM_READONLY, \
- .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
- .guest_prepare = { _PREPARE(_access) }, \
- .guest_test = _access, \
- .uffd_data_handler = _uffd_data_handler, \
- .uffd_pt_handler = uffd_pt_handler, \
- .mmio_handler = _mmio_handler, \
- .expected_events = { .mmio_exits = _mmio_exits, \
- .uffd_faults = _uffd_faults }, \
-}
-
-#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(_access, _uffd_data_handler, \
- _uffd_faults) \
-{ \
- .name = SCAT2(ro_memslot_no_syndrome, _access), \
- .data_memslot_flags = KVM_MEM_READONLY, \
- .pt_memslot_flags = KVM_MEM_READONLY, \
- .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
- .guest_prepare = { _PREPARE(_access) }, \
- .guest_test = _access, \
- .uffd_data_handler = _uffd_data_handler, \
- .uffd_pt_handler = uffd_pt_handler, \
- .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
- .expected_events = { .fail_vcpu_runs = 1, \
- .uffd_faults = _uffd_faults }, \
-}
-
-static struct test_desc tests[] = {
-
- /* Check that HW is setting the Access Flag (AF) (sanity checks). */
- TEST_ACCESS(guest_read64, with_af, CMD_NONE),
- TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE),
- TEST_ACCESS(guest_cas, with_af, CMD_NONE),
- TEST_ACCESS(guest_write64, with_af, CMD_NONE),
- TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE),
- TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE),
- TEST_ACCESS(guest_exec, with_af, CMD_NONE),
-
- /*
- * Punch a hole in the data backing store, and then try multiple
- * accesses: reads should return zeroes, and writes should
- * re-populate the page. Moreover, the test also checks that no
- * exception was generated in the guest. Note that this
- * reading/writing behavior is the same as reading/writing a
- * punched page (with fallocate(FALLOC_FL_PUNCH_HOLE)) from
- * userspace.
- */
- TEST_ACCESS(guest_read64, no_af, CMD_HOLE_DATA),
- TEST_ACCESS(guest_cas, no_af, CMD_HOLE_DATA),
- TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_DATA),
- TEST_ACCESS(guest_write64, no_af, CMD_HOLE_DATA),
- TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_DATA),
- TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA),
- TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA),
-
- /*
- * Punch holes in the data and PT backing stores and mark them for
- * userfaultfd handling. This should result in 2 faults: the access
- * on the data backing store, and its respective S1 page table walk
- * (S1PTW).
- */
- TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
- uffd_data_handler, uffd_pt_handler, 2),
- TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
- uffd_data_handler, uffd_pt_handler, 2),
- TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
- uffd_data_handler, uffd_pt_handler, 2),
- /*
- * Can't test guest_at with_af as it's IMPDEF whether the AF is set.
- * The S1PTW fault should still be marked as a write.
- */
- TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
- uffd_no_handler, uffd_pt_handler, 1),
- TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
- uffd_data_handler, uffd_pt_handler, 2),
- TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
- uffd_data_handler, uffd_pt_handler, 2),
- TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
- uffd_data_handler, uffd_pt_handler, 2),
- TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
- uffd_data_handler, uffd_pt_handler, 2),
- TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
- uffd_data_handler, uffd_pt_handler, 2),
-
- /*
- * Try accesses when the data and PT memory regions are both
- * tracked for dirty logging.
- */
- TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log,
- guest_check_s1ptw_wr_in_dirty_log),
- TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log,
- guest_check_no_s1ptw_wr_in_dirty_log),
- TEST_DIRTY_LOG(guest_ld_preidx, with_af,
- guest_check_no_write_in_dirty_log,
- guest_check_s1ptw_wr_in_dirty_log),
- TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log,
- guest_check_no_s1ptw_wr_in_dirty_log),
- TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log,
- guest_check_s1ptw_wr_in_dirty_log),
- TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log,
- guest_check_s1ptw_wr_in_dirty_log),
- TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log,
- guest_check_s1ptw_wr_in_dirty_log),
- TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log,
- guest_check_s1ptw_wr_in_dirty_log),
- TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log,
- guest_check_s1ptw_wr_in_dirty_log),
-
- /*
- * Access when the data and PT memory regions are both marked for
- * dirty logging and UFFD at the same time. The expected result is
- * that writes should mark the dirty log and trigger a userfaultfd
- * write fault. Reads/execs should result in a read userfaultfd
- * fault, and nothing in the dirty log. Any S1PTW should result in
- * a write in the dirty log and a userfaultfd write.
- */
- TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af,
- uffd_data_handler, 2,
- guest_check_no_write_in_dirty_log,
- guest_check_s1ptw_wr_in_dirty_log),
- TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af,
- uffd_data_handler, 2,
- guest_check_no_write_in_dirty_log,
- guest_check_no_s1ptw_wr_in_dirty_log),
- TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af,
- uffd_data_handler,
- 2, guest_check_no_write_in_dirty_log,
- guest_check_s1ptw_wr_in_dirty_log),
- TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, uffd_no_handler, 1,
- guest_check_no_write_in_dirty_log,
- guest_check_s1ptw_wr_in_dirty_log),
- TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af,
- uffd_data_handler, 2,
- guest_check_no_write_in_dirty_log,
- guest_check_s1ptw_wr_in_dirty_log),
- TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af,
- uffd_data_handler,
- 2, guest_check_write_in_dirty_log,
- guest_check_s1ptw_wr_in_dirty_log),
- TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af,
- uffd_data_handler, 2,
- guest_check_write_in_dirty_log,
- guest_check_s1ptw_wr_in_dirty_log),
- TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af,
- uffd_data_handler,
- 2, guest_check_write_in_dirty_log,
- guest_check_s1ptw_wr_in_dirty_log),
- TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af,
- uffd_data_handler, 2,
- guest_check_write_in_dirty_log,
- guest_check_s1ptw_wr_in_dirty_log),
- /*
- * Access when both the PT and data regions are marked read-only
- * (with KVM_MEM_READONLY). Writes with a syndrome result in an
- * MMIO exit, writes with no syndrome (e.g., CAS) result in a
- * failed vcpu run, and reads/execs with and without syndromes do
- * not fault.
- */
- TEST_RO_MEMSLOT(guest_read64, 0, 0),
- TEST_RO_MEMSLOT(guest_ld_preidx, 0, 0),
- TEST_RO_MEMSLOT(guest_at, 0, 0),
- TEST_RO_MEMSLOT(guest_exec, 0, 0),
- TEST_RO_MEMSLOT(guest_write64, mmio_on_test_gpa_handler, 1),
- TEST_RO_MEMSLOT_NO_SYNDROME(guest_dc_zva),
- TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas),
- TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx),
-
- /*
- * The PT and data regions are both read-only and marked
- * for dirty logging at the same time. The expected result is that
- * for writes there should be no write in the dirty log. The
- * readonly handling is the same as if the memslot was not marked
- * for dirty logging: writes with a syndrome result in an MMIO
- * exit, and writes with no syndrome result in a failed vcpu run.
- */
- TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_read64, 0, 0,
- guest_check_no_write_in_dirty_log),
- TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_ld_preidx, 0, 0,
- guest_check_no_write_in_dirty_log),
- TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_at, 0, 0,
- guest_check_no_write_in_dirty_log),
- TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_exec, 0, 0,
- guest_check_no_write_in_dirty_log),
- TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_write64, mmio_on_test_gpa_handler,
- 1, guest_check_no_write_in_dirty_log),
- TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_dc_zva,
- guest_check_no_write_in_dirty_log),
- TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_cas,
- guest_check_no_write_in_dirty_log),
- TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_st_preidx,
- guest_check_no_write_in_dirty_log),
-
- /*
- * The PT and data regions are both read-only and have holes punched
- * in their backing stores, with the holes tracked by userfaultfd.
- * The expected result is the union of both userfaultfd and
- * read-only behaviors. For example,
- * write accesses result in a userfaultfd write fault and an MMIO
- * exit. Writes with no syndrome result in a failed vcpu run and
- * no userfaultfd write fault. Reads result in userfaultfd getting
- * triggered.
- */
- TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, uffd_data_handler, 2),
- TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, uffd_data_handler, 2),
- TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, uffd_no_handler, 1),
- TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, uffd_data_handler, 2),
- TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1,
- uffd_data_handler, 2),
- TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, uffd_data_handler, 2),
- TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, uffd_no_handler, 1),
- TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, uffd_no_handler, 1),
-
- { 0 }
-};
-
-static void for_each_test_and_guest_mode(enum vm_mem_backing_src_type src_type)
-{
- struct test_desc *t;
-
- for (t = &tests[0]; t->name; t++) {
- if (t->skip)
- continue;
-
- struct test_params p = {
- .src_type = src_type,
- .test_desc = t,
- };
-
- for_each_guest_mode(run_test, &p);
- }
-}
-
-int main(int argc, char *argv[])
-{
- enum vm_mem_backing_src_type src_type;
- int opt;
-
- src_type = DEFAULT_VM_MEM_SRC;
-
- while ((opt = getopt(argc, argv, "hm:s:")) != -1) {
- switch (opt) {
- case 'm':
- guest_modes_cmdline(optarg);
- break;
- case 's':
- src_type = parse_backing_src_type(optarg);
- break;
- case 'h':
- default:
- help(argv[0]);
- exit(0);
- }
- }
-
- for_each_test_and_guest_mode(src_type);
- return 0;
-}
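uffd_setup_demand_paging() above is a selftest-library helper; the standard userfaultfd setup it relies on is a userfaultfd() syscall, a UFFDIO_API handshake, then UFFDIO_REGISTER of the range for MISSING faults. A minimal sketch under those assumptions; the helper name and error handling are illustrative.

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/userfaultfd.h>

/* Hypothetical helper: returns a uffd watching [addr, addr + len). */
static int uffd_register_missing(void *addr, unsigned long len)
{
	struct uffdio_api api = { .api = UFFD_API };
	struct uffdio_register reg = {
		.range = { .start = (unsigned long)addr, .len = len },
		.mode = UFFDIO_REGISTER_MODE_MISSING,
	};
	int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);

	if (uffd < 0)
		return -1;

	if (ioctl(uffd, UFFDIO_API, &api) || ioctl(uffd, UFFDIO_REGISTER, &reg)) {
		close(uffd);
		return -1;
	}

	/* Missing-page faults on the range now arrive as uffd messages. */
	return uffd;
}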
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * psci_test - Tests relating to KVM's PSCI implementation.
- *
- * Copyright (c) 2021 Google LLC.
- *
- * This test includes:
- * - A regression test for a race between KVM servicing the PSCI CPU_ON call
- * and userspace reading the targeted vCPU's registers.
- * - A test for KVM's handling of PSCI SYSTEM_SUSPEND and the associated
- * KVM_SYSTEM_EVENT_SUSPEND UAPI.
- */
-
-#include <linux/kernel.h>
-#include <linux/psci.h>
-#include <asm/cputype.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "test_util.h"
-
-#define CPU_ON_ENTRY_ADDR 0xfeedf00dul
-#define CPU_ON_CONTEXT_ID 0xdeadc0deul
-
-static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr,
- uint64_t context_id)
-{
- struct arm_smccc_res res;
-
- smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id,
- 0, 0, 0, 0, &res);
-
- return res.a0;
-}
-
-static uint64_t psci_affinity_info(uint64_t target_affinity,
- uint64_t lowest_affinity_level)
-{
- struct arm_smccc_res res;
-
- smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level,
- 0, 0, 0, 0, 0, &res);
-
- return res.a0;
-}
-
-static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id)
-{
- struct arm_smccc_res res;
-
- smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id,
- 0, 0, 0, 0, 0, &res);
-
- return res.a0;
-}
-
-static uint64_t psci_system_off2(uint64_t type, uint64_t cookie)
-{
- struct arm_smccc_res res;
-
- smccc_hvc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res);
-
- return res.a0;
-}
-
-static uint64_t psci_features(uint32_t func_id)
-{
- struct arm_smccc_res res;
-
- smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res);
-
- return res.a0;
-}
-
-static void vcpu_power_off(struct kvm_vcpu *vcpu)
-{
- struct kvm_mp_state mp_state = {
- .mp_state = KVM_MP_STATE_STOPPED,
- };
-
- vcpu_mp_state_set(vcpu, &mp_state);
-}
-
-static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source,
- struct kvm_vcpu **target)
-{
- struct kvm_vcpu_init init;
- struct kvm_vm *vm;
-
- vm = vm_create(2);
-
- vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
- init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
-
- *source = aarch64_vcpu_add(vm, 0, &init, guest_code);
- *target = aarch64_vcpu_add(vm, 1, &init, guest_code);
-
- return vm;
-}
-
-static void enter_guest(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- vcpu_run(vcpu);
- if (get_ucall(vcpu, &uc) == UCALL_ABORT)
- REPORT_GUEST_ASSERT(uc);
-}
-
-static void assert_vcpu_reset(struct kvm_vcpu *vcpu)
-{
- uint64_t obs_pc, obs_x0;
-
- obs_pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
- obs_x0 = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0]));
-
- TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR,
- "unexpected target cpu pc: %lx (expected: %lx)",
- obs_pc, CPU_ON_ENTRY_ADDR);
- TEST_ASSERT(obs_x0 == CPU_ON_CONTEXT_ID,
- "unexpected target context id: %lx (expected: %lx)",
- obs_x0, CPU_ON_CONTEXT_ID);
-}
-
-static void guest_test_cpu_on(uint64_t target_cpu)
-{
- uint64_t target_state;
-
- GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID));
-
- do {
- target_state = psci_affinity_info(target_cpu, 0);
-
- GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) ||
- (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF));
- } while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON);
-
- GUEST_DONE();
-}
-
-static void host_test_cpu_on(void)
-{
- struct kvm_vcpu *source, *target;
- uint64_t target_mpidr;
- struct kvm_vm *vm;
- struct ucall uc;
-
- vm = setup_vm(guest_test_cpu_on, &source, &target);
-
- /*
- * make sure the target is already off when executing the test.
- */
- vcpu_power_off(target);
-
- target_mpidr = vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1));
- vcpu_args_set(source, 1, target_mpidr & MPIDR_HWID_BITMASK);
- enter_guest(source);
-
- if (get_ucall(source, &uc) != UCALL_DONE)
- TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
-
- assert_vcpu_reset(target);
- kvm_vm_free(vm);
-}
-
-static void guest_test_system_suspend(void)
-{
- uint64_t ret;
-
- /* assert that SYSTEM_SUSPEND is discoverable */
- GUEST_ASSERT(!psci_features(PSCI_1_0_FN_SYSTEM_SUSPEND));
- GUEST_ASSERT(!psci_features(PSCI_1_0_FN64_SYSTEM_SUSPEND));
-
- ret = psci_system_suspend(CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID);
- GUEST_SYNC(ret);
-}
-
-static void host_test_system_suspend(void)
-{
- struct kvm_vcpu *source, *target;
- struct kvm_run *run;
- struct kvm_vm *vm;
-
- vm = setup_vm(guest_test_system_suspend, &source, &target);
- vm_enable_cap(vm, KVM_CAP_ARM_SYSTEM_SUSPEND, 0);
-
- vcpu_power_off(target);
- run = source->run;
-
- enter_guest(source);
-
- TEST_ASSERT_KVM_EXIT_REASON(source, KVM_EXIT_SYSTEM_EVENT);
- TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SUSPEND,
- "Unhandled system event: %u (expected: %u)",
- run->system_event.type, KVM_SYSTEM_EVENT_SUSPEND);
-
- kvm_vm_free(vm);
-}
-
-static void guest_test_system_off2(void)
-{
- uint64_t ret;
-
- /* assert that SYSTEM_OFF2 is discoverable */
- GUEST_ASSERT(psci_features(PSCI_1_3_FN_SYSTEM_OFF2) &
- PSCI_1_3_OFF_TYPE_HIBERNATE_OFF);
- GUEST_ASSERT(psci_features(PSCI_1_3_FN64_SYSTEM_OFF2) &
- PSCI_1_3_OFF_TYPE_HIBERNATE_OFF);
-
- /* With non-zero 'cookie' field, it should fail */
- ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 1);
- GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS);
-
- /*
- * This would normally never return, so KVM sets the return value
- * to PSCI_RET_INTERNAL_FAILURE. The test case *does* return, so
- * that it can test both values for HIBERNATE_OFF.
- */
- ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 0);
- GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE);
-
- /*
- * Revision F.b of the PSCI v1.3 specification documents zero as an
- * alias for HIBERNATE_OFF, since that's the value used in earlier
- * revisions of the spec and some implementations in the field.
- */
- ret = psci_system_off2(0, 1);
- GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS);
-
- ret = psci_system_off2(0, 0);
- GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE);
-
- GUEST_DONE();
-}
-
-static void host_test_system_off2(void)
-{
- struct kvm_vcpu *source, *target;
- struct kvm_mp_state mps;
- uint64_t psci_version = 0;
- int nr_shutdowns = 0;
- struct kvm_run *run;
- struct ucall uc;
-
- setup_vm(guest_test_system_off2, &source, &target);
-
- psci_version = vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION);
-
- TEST_ASSERT(psci_version >= PSCI_VERSION(1, 3),
- "Unexpected PSCI version %lu.%lu",
- PSCI_VERSION_MAJOR(psci_version),
- PSCI_VERSION_MINOR(psci_version));
-
- vcpu_power_off(target);
- run = source->run;
-
- enter_guest(source);
- while (run->exit_reason == KVM_EXIT_SYSTEM_EVENT) {
- TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SHUTDOWN,
- "Unhandled system event: %u (expected: %u)",
- run->system_event.type, KVM_SYSTEM_EVENT_SHUTDOWN);
- TEST_ASSERT(run->system_event.ndata >= 1,
- "Unexpected amount of system event data: %u (expected, >= 1)",
- run->system_event.ndata);
- TEST_ASSERT(run->system_event.data[0] & KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2,
- "PSCI_OFF2 flag not set. Flags %llu (expected %llu)",
- run->system_event.data[0], KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2);
-
- nr_shutdowns++;
-
- /* Restart the vCPU */
- mps.mp_state = KVM_MP_STATE_RUNNABLE;
- vcpu_mp_state_set(source, &mps);
-
- enter_guest(source);
- }
-
- TEST_ASSERT(get_ucall(source, &uc) == UCALL_DONE, "Guest did not exit cleanly");
- TEST_ASSERT(nr_shutdowns == 2, "Two shutdown events were expected, but saw %d", nr_shutdowns);
-}
-
-int main(void)
-{
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SYSTEM_SUSPEND));
-
- host_test_cpu_on();
- host_test_system_suspend();
- host_test_system_off2();
- return 0;
-}
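The PSCI_VERSION()/PSCI_VERSION_MAJOR()/PSCI_VERSION_MINOR() macros checked above pack the major revision into bits [31:16] and the minor revision into bits [15:0] of a 32-bit word. A tiny standalone illustration; the version value is hypothetical.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t ver = (1u << 16) | 3;	/* i.e. PSCI_VERSION(1, 3) */

	/* Prints "PSCI v1.3". */
	printf("PSCI v%u.%u\n", ver >> 16, ver & 0xffff);
	return 0;
}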
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * set_id_regs - Test for setting ID registers from userspace.
- *
- * Copyright (c) 2023 Google LLC.
- *
- * Test that KVM supports setting ID registers from userspace and handles the
- * feature set correctly.
- */
-
-#include <stdint.h>
-#include "kvm_util.h"
-#include "processor.h"
-#include "test_util.h"
-#include <linux/bitfield.h>
-
-enum ftr_type {
- FTR_EXACT, /* Use a predefined safe value */
- FTR_LOWER_SAFE, /* Smaller value is safe */
- FTR_HIGHER_SAFE, /* Bigger value is safe */
- FTR_HIGHER_OR_ZERO_SAFE, /* Bigger value is safe, but 0 is biggest */
- FTR_END, /* Mark the last ftr bits */
-};
-
-#define FTR_SIGNED true /* Value should be treated as signed */
-#define FTR_UNSIGNED false /* Value should be treated as unsigned */
-
-struct reg_ftr_bits {
- char *name;
- bool sign;
- enum ftr_type type;
- uint8_t shift;
- uint64_t mask;
- /*
- * For FTR_EXACT, safe_val is used as the exact safe value.
- * For FTR_LOWER_SAFE, safe_val is used as the minimal safe value.
- */
- int64_t safe_val;
-};
-
-struct test_feature_reg {
- uint32_t reg;
- const struct reg_ftr_bits *ftr_bits;
-};
-
-#define __REG_FTR_BITS(NAME, SIGNED, TYPE, SHIFT, MASK, SAFE_VAL) \
- { \
- .name = #NAME, \
- .sign = SIGNED, \
- .type = TYPE, \
- .shift = SHIFT, \
- .mask = MASK, \
- .safe_val = SAFE_VAL, \
- }
-
-#define REG_FTR_BITS(type, reg, field, safe_val) \
- __REG_FTR_BITS(reg##_##field, FTR_UNSIGNED, type, reg##_##field##_SHIFT, \
- reg##_##field##_MASK, safe_val)
-
-#define S_REG_FTR_BITS(type, reg, field, safe_val) \
- __REG_FTR_BITS(reg##_##field, FTR_SIGNED, type, reg##_##field##_SHIFT, \
- reg##_##field##_MASK, safe_val)
-
-#define REG_FTR_END \
- { \
- .type = FTR_END, \
- }
-
-static const struct reg_ftr_bits ftr_id_aa64dfr0_el1[] = {
- S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DoubleLock, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, WRPs, 0),
- S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, PMUVer, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, ID_AA64DFR0_EL1_DebugVer_IMP),
- REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_dfr0_el1[] = {
- S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, PerfMon, ID_DFR0_EL1_PerfMon_PMUv3),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, CopDbg, ID_DFR0_EL1_CopDbg_Armv8),
- REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64isar0_el1[] = {
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RNDR, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TLB, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TS, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, FHM, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, DP, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM4, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM3, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA3, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RDM, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TME, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, ATOMIC, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, CRC32, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA2, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA1, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, AES, 0),
- REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64isar1_el1[] = {
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LS64, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, XS, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, I8MM, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DGH, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, BF16, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SPECRES, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SB, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FRINTTS, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LRCPC, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FCMA, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, JSCVT, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DPB, 0),
- REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = {
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, BC, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, RPRES, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, WFxT, 0),
- REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0),
- REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0),
- REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = {
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0),
- REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0),
- S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0),
- S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGENDEL0, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, SNSMEM, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGEND, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ASIDBITS, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, PARANGE, 0),
- REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = {
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0),
- REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HPDS, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HAFDBS, 0),
- REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = {
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, E0PD, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, BBM, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, TTL, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, AT, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, ST, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, VARange, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, IESB, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, LSM, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, UAO, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, CnP, 0),
- REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = {
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, I8MM, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SM4, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SHA3, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BF16, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BitPerm, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, AES, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SVEver, 0),
- REG_FTR_END,
-};
-
-#define TEST_REG(id, table) \
- { \
- .reg = id, \
- .ftr_bits = &((table)[0]), \
- }
-
-static struct test_feature_reg test_regs[] = {
- TEST_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0_el1),
- TEST_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0_el1),
- TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1),
- TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1),
- TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1),
- TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1),
- TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1),
- TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1),
- TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1),
- TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1),
- TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1),
-};
-
-#define GUEST_REG_SYNC(id) GUEST_SYNC_ARGS(0, id, read_sysreg_s(id), 0, 0);
-
-static void guest_code(void)
-{
- GUEST_REG_SYNC(SYS_ID_AA64DFR0_EL1);
- GUEST_REG_SYNC(SYS_ID_DFR0_EL1);
- GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1);
- GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1);
- GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1);
- GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1);
- GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1);
- GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1);
- GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
- GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
- GUEST_REG_SYNC(SYS_CTR_EL0);
-
- GUEST_DONE();
-}
-
-/* Return a safe value for a given ftr_bits and ftr value */
-uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
-{
- uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
-
- if (ftr_bits->sign == FTR_UNSIGNED) {
- switch (ftr_bits->type) {
- case FTR_EXACT:
- ftr = ftr_bits->safe_val;
- break;
- case FTR_LOWER_SAFE:
- if (ftr > ftr_bits->safe_val)
- ftr--;
- break;
- case FTR_HIGHER_SAFE:
- if (ftr < ftr_max)
- ftr++;
- break;
- case FTR_HIGHER_OR_ZERO_SAFE:
- if (ftr == ftr_max)
- ftr = 0;
- else if (ftr != 0)
- ftr++;
- break;
- default:
- break;
- }
- } else if (ftr != ftr_max) {
- switch (ftr_bits->type) {
- case FTR_EXACT:
- ftr = ftr_bits->safe_val;
- break;
- case FTR_LOWER_SAFE:
- if (ftr > ftr_bits->safe_val)
- ftr--;
- break;
- case FTR_HIGHER_SAFE:
- if (ftr < ftr_max - 1)
- ftr++;
- break;
- case FTR_HIGHER_OR_ZERO_SAFE:
- if (ftr != 0 && ftr != ftr_max - 1)
- ftr++;
- break;
- default:
- break;
- }
- }
-
- return ftr;
-}
-
-/* Return an invalid value for a given ftr_bits and ftr value */
-uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
-{
- uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
-
- if (ftr_bits->sign == FTR_UNSIGNED) {
- switch (ftr_bits->type) {
- case FTR_EXACT:
- ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
- break;
- case FTR_LOWER_SAFE:
- ftr++;
- break;
- case FTR_HIGHER_SAFE:
- ftr--;
- break;
- case FTR_HIGHER_OR_ZERO_SAFE:
- if (ftr == 0)
- ftr = ftr_max;
- else
- ftr--;
- break;
- default:
- break;
- }
- } else if (ftr != ftr_max) {
- switch (ftr_bits->type) {
- case FTR_EXACT:
- ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
- break;
- case FTR_LOWER_SAFE:
- ftr++;
- break;
- case FTR_HIGHER_SAFE:
- ftr--;
- break;
- case FTR_HIGHER_OR_ZERO_SAFE:
- if (ftr == 0)
- ftr = ftr_max - 1;
- else
- ftr--;
- break;
- default:
- break;
- }
- } else {
- ftr = 0;
- }
-
- return ftr;
-}
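A worked example of the two helpers above for the common unsigned FTR_LOWER_SAFE case (field values are hypothetical): lowering the field yields the value KVM must accept, raising it yields the value KVM must reject with EINVAL.

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t ftr = 2;	/* e.g. an ID field currently reading 2 */
	uint64_t safe_val = 0;	/* .safe_val from the corresponding table entry */

	/* get_safe_value(): step down towards safe_val. */
	uint64_t safe = (ftr > safe_val) ? ftr - 1 : ftr;
	/* get_invalid_value(): step up past the current value. */
	uint64_t invalid = ftr + 1;

	assert(safe == 1 && invalid == 3);
	return 0;
}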
-
-static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
- const struct reg_ftr_bits *ftr_bits)
-{
- uint8_t shift = ftr_bits->shift;
- uint64_t mask = ftr_bits->mask;
- uint64_t val, new_val, ftr;
-
- val = vcpu_get_reg(vcpu, reg);
- ftr = (val & mask) >> shift;
-
- ftr = get_safe_value(ftr_bits, ftr);
-
- ftr <<= shift;
- val &= ~mask;
- val |= ftr;
-
- vcpu_set_reg(vcpu, reg, val);
- new_val = vcpu_get_reg(vcpu, reg);
- TEST_ASSERT_EQ(new_val, val);
-
- return new_val;
-}
-
-static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg,
- const struct reg_ftr_bits *ftr_bits)
-{
- uint8_t shift = ftr_bits->shift;
- uint64_t mask = ftr_bits->mask;
- uint64_t val, old_val, ftr;
- int r;
-
- val = vcpu_get_reg(vcpu, reg);
- ftr = (val & mask) >> shift;
-
- ftr = get_invalid_value(ftr_bits, ftr);
-
- old_val = val;
- ftr <<= shift;
- val &= ~mask;
- val |= ftr;
-
- r = __vcpu_set_reg(vcpu, reg, val);
- TEST_ASSERT(r < 0 && errno == EINVAL,
- "Unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
-
- val = vcpu_get_reg(vcpu, reg);
- TEST_ASSERT_EQ(val, old_val);
-}
-
-static uint64_t test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE];
-
-#define encoding_to_range_idx(encoding) \
- KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(encoding), sys_reg_Op1(encoding), \
- sys_reg_CRn(encoding), sys_reg_CRm(encoding), \
- sys_reg_Op2(encoding))
-
-
-static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only)
-{
- uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
- struct reg_mask_range range = {
- .addr = (__u64)masks,
- };
- int ret;
-
- /* KVM should return error when reserved field is not zero */
- range.reserved[0] = 1;
- ret = __vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
- TEST_ASSERT(ret, "KVM doesn't check invalid parameters.");
-
- /* Get writable masks for feature ID registers */
- memset(range.reserved, 0, sizeof(range.reserved));
- vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
-
- for (int i = 0; i < ARRAY_SIZE(test_regs); i++) {
- const struct reg_ftr_bits *ftr_bits = test_regs[i].ftr_bits;
- uint32_t reg_id = test_regs[i].reg;
- uint64_t reg = KVM_ARM64_SYS_REG(reg_id);
- int idx;
-
- /* Get the index to masks array for the idreg */
- idx = encoding_to_range_idx(reg_id);
-
- for (int j = 0; ftr_bits[j].type != FTR_END; j++) {
- /* Skip aarch32 regs on an aarch64-only system, since they are RAZ/WI. */
- if (aarch64_only && sys_reg_CRm(reg_id) < 4) {
- ksft_test_result_skip("%s on AARCH64 only system\n",
- ftr_bits[j].name);
- continue;
- }
-
- /* Make sure the feature field is writable */
- TEST_ASSERT_EQ(masks[idx] & ftr_bits[j].mask, ftr_bits[j].mask);
-
- test_reg_set_fail(vcpu, reg, &ftr_bits[j]);
-
- test_reg_vals[idx] = test_reg_set_success(vcpu, reg,
- &ftr_bits[j]);
-
- ksft_test_result_pass("%s\n", ftr_bits[j].name);
- }
- }
-}
-
-#define MPAM_IDREG_TEST 6
-static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu)
-{
- uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
- struct reg_mask_range range = {
- .addr = (__u64)masks,
- };
- uint64_t val;
- int idx, err;
-
- /*
- * If ID_AA64PFR0.MPAM is _not_ officially modifiable and is zero,
- * check that, if it can be set to 1 (i.e. it is supported by the
- * hardware), it can't be set to other values.
- */
-
- /* Get writable masks for feature ID registers */
- memset(range.reserved, 0, sizeof(range.reserved));
- vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
-
- /* Writeable? Nothing to test! */
- idx = encoding_to_range_idx(SYS_ID_AA64PFR0_EL1);
- if ((masks[idx] & ID_AA64PFR0_EL1_MPAM_MASK) == ID_AA64PFR0_EL1_MPAM_MASK) {
- ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is officially writable, nothing to test\n");
- return;
- }
-
- /* Get the id register value */
- val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
-
- /* Try to set MPAM=0. This should always be possible. */
- val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
- val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 0);
- err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
- if (err)
- ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM=0 was not accepted\n");
- else
- ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=0 worked\n");
-
- /* Try to set MPAM=1 */
- val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
- val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 1);
- err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
- if (err)
- ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is not writable, nothing to test\n");
- else
- ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=1 was writable\n");
-
- /* Try to set MPAM=2 */
- val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
- val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 2);
- err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
- if (err)
- ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM not arbitrarily modifiable\n");
- else
- ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM value should not be ignored\n");
-
- /* And again for ID_AA64PFR1_EL1.MPAM_frac */
- idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1);
- if ((masks[idx] & ID_AA64PFR1_EL1_MPAM_frac_MASK) == ID_AA64PFR1_EL1_MPAM_frac_MASK) {
- ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is officially writable, nothing to test\n");
- return;
- }
-
- /* Get the id register value */
- val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
-
- /* Try to set MPAM_frac=0. This should always be possible. */
- val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
- val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 0);
- err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
- if (err)
- ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM_frac=0 was not accepted\n");
- else
- ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=0 worked\n");
-
- /* Try to set MPAM_frac=1 */
- val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
- val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 1);
- err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
- if (err)
- ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is not writable, nothing to test\n");
- else
- ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=1 was writable\n");
-
- /* Try to set MPAM_frac=2 */
- val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
- val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 2);
- err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
- if (err)
- ksft_test_result_pass("ID_AA64PFR1_EL1.MPAM_frac not arbitrarily modifiable\n");
- else
- ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n");
-}
-
-static void test_guest_reg_read(struct kvm_vcpu *vcpu)
-{
- bool done = false;
- struct ucall uc;
-
- while (!done) {
- vcpu_run(vcpu);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_SYNC:
- /* Make sure the written values are seen by guest */
- TEST_ASSERT_EQ(test_reg_vals[encoding_to_range_idx(uc.args[2])],
- uc.args[3]);
- break;
- case UCALL_DONE:
- done = true;
- break;
- default:
- TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
- }
- }
-}
-
-/* Politely lifted from arch/arm64/include/asm/cache.h */
-/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
-#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1))
-#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level))
-#define CLIDR_CTYPE(clidr, level) \
- (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
-
-static void test_clidr(struct kvm_vcpu *vcpu)
-{
- uint64_t clidr;
- int level;
-
- clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1));
-
- /* find the first empty level in the cache hierarchy */
- for (level = 1; level < 7; level++) {
- if (!CLIDR_CTYPE(clidr, level))
- break;
- }
-
- /*
- * If you have a mind-boggling 7 levels of cache, congratulations, you
- * get to fix this.
- */
- TEST_ASSERT(level <= 7, "can't find an empty level in cache hierarchy");
-
- /* stick in a unified cache level */
- clidr |= BIT(2) << CLIDR_CTYPE_SHIFT(level);
-
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), clidr);
- test_reg_vals[encoding_to_range_idx(SYS_CLIDR_EL1)] = clidr;
-}
-
-static void test_ctr(struct kvm_vcpu *vcpu)
-{
- u64 ctr;
-
- ctr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0));
- ctr &= ~CTR_EL0_DIC_MASK;
- if (ctr & CTR_EL0_IminLine_MASK)
- ctr--;
-
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), ctr);
- test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr;
-}
-
-static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu)
-{
- u64 val;
-
- test_clidr(vcpu);
- test_ctr(vcpu);
-
- val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1));
- val++;
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val);
-
- test_reg_vals[encoding_to_range_idx(SYS_MPIDR_EL1)] = val;
- ksft_test_result_pass("%s\n", __func__);
-}
-
-static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encoding)
-{
- size_t idx = encoding_to_range_idx(encoding);
- uint64_t observed;
-
- observed = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding));
- TEST_ASSERT_EQ(test_reg_vals[idx], observed);
-}
-
-static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu)
-{
- /*
- * Calls KVM_ARM_VCPU_INIT behind the scenes, which will do an
- * architectural reset of the vCPU.
- */
- aarch64_vcpu_setup(vcpu, NULL);
-
- for (int i = 0; i < ARRAY_SIZE(test_regs); i++)
- test_assert_id_reg_unchanged(vcpu, test_regs[i].reg);
-
- test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1);
- test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1);
- test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0);
-
- ksft_test_result_pass("%s\n", __func__);
-}
-
-int main(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- bool aarch64_only;
- uint64_t val, el0;
- int test_cnt;
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES));
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
- /* Check for AARCH64 only system */
- val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
- el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
- aarch64_only = (el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
-
- ksft_print_header();
-
- test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) +
- ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) +
- ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
- ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) +
- ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) +
- ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 2 +
- MPAM_IDREG_TEST;
-
- ksft_set_plan(test_cnt);
-
- test_vm_ftr_id_regs(vcpu, aarch64_only);
- test_vcpu_ftr_id_regs(vcpu);
- test_user_set_mpam_reg(vcpu);
-
- test_guest_reg_read(vcpu);
-
- test_reset_preserves_id_regs(vcpu);
-
- kvm_vm_free(vm);
-
- ksft_finished();
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * smccc_filter - Tests for the SMCCC filter UAPI.
- *
- * Copyright (c) 2023 Google LLC
- *
- * This test includes:
- * - Tests that the UAPI constraints are upheld by KVM. For example, userspace
- * is prevented from filtering the architecture range of SMCCC calls.
- * - Test that the filter actions (DENIED, FWD_TO_USER) work as intended.
- */
-
-#include <linux/arm-smccc.h>
-#include <linux/psci.h>
-#include <stdint.h>
-
-#include "processor.h"
-#include "test_util.h"
-
-enum smccc_conduit {
- HVC_INSN,
- SMC_INSN,
-};
-
-#define for_each_conduit(conduit) \
- for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++)
-
-static void guest_main(uint32_t func_id, enum smccc_conduit conduit)
-{
- struct arm_smccc_res res;
-
- if (conduit == SMC_INSN)
- smccc_smc(func_id, 0, 0, 0, 0, 0, 0, 0, &res);
- else
- smccc_hvc(func_id, 0, 0, 0, 0, 0, 0, 0, &res);
-
- GUEST_SYNC(res.a0);
-}
-
-static int __set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
- enum kvm_smccc_filter_action action)
-{
- struct kvm_smccc_filter filter = {
- .base = start,
- .nr_functions = nr_functions,
- .action = action,
- };
-
- return __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL,
- KVM_ARM_VM_SMCCC_FILTER, &filter);
-}
-
-static void set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
- enum kvm_smccc_filter_action action)
-{
- int ret = __set_smccc_filter(vm, start, nr_functions, action);
-
- TEST_ASSERT(!ret, "failed to configure SMCCC filter: %d", ret);
-}
-
-static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu)
-{
- struct kvm_vcpu_init init;
- struct kvm_vm *vm;
-
- vm = vm_create(1);
- vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
-
- /*
- * Enable in-kernel emulation of PSCI to ensure that calls are denied
- * due to the SMCCC filter, not because of KVM.
- */
- init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
-
- *vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main);
- return vm;
-}
-
-static void test_pad_must_be_zero(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = setup_vm(&vcpu);
- struct kvm_smccc_filter filter = {
- .base = PSCI_0_2_FN_PSCI_VERSION,
- .nr_functions = 1,
- .action = KVM_SMCCC_FILTER_DENY,
- .pad = { -1 },
- };
- int r;
-
- r = __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL,
- KVM_ARM_VM_SMCCC_FILTER, &filter);
- TEST_ASSERT(r < 0 && errno == EINVAL,
- "Setting filter with nonzero padding should return EINVAL");
-}
-
-/* Ensure that userspace cannot filter the Arm Architecture SMCCC range */
-static void test_filter_reserved_range(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = setup_vm(&vcpu);
- uint32_t smc64_fn;
- int r;
-
- r = __set_smccc_filter(vm, ARM_SMCCC_ARCH_WORKAROUND_1,
- 1, KVM_SMCCC_FILTER_DENY);
- TEST_ASSERT(r < 0 && errno == EEXIST,
- "Attempt to filter reserved range should return EEXIST");
-
- smc64_fn = ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_64,
- 0, 0);
-
- r = __set_smccc_filter(vm, smc64_fn, 1, KVM_SMCCC_FILTER_DENY);
- TEST_ASSERT(r < 0 && errno == EEXIST,
- "Attempt to filter reserved range should return EEXIST");
-
- kvm_vm_free(vm);
-}
-
-static void test_invalid_nr_functions(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = setup_vm(&vcpu);
- int r;
-
- r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 0, KVM_SMCCC_FILTER_DENY);
- TEST_ASSERT(r < 0 && errno == EINVAL,
- "Attempt to filter 0 functions should return EINVAL");
-
- kvm_vm_free(vm);
-}
-
-static void test_overflow_nr_functions(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = setup_vm(&vcpu);
- int r;
-
- r = __set_smccc_filter(vm, ~0, ~0, KVM_SMCCC_FILTER_DENY);
- TEST_ASSERT(r < 0 && errno == EINVAL,
- "Attempt to overflow filter range should return EINVAL");
-
- kvm_vm_free(vm);
-}
-
-static void test_reserved_action(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = setup_vm(&vcpu);
- int r;
-
- r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, -1);
- TEST_ASSERT(r < 0 && errno == EINVAL,
- "Attempt to use reserved filter action should return EINVAL");
-
- kvm_vm_free(vm);
-}
-
-
-/* Test that overlapping configurations of the SMCCC filter are rejected */
-static void test_filter_overlap(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = setup_vm(&vcpu);
- int r;
-
- set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY);
-
- r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY);
- TEST_ASSERT(r < 0 && errno == EEXIST,
- "Attempt to filter already configured range should return EEXIST");
-
- kvm_vm_free(vm);
-}
-
-static void expect_call_denied(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- if (get_ucall(vcpu, &uc) != UCALL_SYNC)
- TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
-
- TEST_ASSERT(uc.args[1] == SMCCC_RET_NOT_SUPPORTED,
- "Unexpected SMCCC return code: %lu", uc.args[1]);
-}
-
-/* Denied SMCCC calls have a return code of SMCCC_RET_NOT_SUPPORTED */
-static void test_filter_denied(void)
-{
- enum smccc_conduit conduit;
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- for_each_conduit(conduit) {
- vm = setup_vm(&vcpu);
-
- set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_DENY);
- vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit);
-
- vcpu_run(vcpu);
- expect_call_denied(vcpu);
-
- kvm_vm_free(vm);
- }
-}
-
-static void expect_call_fwd_to_user(struct kvm_vcpu *vcpu, uint32_t func_id,
- enum smccc_conduit conduit)
-{
- struct kvm_run *run = vcpu->run;
-
- TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERCALL,
- "Unexpected exit reason: %u", run->exit_reason);
- TEST_ASSERT(run->hypercall.nr == func_id,
- "Unexpected SMCCC function: %llu", run->hypercall.nr);
-
- if (conduit == SMC_INSN)
- TEST_ASSERT(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC,
- "KVM_HYPERCALL_EXIT_SMC is not set");
- else
- TEST_ASSERT(!(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC),
- "KVM_HYPERCALL_EXIT_SMC is set");
-}
-
-/* SMCCC calls forwarded to userspace cause KVM_EXIT_HYPERCALL exits */
-static void test_filter_fwd_to_user(void)
-{
- enum smccc_conduit conduit;
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- for_each_conduit(conduit) {
- vm = setup_vm(&vcpu);
-
- set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_FWD_TO_USER);
- vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit);
-
- vcpu_run(vcpu);
- expect_call_fwd_to_user(vcpu, PSCI_0_2_FN_PSCI_VERSION, conduit);
-
- kvm_vm_free(vm);
- }
-}
-
-static bool kvm_supports_smccc_filter(void)
-{
- struct kvm_vm *vm = vm_create_barebones();
- int r;
-
- r = __kvm_has_device_attr(vm->fd, KVM_ARM_VM_SMCCC_CTRL, KVM_ARM_VM_SMCCC_FILTER);
-
- kvm_vm_free(vm);
- return !r;
-}
-
-int main(void)
-{
- TEST_REQUIRE(kvm_supports_smccc_filter());
-
- test_pad_must_be_zero();
- test_invalid_nr_functions();
- test_overflow_nr_functions();
- test_reserved_action();
- test_filter_reserved_range();
- test_filter_overlap();
- test_filter_denied();
- test_filter_fwd_to_user();
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vcpu_width_config - Test KVM_ARM_VCPU_INIT() with KVM_ARM_VCPU_EL1_32BIT.
- *
- * Copyright (c) 2022 Google LLC.
- *
- * This is a test that ensures that non-mixed-width vCPUs (all 64bit vCPUs
- * or all 32bit vCPUs) can be configured and mixed-width vCPUs cannot be
- * configured.
- */
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "test_util.h"
-
-
-/*
- * Add a vCPU, run KVM_ARM_VCPU_INIT with @init0, and then
- * add another vCPU, and run KVM_ARM_VCPU_INIT with @init1.
- */
-static int add_init_2vcpus(struct kvm_vcpu_init *init0,
- struct kvm_vcpu_init *init1)
-{
- struct kvm_vcpu *vcpu0, *vcpu1;
- struct kvm_vm *vm;
- int ret;
-
- vm = vm_create_barebones();
-
- vcpu0 = __vm_vcpu_add(vm, 0);
- ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0);
- if (ret)
- goto free_exit;
-
- vcpu1 = __vm_vcpu_add(vm, 1);
- ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1);
-
-free_exit:
- kvm_vm_free(vm);
- return ret;
-}
-
-/*
- * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init0,
- * and run KVM_ARM_VCPU_INIT for another vCPU with @init1.
- */
-static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init0,
- struct kvm_vcpu_init *init1)
-{
- struct kvm_vcpu *vcpu0, *vcpu1;
- struct kvm_vm *vm;
- int ret;
-
- vm = vm_create_barebones();
-
- vcpu0 = __vm_vcpu_add(vm, 0);
- vcpu1 = __vm_vcpu_add(vm, 1);
-
- ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0);
- if (ret)
- goto free_exit;
-
- ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1);
-
-free_exit:
- kvm_vm_free(vm);
- return ret;
-}
-
-/*
- * Tests that two 64bit vCPUs can be configured, two 32bit vCPUs can be
- * configured, and two mixed-width vCPUs cannot be configured.
- * For each of those three cases, vCPUs are configured in two different orders.
- * One order is running KVM_CREATE_VCPU for 2 vCPUs, and then running
- * KVM_ARM_VCPU_INIT for them.
- * The other is running KVM_CREATE_VCPU and KVM_ARM_VCPU_INIT for one vCPU,
- * and then running those commands for another vCPU.
- */
-int main(void)
-{
- struct kvm_vcpu_init init0, init1;
- struct kvm_vm *vm;
- int ret;
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_EL1_32BIT));
-
- /* Get the preferred target type and copy that to init1 for later use */
- vm = vm_create_barebones();
- vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init0);
- kvm_vm_free(vm);
- init1 = init0;
-
- /* Test with 64bit vCPUs */
- ret = add_init_2vcpus(&init0, &init0);
- TEST_ASSERT(ret == 0,
- "Configuring 64bit EL1 vCPUs failed unexpectedly");
- ret = add_2vcpus_init_2vcpus(&init0, &init0);
- TEST_ASSERT(ret == 0,
- "Configuring 64bit EL1 vCPUs failed unexpectedly");
-
- /* Test with 32bit vCPUs */
- init0.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
- ret = add_init_2vcpus(&init0, &init0);
- TEST_ASSERT(ret == 0,
- "Configuring 32bit EL1 vCPUs failed unexpectedly");
- ret = add_2vcpus_init_2vcpus(&init0, &init0);
- TEST_ASSERT(ret == 0,
- "Configuring 32bit EL1 vCPUs failed unexpectedly");
-
- /* Test with mixed-width vCPUs */
- init0.features[0] = 0;
- init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
- ret = add_init_2vcpus(&init0, &init1);
- TEST_ASSERT(ret != 0,
- "Configuring mixed-width vCPUs worked unexpectedly");
- ret = add_2vcpus_init_2vcpus(&init0, &init1);
- TEST_ASSERT(ret != 0,
- "Configuring mixed-width vCPUs worked unexpectedly");
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * vgic init sequence tests
- *
- * Copyright (C) 2020, Red Hat, Inc.
- */
-#include <linux/kernel.h>
-#include <sys/syscall.h>
-#include <asm/kvm.h>
-#include <asm/kvm_para.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vgic.h"
-
-#define NR_VCPUS 4
-
-#define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset)
-
-#define GICR_TYPER 0x8
-
-#define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2)
-#define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3)
-
-struct vm_gic {
- struct kvm_vm *vm;
- int gic_fd;
- uint32_t gic_dev_type;
-};
-
-static uint64_t max_phys_size;
-
-/*
- * Helpers to access a redistributor register and verify the ioctl() failed or
- * succeeded as expected, and provided the correct value on success.
- */
-static void v3_redist_reg_get_errno(int gicv3_fd, int vcpu, int offset,
- int want, const char *msg)
-{
- uint32_t ignored_val;
- int ret = __kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
- REG_OFFSET(vcpu, offset), &ignored_val);
-
- TEST_ASSERT(ret && errno == want, "%s; want errno = %d", msg, want);
-}
-
-static void v3_redist_reg_get(int gicv3_fd, int vcpu, int offset, uint32_t want,
- const char *msg)
-{
- uint32_t val;
-
- kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
- REG_OFFSET(vcpu, offset), &val);
- TEST_ASSERT(val == want, "%s; want '0x%x', got '0x%x'", msg, want, val);
-}
-
-/* dummy guest code */
-static void guest_code(void)
-{
- GUEST_SYNC(0);
- GUEST_SYNC(1);
- GUEST_SYNC(2);
- GUEST_DONE();
-}
-
-/* we don't want to assert on run execution, hence this helper */
-static int run_vcpu(struct kvm_vcpu *vcpu)
-{
- return __vcpu_run(vcpu) ? -errno : 0;
-}
-
-static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type,
- uint32_t nr_vcpus,
- struct kvm_vcpu *vcpus[])
-{
- struct vm_gic v;
-
- v.gic_dev_type = gic_dev_type;
- v.vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
- v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
-
- return v;
-}
-
-static struct vm_gic vm_gic_create_barebones(uint32_t gic_dev_type)
-{
- struct vm_gic v;
-
- v.gic_dev_type = gic_dev_type;
- v.vm = vm_create_barebones();
- v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
-
- return v;
-}
-
-
-static void vm_gic_destroy(struct vm_gic *v)
-{
- close(v->gic_fd);
- kvm_vm_free(v->vm);
-}
-
-struct vgic_region_attr {
- uint64_t attr;
- uint64_t size;
- uint64_t alignment;
-};
-
-struct vgic_region_attr gic_v3_dist_region = {
- .attr = KVM_VGIC_V3_ADDR_TYPE_DIST,
- .size = 0x10000,
- .alignment = 0x10000,
-};
-
-struct vgic_region_attr gic_v3_redist_region = {
- .attr = KVM_VGIC_V3_ADDR_TYPE_REDIST,
- .size = NR_VCPUS * 0x20000,
- .alignment = 0x10000,
-};
-
-struct vgic_region_attr gic_v2_dist_region = {
- .attr = KVM_VGIC_V2_ADDR_TYPE_DIST,
- .size = 0x1000,
- .alignment = 0x1000,
-};
-
-struct vgic_region_attr gic_v2_cpu_region = {
- .attr = KVM_VGIC_V2_ADDR_TYPE_CPU,
- .size = 0x2000,
- .alignment = 0x1000,
-};
-
-/**
- * Helper routine that performs KVM device tests in general. Eventually the
- * ARM_VGIC (GICv2 or GICv3) device gets created with an overlapping
- * DIST/REDIST (or DIST/CPUIF for GICv2). The assumption is that 4 vcpus are
- * going to be used, hence the overlap. In the case of GICv3, a RDIST region
- * is set @0x0 and a DIST region @0x70000. The GICv2 case sets a CPUIF @0x0
- * and a DIST region @0x1000.
- */
-static void subtest_dist_rdist(struct vm_gic *v)
-{
- int ret;
- uint64_t addr;
- struct vgic_region_attr rdist; /* CPU interface in GICv2 */
- struct vgic_region_attr dist;
-
- rdist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_redist_region
- : gic_v2_cpu_region;
- dist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_dist_region
- : gic_v2_dist_region;
-
- /* Check existing group/attributes */
- kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, dist.attr);
-
- kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, rdist.attr);
-
- /* check a non-existent attribute */
- ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, -1);
- TEST_ASSERT(ret && errno == ENXIO, "attribute not supported");
-
- /* misaligned DIST and REDIST address settings */
- addr = dist.alignment / 0x10;
- ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- dist.attr, &addr);
- TEST_ASSERT(ret && errno == EINVAL, "GIC dist base not aligned");
-
- addr = rdist.alignment / 0x10;
- ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- rdist.attr, &addr);
- TEST_ASSERT(ret && errno == EINVAL, "GIC redist/cpu base not aligned");
-
- /* out of range address */
- addr = max_phys_size;
- ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- dist.attr, &addr);
- TEST_ASSERT(ret && errno == E2BIG, "dist address beyond IPA limit");
-
- ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- rdist.attr, &addr);
- TEST_ASSERT(ret && errno == E2BIG, "redist address beyond IPA limit");
-
- /* Space for half a rdist (a rdist is: 2 * rdist.alignment). */
- addr = max_phys_size - dist.alignment;
- ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- rdist.attr, &addr);
- TEST_ASSERT(ret && errno == E2BIG,
- "half of the redist is beyond IPA limit");
-
- /* set REDIST base address @0x0*/
- addr = 0x00000;
- kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- rdist.attr, &addr);
-
- /* Attempt to create a second legacy redistributor region */
- addr = 0xE0000;
- ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- rdist.attr, &addr);
- TEST_ASSERT(ret && errno == EEXIST, "GIC redist base set again");
-
- ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST);
- if (!ret) {
- /* Attempt to mix legacy and new redistributor regions */
- addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 0, 0);
- ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
- TEST_ASSERT(ret && errno == EINVAL,
- "attempt to mix GICv3 REDIST and REDIST_REGION");
- }
-
- /*
- * Set overlapping DIST / REDIST; this cannot be detected here, but will be
- * detected on the first vcpu run instead.
- */
- addr = rdist.size - rdist.alignment;
- kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- dist.attr, &addr);
-}
-
-/* Test the new REDIST region API */
-static void subtest_v3_redist_regions(struct vm_gic *v)
-{
- uint64_t addr, expected_addr;
- int ret;
-
- ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST);
- TEST_ASSERT(!ret, "Multiple redist regions advertised");
-
- addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 2, 0);
- ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
- TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with flags != 0");
-
- addr = REDIST_REGION_ATTR_ADDR(0, 0x100000, 0, 0);
- ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
- TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with count== 0");
-
- addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1);
- ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
- TEST_ASSERT(ret && errno == EINVAL,
- "attempt to register the first rdist region with index != 0");
-
- addr = REDIST_REGION_ATTR_ADDR(2, 0x201000, 0, 1);
- ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
- TEST_ASSERT(ret && errno == EINVAL, "rdist region with misaligned address");
-
- addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
- kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
- addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1);
- ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
- TEST_ASSERT(ret && errno == EINVAL, "register an rdist region with already used index");
-
- addr = REDIST_REGION_ATTR_ADDR(1, 0x210000, 0, 2);
- ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
- TEST_ASSERT(ret && errno == EINVAL,
- "register an rdist region overlapping with another one");
-
- addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 2);
- ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
- TEST_ASSERT(ret && errno == EINVAL, "register redist region with index not +1");
-
- addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1);
- kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
- addr = REDIST_REGION_ATTR_ADDR(1, max_phys_size, 0, 2);
- ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
- TEST_ASSERT(ret && errno == E2BIG,
- "register redist region with base address beyond IPA range");
-
- /* The last redist is above the pa range. */
- addr = REDIST_REGION_ATTR_ADDR(2, max_phys_size - 0x30000, 0, 2);
- ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
- TEST_ASSERT(ret && errno == E2BIG,
- "register redist region with top address beyond IPA range");
-
- addr = 0x260000;
- ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
- TEST_ASSERT(ret && errno == EINVAL,
- "Mix KVM_VGIC_V3_ADDR_TYPE_REDIST and REDIST_REGION");
-
- /*
- * Now there are 2 redist regions:
- * region 0 @ 0x200000 2 redists
- * region 1 @ 0x240000 1 redist
- * Attempt to read their characteristics
- */
-
- addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 0);
- expected_addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
- ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
- TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #0");
-
- addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 1);
- expected_addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1);
- ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
- TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #1");
-
- addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 2);
- ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
- TEST_ASSERT(ret && errno == ENOENT, "read characteristics of non existing region");
-
- addr = 0x260000;
- kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_DIST, &addr);
-
- addr = REDIST_REGION_ATTR_ADDR(1, 0x260000, 0, 2);
- ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
- TEST_ASSERT(ret && errno == EINVAL, "register redist region colliding with dist");
-}
-
-/*
- * VGIC KVM device is created and initialized before the secondary CPUs
- * get created
- */
-static void test_vgic_then_vcpus(uint32_t gic_dev_type)
-{
- struct kvm_vcpu *vcpus[NR_VCPUS];
- struct vm_gic v;
- int ret, i;
-
- v = vm_gic_create_with_vcpus(gic_dev_type, 1, vcpus);
-
- subtest_dist_rdist(&v);
-
- /* Add the rest of the VCPUs */
- for (i = 1; i < NR_VCPUS; ++i)
- vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
-
- ret = run_vcpu(vcpus[3]);
- TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run");
-
- vm_gic_destroy(&v);
-}
-
-/* All the VCPUs are created before the VGIC KVM device gets initialized */
-static void test_vcpus_then_vgic(uint32_t gic_dev_type)
-{
- struct kvm_vcpu *vcpus[NR_VCPUS];
- struct vm_gic v;
- int ret;
-
- v = vm_gic_create_with_vcpus(gic_dev_type, NR_VCPUS, vcpus);
-
- subtest_dist_rdist(&v);
-
- ret = run_vcpu(vcpus[3]);
- TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run");
-
- vm_gic_destroy(&v);
-}
-
-#define KVM_VGIC_V2_ATTR(offset, cpu) \
- (FIELD_PREP(KVM_DEV_ARM_VGIC_OFFSET_MASK, offset) | \
- FIELD_PREP(KVM_DEV_ARM_VGIC_CPUID_MASK, cpu))
-
-#define GIC_CPU_CTRL 0x00
-
-static void test_v2_uaccess_cpuif_no_vcpus(void)
-{
- struct vm_gic v;
- u64 val = 0;
- int ret;
-
- v = vm_gic_create_barebones(KVM_DEV_TYPE_ARM_VGIC_V2);
- subtest_dist_rdist(&v);
-
- ret = __kvm_has_device_attr(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
- KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0));
- TEST_ASSERT(ret && errno == EINVAL,
- "accessed non-existent CPU interface, want errno: %i",
- EINVAL);
- ret = __kvm_device_attr_get(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
- KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val);
- TEST_ASSERT(ret && errno == EINVAL,
- "accessed non-existent CPU interface, want errno: %i",
- EINVAL);
- ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
- KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val);
- TEST_ASSERT(ret && errno == EINVAL,
- "accessed non-existent CPU interface, want errno: %i",
- EINVAL);
-
- vm_gic_destroy(&v);
-}
-
-static void test_v3_new_redist_regions(void)
-{
- struct kvm_vcpu *vcpus[NR_VCPUS];
- void *dummy = NULL;
- struct vm_gic v;
- uint64_t addr;
- int ret;
-
- v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
- subtest_v3_redist_regions(&v);
- kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
- ret = run_vcpu(vcpus[3]);
- TEST_ASSERT(ret == -ENXIO, "running without sufficient number of rdists");
- vm_gic_destroy(&v);
-
- /* step2 */
-
- v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
- subtest_v3_redist_regions(&v);
-
- addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2);
- kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
- ret = run_vcpu(vcpus[3]);
- TEST_ASSERT(ret == -EBUSY, "running without vgic explicit init");
-
- vm_gic_destroy(&v);
-
- /* step 3 */
-
- v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
- subtest_v3_redist_regions(&v);
-
- ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, dummy);
- TEST_ASSERT(ret && errno == EFAULT,
- "register a third region allowing to cover the 4 vcpus");
-
- addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2);
- kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
- kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
- ret = run_vcpu(vcpus[3]);
- TEST_ASSERT(!ret, "vcpu run");
-
- vm_gic_destroy(&v);
-}
-
-static void test_v3_typer_accesses(void)
-{
- struct vm_gic v;
- uint64_t addr;
- int ret, i;
-
- v.vm = vm_create(NR_VCPUS);
- (void)vm_vcpu_add(v.vm, 0, guest_code);
-
- v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3);
-
- (void)vm_vcpu_add(v.vm, 3, guest_code);
-
- v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EINVAL,
- "attempting to read GICR_TYPER of non created vcpu");
-
- (void)vm_vcpu_add(v.vm, 1, guest_code);
-
- v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EBUSY,
- "read GICR_TYPER before GIC initialized");
-
- (void)vm_vcpu_add(v.vm, 2, guest_code);
-
- kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
- for (i = 0; i < NR_VCPUS ; i++) {
- v3_redist_reg_get(v.gic_fd, i, GICR_TYPER, i * 0x100,
- "read GICR_TYPER before rdist region setting");
- }
-
- addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
- kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
- /* The first 2 rdists should be put there (vcpus 0 and 3) */
- v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x0, "read typer of rdist #0");
- v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #1");
-
- addr = REDIST_REGION_ATTR_ADDR(10, 0x100000, 0, 1);
- ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
- TEST_ASSERT(ret && errno == EINVAL, "collision with previous rdist region");
-
- v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100,
- "no redist region attached to vcpu #1 yet, last cannot be returned");
- v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200,
- "no redist region attached to vcpu #2, last cannot be returned");
-
- addr = REDIST_REGION_ATTR_ADDR(10, 0x20000, 0, 1);
- kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
- v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1");
- v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210,
- "read typer of rdist #1, last properly returned");
-
- vm_gic_destroy(&v);
-}
-
-static struct vm_gic vm_gic_v3_create_with_vcpuids(int nr_vcpus,
- uint32_t vcpuids[])
-{
- struct vm_gic v;
- int i;
-
- v.vm = vm_create(nr_vcpus);
- for (i = 0; i < nr_vcpus; i++)
- vm_vcpu_add(v.vm, vcpuids[i], guest_code);
-
- v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3);
-
- return v;
-}
-
-/**
- * Test GICR_TYPER last bit with new redist regions
- * rdist regions #1 and #2 are contiguous
- * rdist region #0 @0x100000 2 rdist capacity
- * rdists: 0, 3 (Last)
- * rdist region #1 @0x240000 2 rdist capacity
- * rdists: 5, 4 (Last)
- * rdist region #2 @0x200000 2 rdist capacity
- * rdists: 1, 2
- */
-static void test_v3_last_bit_redist_regions(void)
-{
- uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
- struct vm_gic v;
- uint64_t addr;
-
- v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids);
-
- kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
- addr = REDIST_REGION_ATTR_ADDR(2, 0x100000, 0, 0);
- kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
- addr = REDIST_REGION_ATTR_ADDR(2, 0x240000, 0, 1);
- kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
- addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 2);
- kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
- v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0");
- v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1");
- v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200, "read typer of rdist #2");
- v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #3");
- v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #5");
- v3_redist_reg_get(v.gic_fd, 4, GICR_TYPER, 0x410, "read typer of rdist #4");
-
- vm_gic_destroy(&v);
-}
-
-/* Test last bit with legacy region */
-static void test_v3_last_bit_single_rdist(void)
-{
- uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
- struct vm_gic v;
- uint64_t addr;
-
- v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids);
-
- kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
- addr = 0x10000;
- kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
-
- v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0");
- v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x300, "read typer of rdist #1");
- v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #2");
- v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #3");
- v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210, "read typer of rdist #3");
-
- vm_gic_destroy(&v);
-}
-
-/* Uses the legacy REDIST region API. */
-static void test_v3_redist_ipa_range_check_at_vcpu_run(void)
-{
- struct kvm_vcpu *vcpus[NR_VCPUS];
- struct vm_gic v;
- int ret, i;
- uint64_t addr;
-
- v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, 1, vcpus);
-
- /* Set space for 3 redists, we have 1 vcpu, so this succeeds. */
- addr = max_phys_size - (3 * 2 * 0x10000);
- kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
-
- addr = 0x00000;
- kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_DIST, &addr);
-
- /* Add the rest of the VCPUs */
- for (i = 1; i < NR_VCPUS; ++i)
- vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
-
- kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
- /* Attempt to run a vcpu without enough redist space. */
- ret = run_vcpu(vcpus[2]);
- TEST_ASSERT(ret && errno == EINVAL,
- "redist base+size above PA range detected on 1st vcpu run");
-
- vm_gic_destroy(&v);
-}
-
-static void test_v3_its_region(void)
-{
- struct kvm_vcpu *vcpus[NR_VCPUS];
- struct vm_gic v;
- uint64_t addr;
- int its_fd, ret;
-
- v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
- its_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_ITS);
-
- addr = 0x401000;
- ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_ITS_ADDR_TYPE, &addr);
- TEST_ASSERT(ret && errno == EINVAL,
- "ITS region with misaligned address");
-
- addr = max_phys_size;
- ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_ITS_ADDR_TYPE, &addr);
- TEST_ASSERT(ret && errno == E2BIG,
- "register ITS region with base address beyond IPA range");
-
- addr = max_phys_size - 0x10000;
- ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_ITS_ADDR_TYPE, &addr);
- TEST_ASSERT(ret && errno == E2BIG,
- "Half of ITS region is beyond IPA range");
-
- /* This one succeeds setting the ITS base */
- addr = 0x400000;
- kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_ITS_ADDR_TYPE, &addr);
-
- addr = 0x300000;
- ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_ITS_ADDR_TYPE, &addr);
- TEST_ASSERT(ret && errno == EEXIST, "ITS base set again");
-
- close(its_fd);
- vm_gic_destroy(&v);
-}
-
-/*
- * Returns 0 if it's possible to create a GIC device of a given type (V2 or V3).
- */
-int test_kvm_device(uint32_t gic_dev_type)
-{
- struct kvm_vcpu *vcpus[NR_VCPUS];
- struct vm_gic v;
- uint32_t other;
- int ret;
-
- v.vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus);
-
- /* try to create a non-existent KVM device */
- ret = __kvm_test_create_device(v.vm, 0);
- TEST_ASSERT(ret && errno == ENODEV, "unsupported device");
-
- /* trial mode */
- ret = __kvm_test_create_device(v.vm, gic_dev_type);
- if (ret)
- return ret;
- v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
-
- ret = __kvm_create_device(v.vm, gic_dev_type);
- TEST_ASSERT(ret < 0 && errno == EEXIST, "create GIC device twice");
-
- /* try to create the other gic_dev_type */
- other = VGIC_DEV_IS_V2(gic_dev_type) ? KVM_DEV_TYPE_ARM_VGIC_V3
- : KVM_DEV_TYPE_ARM_VGIC_V2;
-
- if (!__kvm_test_create_device(v.vm, other)) {
- ret = __kvm_create_device(v.vm, other);
- TEST_ASSERT(ret < 0 && (errno == EINVAL || errno == EEXIST),
- "create GIC device while other version exists");
- }
-
- vm_gic_destroy(&v);
-
- return 0;
-}
-
-void run_tests(uint32_t gic_dev_type)
-{
- test_vcpus_then_vgic(gic_dev_type);
- test_vgic_then_vcpus(gic_dev_type);
-
- if (VGIC_DEV_IS_V2(gic_dev_type))
- test_v2_uaccess_cpuif_no_vcpus();
-
- if (VGIC_DEV_IS_V3(gic_dev_type)) {
- test_v3_new_redist_regions();
- test_v3_typer_accesses();
- test_v3_last_bit_redist_regions();
- test_v3_last_bit_single_rdist();
- test_v3_redist_ipa_range_check_at_vcpu_run();
- test_v3_its_region();
- }
-}
-
-int main(int ac, char **av)
-{
- int ret;
- int pa_bits;
- int cnt_impl = 0;
-
- pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
- max_phys_size = 1ULL << pa_bits;
-
- ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V3);
- if (!ret) {
- pr_info("Running GIC_v3 tests.\n");
- run_tests(KVM_DEV_TYPE_ARM_VGIC_V3);
- cnt_impl++;
- }
-
- ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V2);
- if (!ret) {
- pr_info("Running GIC_v2 tests.\n");
- run_tests(KVM_DEV_TYPE_ARM_VGIC_V2);
- cnt_impl++;
- }
-
- if (!cnt_impl) {
- print_skip("No GICv2 nor GICv3 support");
- exit(KSFT_SKIP);
- }
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * vgic_irq.c - Test userspace injection of IRQs
- *
- * This test validates the injection of IRQs from userspace using various
- * methods (e.g., KVM_IRQ_LINE) and modes (e.g., EOI). The guest "asks" the
- * host to inject a specific intid via a GUEST_SYNC call, and then checks that
- * it received it.
- */
-#include <asm/kvm.h>
-#include <asm/kvm_para.h>
-#include <sys/eventfd.h>
-#include <linux/sizes.h>
-
-#include "processor.h"
-#include "test_util.h"
-#include "kvm_util.h"
-#include "gic.h"
-#include "gic_v3.h"
-#include "vgic.h"
-
-/*
- * Stores the user specified args; it's passed to the guest and to every test
- * function.
- */
-struct test_args {
- uint32_t nr_irqs; /* number of KVM supported IRQs. */
- bool eoi_split; /* 1 is eoir+dir, 0 is eoir only */
- bool level_sensitive; /* 1 is level, 0 is edge */
- int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */
- bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */
-};
-
-/*
- * KVM implements 32 priority levels:
- * 0x00 (highest priority) - 0xF8 (lowest priority), in steps of 8
- *
- * Note that these macros will still be correct in the case that KVM implements
- * more priority levels. Also note that 32 is the minimum for GICv3 and GICv2.
- */
-#define KVM_NUM_PRIOS 32
-#define KVM_PRIO_SHIFT 3 /* steps of 8 = 1 << 3 */
-#define KVM_PRIO_STEPS (1 << KVM_PRIO_SHIFT) /* 8 */
-#define LOWEST_PRIO (KVM_NUM_PRIOS - 1)
-#define CPU_PRIO_MASK (LOWEST_PRIO << KVM_PRIO_SHIFT) /* 0xf8 */
-#define IRQ_DEFAULT_PRIO (LOWEST_PRIO - 1)
-#define IRQ_DEFAULT_PRIO_REG (IRQ_DEFAULT_PRIO << KVM_PRIO_SHIFT) /* 0xf0 */
-
-/*
- * The kvm_inject_* utilities are used by the guest to ask the host to inject
- * interrupts (e.g., using the KVM_IRQ_LINE ioctl).
- */
-
-typedef enum {
- KVM_INJECT_EDGE_IRQ_LINE = 1,
- KVM_SET_IRQ_LINE,
- KVM_SET_IRQ_LINE_HIGH,
- KVM_SET_LEVEL_INFO_HIGH,
- KVM_INJECT_IRQFD,
- KVM_WRITE_ISPENDR,
- KVM_WRITE_ISACTIVER,
-} kvm_inject_cmd;
-
-struct kvm_inject_args {
- kvm_inject_cmd cmd;
- uint32_t first_intid;
- uint32_t num;
- int level;
- bool expect_failure;
-};
-
-/* Used on the guest side to perform the hypercall. */
-static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
- uint32_t num, int level, bool expect_failure);
-
-/* Used on the host side to get the hypercall info. */
-static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
- struct kvm_inject_args *args);
-
-#define _KVM_INJECT_MULTI(cmd, intid, num, expect_failure) \
- kvm_inject_call(cmd, intid, num, -1 /* not used */, expect_failure)
-
-#define KVM_INJECT_MULTI(cmd, intid, num) \
- _KVM_INJECT_MULTI(cmd, intid, num, false)
-
-#define _KVM_INJECT(cmd, intid, expect_failure) \
- _KVM_INJECT_MULTI(cmd, intid, 1, expect_failure)
-
-#define KVM_INJECT(cmd, intid) \
- _KVM_INJECT_MULTI(cmd, intid, 1, false)
-
-#define KVM_ACTIVATE(cmd, intid) \
- kvm_inject_call(cmd, intid, 1, 1, false);
-
-struct kvm_inject_desc {
- kvm_inject_cmd cmd;
- /* can inject SGIs, PPIs, and/or SPIs. */
- bool sgi, ppi, spi;
-};
-
-static struct kvm_inject_desc inject_edge_fns[] = {
- /* sgi ppi spi */
- { KVM_INJECT_EDGE_IRQ_LINE, false, false, true },
- { KVM_INJECT_IRQFD, false, false, true },
- { KVM_WRITE_ISPENDR, true, false, true },
- { 0, },
-};
-
-static struct kvm_inject_desc inject_level_fns[] = {
- /* sgi ppi spi */
- { KVM_SET_IRQ_LINE_HIGH, false, true, true },
- { KVM_SET_LEVEL_INFO_HIGH, false, true, true },
- { KVM_INJECT_IRQFD, false, false, true },
- { KVM_WRITE_ISPENDR, false, true, true },
- { 0, },
-};
-
-static struct kvm_inject_desc set_active_fns[] = {
- /* sgi ppi spi */
- { KVM_WRITE_ISACTIVER, true, true, true },
- { 0, },
-};
-
-#define for_each_inject_fn(t, f) \
- for ((f) = (t); (f)->cmd; (f)++)
-
-#define for_each_supported_inject_fn(args, t, f) \
- for_each_inject_fn(t, f) \
- if ((args)->kvm_supports_irqfd || (f)->cmd != KVM_INJECT_IRQFD)
-
-#define for_each_supported_activate_fn(args, t, f) \
- for_each_supported_inject_fn((args), (t), (f))
-
-/* Shared between the guest main thread and the IRQ handlers. */
-volatile uint64_t irq_handled;
-volatile uint32_t irqnr_received[MAX_SPI + 1];
-
-static void reset_stats(void)
-{
- int i;
-
- irq_handled = 0;
- for (i = 0; i <= MAX_SPI; i++)
- irqnr_received[i] = 0;
-}
-
-static uint64_t gic_read_ap1r0(void)
-{
- uint64_t reg = read_sysreg_s(SYS_ICC_AP1R0_EL1);
-
- dsb(sy);
- return reg;
-}
-
-static void gic_write_ap1r0(uint64_t val)
-{
- write_sysreg_s(val, SYS_ICC_AP1R0_EL1);
- isb();
-}
-
-static void guest_set_irq_line(uint32_t intid, uint32_t level);
-
-static void guest_irq_generic_handler(bool eoi_split, bool level_sensitive)
-{
- uint32_t intid = gic_get_and_ack_irq();
-
- if (intid == IAR_SPURIOUS)
- return;
-
- GUEST_ASSERT(gic_irq_get_active(intid));
-
- if (!level_sensitive)
- GUEST_ASSERT(!gic_irq_get_pending(intid));
-
- if (level_sensitive)
- guest_set_irq_line(intid, 0);
-
- GUEST_ASSERT(intid < MAX_SPI);
- irqnr_received[intid] += 1;
- irq_handled += 1;
-
- gic_set_eoi(intid);
- GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
- if (eoi_split)
- gic_set_dir(intid);
-
- GUEST_ASSERT(!gic_irq_get_active(intid));
- GUEST_ASSERT(!gic_irq_get_pending(intid));
-}
-
-static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
- uint32_t num, int level, bool expect_failure)
-{
- struct kvm_inject_args args = {
- .cmd = cmd,
- .first_intid = first_intid,
- .num = num,
- .level = level,
- .expect_failure = expect_failure,
- };
- GUEST_SYNC(&args);
-}
-
-#define GUEST_ASSERT_IAR_EMPTY() \
-do { \
- uint32_t _intid; \
- _intid = gic_get_and_ack_irq(); \
- GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \
-} while (0)
-
-#define CAT_HELPER(a, b) a ## b
-#define CAT(a, b) CAT_HELPER(a, b)
-#define PREFIX guest_irq_handler_
-#define GUEST_IRQ_HANDLER_NAME(split, lev) CAT(PREFIX, CAT(split, lev))
-#define GENERATE_GUEST_IRQ_HANDLER(split, lev) \
-static void CAT(PREFIX, CAT(split, lev))(struct ex_regs *regs) \
-{ \
- guest_irq_generic_handler(split, lev); \
-}
-
-GENERATE_GUEST_IRQ_HANDLER(0, 0);
-GENERATE_GUEST_IRQ_HANDLER(0, 1);
-GENERATE_GUEST_IRQ_HANDLER(1, 0);
-GENERATE_GUEST_IRQ_HANDLER(1, 1);
-
-static void (*guest_irq_handlers[2][2])(struct ex_regs *) = {
- {GUEST_IRQ_HANDLER_NAME(0, 0), GUEST_IRQ_HANDLER_NAME(0, 1),},
- {GUEST_IRQ_HANDLER_NAME(1, 0), GUEST_IRQ_HANDLER_NAME(1, 1),},
-};
-
-static void reset_priorities(struct test_args *args)
-{
- int i;
-
- for (i = 0; i < args->nr_irqs; i++)
- gic_set_priority(i, IRQ_DEFAULT_PRIO_REG);
-}
-
-static void guest_set_irq_line(uint32_t intid, uint32_t level)
-{
- kvm_inject_call(KVM_SET_IRQ_LINE, intid, 1, level, false);
-}
-
-static void test_inject_fail(struct test_args *args,
- uint32_t intid, kvm_inject_cmd cmd)
-{
- reset_stats();
-
- _KVM_INJECT(cmd, intid, true);
- /* no IRQ to handle on entry */
-
- GUEST_ASSERT_EQ(irq_handled, 0);
- GUEST_ASSERT_IAR_EMPTY();
-}
-
-static void guest_inject(struct test_args *args,
- uint32_t first_intid, uint32_t num,
- kvm_inject_cmd cmd)
-{
- uint32_t i;
-
- reset_stats();
-
- /* Cycle over all priorities to make things more interesting. */
- for (i = first_intid; i < num + first_intid; i++)
- gic_set_priority(i, (i % (KVM_NUM_PRIOS - 1)) << 3);
-
- asm volatile("msr daifset, #2" : : : "memory");
- KVM_INJECT_MULTI(cmd, first_intid, num);
-
- while (irq_handled < num) {
- wfi();
- local_irq_enable();
- isb(); /* handle IRQ */
- local_irq_disable();
- }
- local_irq_enable();
-
- GUEST_ASSERT_EQ(irq_handled, num);
- for (i = first_intid; i < num + first_intid; i++)
- GUEST_ASSERT_EQ(irqnr_received[i], 1);
- GUEST_ASSERT_IAR_EMPTY();
-
- reset_priorities(args);
-}
-
-/*
- * Restore the active state of multiple concurrent IRQs (given by
- * concurrent_irqs). This does what a live-migration would do on the
- * destination side assuming there are some active IRQs that were not
- * deactivated yet.
- */
-static void guest_restore_active(struct test_args *args,
- uint32_t first_intid, uint32_t num,
- kvm_inject_cmd cmd)
-{
- uint32_t prio, intid, ap1r;
- int i;
-
- /*
- * Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
- * in descending order, so intid+1 can preempt intid.
- */
- for (i = 0, prio = (num - 1) * 8; i < num; i++, prio -= 8) {
- GUEST_ASSERT(prio >= 0);
- intid = i + first_intid;
- gic_set_priority(intid, prio);
- }
-
- /*
- * In a real migration, KVM would restore all GIC state before running
- * guest code.
- */
- for (i = 0; i < num; i++) {
- intid = i + first_intid;
- KVM_ACTIVATE(cmd, intid);
- ap1r = gic_read_ap1r0();
- ap1r |= 1U << i;
- gic_write_ap1r0(ap1r);
- }
-
- /* This is where the "migration" would occur. */
-
- /* finish handling the IRQs starting with the highest priority one. */
- for (i = 0; i < num; i++) {
- intid = num - i - 1 + first_intid;
- gic_set_eoi(intid);
- if (args->eoi_split)
- gic_set_dir(intid);
- }
-
- for (i = 0; i < num; i++)
- GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
- GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
- GUEST_ASSERT_IAR_EMPTY();
-}
-
-/*
- * Polls the IAR until it's not a spurious interrupt.
- *
- * This function should only be used in test_inject_preemption (with IRQs
- * masked).
- */
-static uint32_t wait_for_and_activate_irq(void)
-{
- uint32_t intid;
-
- do {
- asm volatile("wfi" : : : "memory");
- intid = gic_get_and_ack_irq();
- } while (intid == IAR_SPURIOUS);
-
- return intid;
-}
-
-/*
- * Inject multiple concurrent IRQs (num IRQs starting at first_intid) and
- * handle them without handling the actual exceptions. This is done by masking
- * interrupts for the whole test.
- */
-static void test_inject_preemption(struct test_args *args,
- uint32_t first_intid, int num,
- kvm_inject_cmd cmd)
-{
- uint32_t intid, prio, step = KVM_PRIO_STEPS;
- int i;
-
- /*
- * Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
- * in descending order, so intid+1 can preempt intid.
- */
- for (i = 0, prio = (num - 1) * step; i < num; i++, prio -= step) {
- GUEST_ASSERT(prio >= 0);
- intid = i + first_intid;
- gic_set_priority(intid, prio);
- }
-
- local_irq_disable();
-
- for (i = 0; i < num; i++) {
- uint32_t tmp;
- intid = i + first_intid;
- KVM_INJECT(cmd, intid);
- /* Each successive IRQ will preempt the previous one. */
- tmp = wait_for_and_activate_irq();
- GUEST_ASSERT_EQ(tmp, intid);
- if (args->level_sensitive)
- guest_set_irq_line(intid, 0);
- }
-
- /* finish handling the IRQs starting with the highest priority one. */
- for (i = 0; i < num; i++) {
- intid = num - i - 1 + first_intid;
- gic_set_eoi(intid);
- if (args->eoi_split)
- gic_set_dir(intid);
- }
-
- local_irq_enable();
-
- for (i = 0; i < num; i++)
- GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
- GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
- GUEST_ASSERT_IAR_EMPTY();
-
- reset_priorities(args);
-}
-
-static void test_injection(struct test_args *args, struct kvm_inject_desc *f)
-{
- uint32_t nr_irqs = args->nr_irqs;
-
- if (f->sgi) {
- guest_inject(args, MIN_SGI, 1, f->cmd);
- guest_inject(args, 0, 16, f->cmd);
- }
-
- if (f->ppi)
- guest_inject(args, MIN_PPI, 1, f->cmd);
-
- if (f->spi) {
- guest_inject(args, MIN_SPI, 1, f->cmd);
- guest_inject(args, nr_irqs - 1, 1, f->cmd);
- guest_inject(args, MIN_SPI, nr_irqs - MIN_SPI, f->cmd);
- }
-}
-
-static void test_injection_failure(struct test_args *args,
- struct kvm_inject_desc *f)
-{
- uint32_t bad_intid[] = { args->nr_irqs, 1020, 1024, 1120, 5120, ~0U, };
- int i;
-
- for (i = 0; i < ARRAY_SIZE(bad_intid); i++)
- test_inject_fail(args, bad_intid[i], f->cmd);
-}
-
-static void test_preemption(struct test_args *args, struct kvm_inject_desc *f)
-{
- /*
- * Test up to 4 levels of preemption. The reason is that KVM doesn't
- * currently implement the ability to have more than the number-of-LRs
- * number of concurrently active IRQs. The number of LRs implemented is
- * IMPLEMENTATION DEFINED, however, it seems that most implement 4.
- */
- if (f->sgi)
- test_inject_preemption(args, MIN_SGI, 4, f->cmd);
-
- if (f->ppi)
- test_inject_preemption(args, MIN_PPI, 4, f->cmd);
-
- if (f->spi)
- test_inject_preemption(args, MIN_SPI, 4, f->cmd);
-}
-
-static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f)
-{
- /* Test up to 4 active IRQs. Same reason as in test_preemption. */
- if (f->sgi)
- guest_restore_active(args, MIN_SGI, 4, f->cmd);
-
- if (f->ppi)
- guest_restore_active(args, MIN_PPI, 4, f->cmd);
-
- if (f->spi)
- guest_restore_active(args, MIN_SPI, 4, f->cmd);
-}
-
-static void guest_code(struct test_args *args)
-{
- uint32_t i, nr_irqs = args->nr_irqs;
- bool level_sensitive = args->level_sensitive;
- struct kvm_inject_desc *f, *inject_fns;
-
- gic_init(GIC_V3, 1);
-
- for (i = 0; i < nr_irqs; i++)
- gic_irq_enable(i);
-
- for (i = MIN_SPI; i < nr_irqs; i++)
- gic_irq_set_config(i, !level_sensitive);
-
- gic_set_eoi_split(args->eoi_split);
-
- reset_priorities(args);
- gic_set_priority_mask(CPU_PRIO_MASK);
-
- inject_fns = level_sensitive ? inject_level_fns
- : inject_edge_fns;
-
- local_irq_enable();
-
- /* Start the tests. */
- for_each_supported_inject_fn(args, inject_fns, f) {
- test_injection(args, f);
- test_preemption(args, f);
- test_injection_failure(args, f);
- }
-
- /*
- * Restore the active state of IRQs. This would happen when live
- * migrating IRQs in the middle of being handled.
- */
- for_each_supported_activate_fn(args, set_active_fns, f)
- test_restore_active(args, f);
-
- GUEST_DONE();
-}
-
-static void kvm_irq_line_check(struct kvm_vm *vm, uint32_t intid, int level,
- struct test_args *test_args, bool expect_failure)
-{
- int ret;
-
- if (!expect_failure) {
- kvm_arm_irq_line(vm, intid, level);
- } else {
- /* The interface doesn't allow larger intid's. */
- if (intid > KVM_ARM_IRQ_NUM_MASK)
- return;
-
- ret = _kvm_arm_irq_line(vm, intid, level);
- TEST_ASSERT(ret != 0 && errno == EINVAL,
- "Bad intid %i did not cause KVM_IRQ_LINE "
- "error: rc: %i errno: %i", intid, ret, errno);
- }
-}
-
-void kvm_irq_set_level_info_check(int gic_fd, uint32_t intid, int level,
- bool expect_failure)
-{
- if (!expect_failure) {
- kvm_irq_set_level_info(gic_fd, intid, level);
- } else {
- int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
- /*
- * The kernel silently fails for invalid SPIs and SGIs (which
- * are not level-sensitive). It only checks for intid to not
- * spill over 1U << 10 (the max reserved SPI). Also, callers
- * are supposed to mask the intid with 0x3ff (1023).
- */
- if (intid > VGIC_MAX_RESERVED)
- TEST_ASSERT(ret != 0 && errno == EINVAL,
- "Bad intid %i did not cause VGIC_GRP_LEVEL_INFO "
- "error: rc: %i errno: %i", intid, ret, errno);
- else
- TEST_ASSERT(!ret, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO "
- "for intid %i failed, rc: %i errno: %i",
- intid, ret, errno);
- }
-}
-
-static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm,
- uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
- bool expect_failure)
-{
- struct kvm_irq_routing *routing;
- int ret;
- uint64_t i;
-
- assert(num <= kvm_max_routes && kvm_max_routes <= KVM_MAX_IRQ_ROUTES);
-
- routing = kvm_gsi_routing_create();
- for (i = intid; i < (uint64_t)intid + num; i++)
- kvm_gsi_routing_irqchip_add(routing, i - MIN_SPI, i - MIN_SPI);
-
- if (!expect_failure) {
- kvm_gsi_routing_write(vm, routing);
- } else {
- ret = _kvm_gsi_routing_write(vm, routing);
- /* The kernel only checks e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS */
- if (((uint64_t)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS)
- TEST_ASSERT(ret != 0 && errno == EINVAL,
- "Bad intid %u did not cause KVM_SET_GSI_ROUTING "
- "error: rc: %i errno: %i", intid, ret, errno);
- else
- TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING "
- "for intid %i failed, rc: %i errno: %i",
- intid, ret, errno);
- }
-}
-
-static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid,
- struct kvm_vcpu *vcpu,
- bool expect_failure)
-{
- /*
- * Ignore this when expecting failure as invalid intids will lead to
- * either trying to inject SGIs when we configured the test to be
- * level_sensitive (or the reverse), or inject large intids which
- * will lead to writing above the ISPENDR register space (and we
- * don't want to do that either).
- */
- if (!expect_failure)
- kvm_irq_write_ispendr(gic_fd, intid, vcpu);
-}
-
-static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
- uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
- bool expect_failure)
-{
- int fd[MAX_SPI];
- uint64_t val;
- int ret, f;
- uint64_t i;
-
- /*
- * There is no way to try injecting an SGI or PPI as the interface
- * starts counting from the first SPI (above the private ones), so just
- * exit.
- */
- if (INTID_IS_SGI(intid) || INTID_IS_PPI(intid))
- return;
-
- kvm_set_gsi_routing_irqchip_check(vm, intid, num,
- kvm_max_routes, expect_failure);
-
- /*
-	 * If expect_failure, then just inject anyway. These
- * will silently fail. And in any case, the guest will check
- * that no actual interrupt was injected for those cases.
- */
-
- for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
- fd[f] = eventfd(0, 0);
- TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f]));
- }
-
- for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
- struct kvm_irqfd irqfd = {
- .fd = fd[f],
- .gsi = i - MIN_SPI,
- };
- assert(i <= (uint64_t)UINT_MAX);
- vm_ioctl(vm, KVM_IRQFD, &irqfd);
- }
-
- for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
- val = 1;
- ret = write(fd[f], &val, sizeof(uint64_t));
- TEST_ASSERT(ret == sizeof(uint64_t),
- __KVM_SYSCALL_ERROR("write()", ret));
- }
-
- for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
- close(fd[f]);
-}
-
-/* handles the valid case: intid=0xffffffff num=1 */
-#define for_each_intid(first, num, tmp, i) \
- for ((tmp) = (i) = (first); \
- (tmp) < (uint64_t)(first) + (uint64_t)(num); \
- (tmp)++, (i)++)
-
-static void run_guest_cmd(struct kvm_vcpu *vcpu, int gic_fd,
- struct kvm_inject_args *inject_args,
- struct test_args *test_args)
-{
- kvm_inject_cmd cmd = inject_args->cmd;
- uint32_t intid = inject_args->first_intid;
- uint32_t num = inject_args->num;
- int level = inject_args->level;
- bool expect_failure = inject_args->expect_failure;
- struct kvm_vm *vm = vcpu->vm;
- uint64_t tmp;
- uint32_t i;
-
- /* handles the valid case: intid=0xffffffff num=1 */
- assert(intid < UINT_MAX - num || num == 1);
-
- switch (cmd) {
- case KVM_INJECT_EDGE_IRQ_LINE:
- for_each_intid(intid, num, tmp, i)
- kvm_irq_line_check(vm, i, 1, test_args,
- expect_failure);
- for_each_intid(intid, num, tmp, i)
- kvm_irq_line_check(vm, i, 0, test_args,
- expect_failure);
- break;
- case KVM_SET_IRQ_LINE:
- for_each_intid(intid, num, tmp, i)
- kvm_irq_line_check(vm, i, level, test_args,
- expect_failure);
- break;
- case KVM_SET_IRQ_LINE_HIGH:
- for_each_intid(intid, num, tmp, i)
- kvm_irq_line_check(vm, i, 1, test_args,
- expect_failure);
- break;
- case KVM_SET_LEVEL_INFO_HIGH:
- for_each_intid(intid, num, tmp, i)
- kvm_irq_set_level_info_check(gic_fd, i, 1,
- expect_failure);
- break;
- case KVM_INJECT_IRQFD:
- kvm_routing_and_irqfd_check(vm, intid, num,
- test_args->kvm_max_routes,
- expect_failure);
- break;
- case KVM_WRITE_ISPENDR:
- for (i = intid; i < intid + num; i++)
- kvm_irq_write_ispendr_check(gic_fd, i, vcpu,
- expect_failure);
- break;
- case KVM_WRITE_ISACTIVER:
- for (i = intid; i < intid + num; i++)
- kvm_irq_write_isactiver(gic_fd, i, vcpu);
- break;
- default:
- break;
- }
-}
-
-static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
- struct kvm_inject_args *args)
-{
- struct kvm_inject_args *kvm_args_hva;
- vm_vaddr_t kvm_args_gva;
-
- kvm_args_gva = uc->args[1];
- kvm_args_hva = (struct kvm_inject_args *)addr_gva2hva(vm, kvm_args_gva);
- memcpy(args, kvm_args_hva, sizeof(struct kvm_inject_args));
-}
-
-static void print_args(struct test_args *args)
-{
- printf("nr-irqs=%d level-sensitive=%d eoi-split=%d\n",
- args->nr_irqs, args->level_sensitive,
- args->eoi_split);
-}
-
-static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
-{
- struct ucall uc;
- int gic_fd;
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct kvm_inject_args inject_args;
- vm_vaddr_t args_gva;
-
- struct test_args args = {
- .nr_irqs = nr_irqs,
- .level_sensitive = level_sensitive,
- .eoi_split = eoi_split,
- .kvm_max_routes = kvm_check_cap(KVM_CAP_IRQ_ROUTING),
- .kvm_supports_irqfd = kvm_check_cap(KVM_CAP_IRQFD),
- };
-
- print_args(&args);
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
- /* Setup the guest args page (so it gets the args). */
- args_gva = vm_vaddr_alloc_page(vm);
- memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
- vcpu_args_set(vcpu, 1, args_gva);
-
- gic_fd = vgic_v3_setup(vm, 1, nr_irqs);
- __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping");
-
- vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT,
- guest_irq_handlers[args.eoi_split][args.level_sensitive]);
-
- while (1) {
- vcpu_run(vcpu);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- kvm_inject_get_call(vm, &uc, &inject_args);
- run_guest_cmd(vcpu, gic_fd, &inject_args, &args);
- break;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_DONE:
- goto done;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
- }
-
-done:
- close(gic_fd);
- kvm_vm_free(vm);
-}
-
-static void help(const char *name)
-{
- printf(
- "\n"
- "usage: %s [-n num_irqs] [-e eoi_split] [-l level_sensitive]\n", name);
- printf(" -n: specify number of IRQs to setup the vgic with. "
- "It has to be a multiple of 32 and between 64 and 1024.\n");
- printf(" -e: if 1 then EOI is split into a write to DIR on top "
- "of writing EOI.\n");
- printf(" -l: specify whether the IRQs are level-sensitive (1) or not (0).");
- puts("");
- exit(1);
-}
-
-int main(int argc, char **argv)
-{
- uint32_t nr_irqs = 64;
- bool default_args = true;
- bool level_sensitive = false;
- int opt;
- bool eoi_split = false;
-
- while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) {
- switch (opt) {
- case 'n':
- nr_irqs = atoi_non_negative("Number of IRQs", optarg);
- if (nr_irqs > 1024 || nr_irqs % 32)
- help(argv[0]);
- break;
- case 'e':
- eoi_split = (bool)atoi_paranoid(optarg);
- default_args = false;
- break;
- case 'l':
- level_sensitive = (bool)atoi_paranoid(optarg);
- default_args = false;
- break;
- case 'h':
- default:
- help(argv[0]);
- break;
- }
- }
-
- /*
- * If the user just specified nr_irqs and/or gic_version, then run all
- * combinations.
- */
- if (default_args) {
- test_vgic(nr_irqs, false /* level */, false /* eoi_split */);
- test_vgic(nr_irqs, false /* level */, true /* eoi_split */);
- test_vgic(nr_irqs, true /* level */, false /* eoi_split */);
- test_vgic(nr_irqs, true /* level */, true /* eoi_split */);
- } else {
- test_vgic(nr_irqs, level_sensitive, eoi_split);
- }
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * vgic_lpi_stress - Stress test for KVM's ITS emulation
- *
- * Copyright (c) 2024 Google LLC
- */
-
-#include <linux/sizes.h>
-#include <pthread.h>
-#include <stdatomic.h>
-#include <sys/sysinfo.h>
-
-#include "kvm_util.h"
-#include "gic.h"
-#include "gic_v3.h"
-#include "gic_v3_its.h"
-#include "processor.h"
-#include "ucall.h"
-#include "vgic.h"
-
-#define TEST_MEMSLOT_INDEX 1
-
-#define GIC_LPI_OFFSET 8192
-
-static size_t nr_iterations = 1000;
-static vm_paddr_t gpa_base;
-
-static struct kvm_vm *vm;
-static struct kvm_vcpu **vcpus;
-static int gic_fd, its_fd;
-
-static struct test_data {
- bool request_vcpus_stop;
- u32 nr_cpus;
- u32 nr_devices;
- u32 nr_event_ids;
-
- vm_paddr_t device_table;
- vm_paddr_t collection_table;
- vm_paddr_t cmdq_base;
- void *cmdq_base_va;
- vm_paddr_t itt_tables;
-
- vm_paddr_t lpi_prop_table;
- vm_paddr_t lpi_pend_tables;
-} test_data = {
- .nr_cpus = 1,
- .nr_devices = 1,
- .nr_event_ids = 16,
-};
-
-static void guest_irq_handler(struct ex_regs *regs)
-{
- u32 intid = gic_get_and_ack_irq();
-
- if (intid == IAR_SPURIOUS)
- return;
-
- GUEST_ASSERT(intid >= GIC_LPI_OFFSET);
- gic_set_eoi(intid);
-}
-
-static void guest_setup_its_mappings(void)
-{
- u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET;
- u32 nr_events = test_data.nr_event_ids;
- u32 nr_devices = test_data.nr_devices;
- u32 nr_cpus = test_data.nr_cpus;
-
- for (coll_id = 0; coll_id < nr_cpus; coll_id++)
- its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true);
-
- /* Round-robin the LPIs to all of the vCPUs in the VM */
- coll_id = 0;
- for (device_id = 0; device_id < nr_devices; device_id++) {
- vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K);
-
- its_send_mapd_cmd(test_data.cmdq_base_va, device_id,
- itt_base, SZ_64K, true);
-
- for (event_id = 0; event_id < nr_events; event_id++) {
- its_send_mapti_cmd(test_data.cmdq_base_va, device_id,
- event_id, coll_id, intid++);
-
- coll_id = (coll_id + 1) % test_data.nr_cpus;
- }
- }
-}
-
-static void guest_invalidate_all_rdists(void)
-{
- int i;
-
- for (i = 0; i < test_data.nr_cpus; i++)
- its_send_invall_cmd(test_data.cmdq_base_va, i);
-}
-
-static void guest_setup_gic(void)
-{
- static atomic_int nr_cpus_ready = 0;
- u32 cpuid = guest_get_vcpuid();
-
- gic_init(GIC_V3, test_data.nr_cpus);
- gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K,
- test_data.lpi_pend_tables + (cpuid * SZ_64K));
-
- atomic_fetch_add(&nr_cpus_ready, 1);
-
- if (cpuid > 0)
- return;
-
- while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus)
- cpu_relax();
-
- its_init(test_data.collection_table, SZ_64K,
- test_data.device_table, SZ_64K,
- test_data.cmdq_base, SZ_64K);
-
- guest_setup_its_mappings();
- guest_invalidate_all_rdists();
-}
-
-static void guest_code(size_t nr_lpis)
-{
- guest_setup_gic();
-
- GUEST_SYNC(0);
-
- /*
- * Don't use WFI here to avoid blocking the vCPU thread indefinitely and
- * never getting the stop signal.
- */
- while (!READ_ONCE(test_data.request_vcpus_stop))
- cpu_relax();
-
- GUEST_DONE();
-}
-
-static void setup_memslot(void)
-{
- size_t pages;
- size_t sz;
-
- /*
- * For the ITS:
- * - A single level device table
- * - A single level collection table
- * - The command queue
- * - An ITT for each device
- */
- sz = (3 + test_data.nr_devices) * SZ_64K;
-
- /*
- * For the redistributors:
- * - A shared LPI configuration table
- * - An LPI pending table for each vCPU
- */
- sz += (1 + test_data.nr_cpus) * SZ_64K;
-
- pages = sz / vm->page_size;
- gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz;
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base,
- TEST_MEMSLOT_INDEX, pages, 0);
-}
-
-#define LPI_PROP_DEFAULT_PRIO 0xa0
-
-static void configure_lpis(void)
-{
- size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids;
- u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table);
- size_t i;
-
- for (i = 0; i < nr_lpis; i++) {
- tbl[i] = LPI_PROP_DEFAULT_PRIO |
- LPI_PROP_GROUP1 |
- LPI_PROP_ENABLED;
- }
-}
-
-static void setup_test_data(void)
-{
- size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K);
- u32 nr_devices = test_data.nr_devices;
- u32 nr_cpus = test_data.nr_cpus;
- vm_paddr_t cmdq_base;
-
- test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k,
- gpa_base,
- TEST_MEMSLOT_INDEX);
-
- test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k,
- gpa_base,
- TEST_MEMSLOT_INDEX);
-
- cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base,
- TEST_MEMSLOT_INDEX);
- virt_map(vm, cmdq_base, cmdq_base, pages_per_64k);
- test_data.cmdq_base = cmdq_base;
- test_data.cmdq_base_va = (void *)cmdq_base;
-
- test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices,
- gpa_base, TEST_MEMSLOT_INDEX);
-
- test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k,
- gpa_base, TEST_MEMSLOT_INDEX);
- configure_lpis();
-
- test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus,
- gpa_base, TEST_MEMSLOT_INDEX);
-
- sync_global_to_guest(vm, test_data);
-}
-
-static void setup_gic(void)
-{
- gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64);
- __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3");
-
- its_fd = vgic_its_setup(vm);
-}
-
-static void signal_lpi(u32 device_id, u32 event_id)
-{
- vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER;
-
- struct kvm_msi msi = {
- .address_lo = db_addr,
- .address_hi = db_addr >> 32,
- .data = event_id,
- .devid = device_id,
- .flags = KVM_MSI_VALID_DEVID,
- };
-
- /*
- * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM,
- * which for arm64 implies having a valid translation in the ITS.
- */
- TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1,
- "KVM_SIGNAL_MSI ioctl failed");
-}
-
-static pthread_barrier_t test_setup_barrier;
-
-static void *lpi_worker_thread(void *data)
-{
- u32 device_id = (size_t)data;
- u32 event_id;
- size_t i;
-
- pthread_barrier_wait(&test_setup_barrier);
-
- for (i = 0; i < nr_iterations; i++)
- for (event_id = 0; event_id < test_data.nr_event_ids; event_id++)
- signal_lpi(device_id, event_id);
-
- return NULL;
-}
-
-static void *vcpu_worker_thread(void *data)
-{
- struct kvm_vcpu *vcpu = data;
- struct ucall uc;
-
- while (true) {
- vcpu_run(vcpu);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- pthread_barrier_wait(&test_setup_barrier);
- continue;
- case UCALL_DONE:
- return NULL;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- default:
- TEST_FAIL("Unknown ucall: %lu", uc.cmd);
- }
- }
-
- return NULL;
-}
-
-static void report_stats(struct timespec delta)
-{
- double nr_lpis;
- double time;
-
- nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations;
-
- time = delta.tv_sec;
- time += ((double)delta.tv_nsec) / NSEC_PER_SEC;
-
- pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time);
-}
-
-static void run_test(void)
-{
- u32 nr_devices = test_data.nr_devices;
- u32 nr_vcpus = test_data.nr_cpus;
- pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t));
- pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t));
- struct timespec start, delta;
- size_t i;
-
- TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays");
-
- pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1);
-
- for (i = 0; i < nr_vcpus; i++)
- pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]);
-
- for (i = 0; i < nr_devices; i++)
- pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i);
-
- pthread_barrier_wait(&test_setup_barrier);
-
- clock_gettime(CLOCK_MONOTONIC, &start);
-
- for (i = 0; i < nr_devices; i++)
- pthread_join(lpi_threads[i], NULL);
-
- delta = timespec_elapsed(start);
- write_guest_global(vm, test_data.request_vcpus_stop, true);
-
- for (i = 0; i < nr_vcpus; i++)
- pthread_join(vcpu_threads[i], NULL);
-
- report_stats(delta);
-}
-
-static void setup_vm(void)
-{
- int i;
-
- vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu));
- TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
-
- vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);
-
- vm_init_descriptor_tables(vm);
- for (i = 0; i < test_data.nr_cpus; i++)
- vcpu_init_descriptor_tables(vcpus[i]);
-
- vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
-
- setup_memslot();
-
- setup_gic();
-
- setup_test_data();
-}
-
-static void destroy_vm(void)
-{
- close(its_fd);
- close(gic_fd);
- kvm_vm_free(vm);
- free(vcpus);
-}
-
-static void pr_usage(const char *name)
-{
- pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] -h\n", name);
- pr_info(" -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus);
- pr_info(" -d:\tnumber of devices (default: %u)\n", test_data.nr_devices);
- pr_info(" -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids);
- pr_info(" -i:\tnumber of iterations (default: %lu)\n", nr_iterations);
-}
-
-int main(int argc, char **argv)
-{
- u32 nr_threads;
- int c;
-
- while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
- switch (c) {
- case 'v':
- test_data.nr_cpus = atoi(optarg);
- break;
- case 'd':
- test_data.nr_devices = atoi(optarg);
- break;
- case 'e':
- test_data.nr_event_ids = atoi(optarg);
- break;
- case 'i':
- nr_iterations = strtoul(optarg, NULL, 0);
- break;
- case 'h':
- default:
- pr_usage(argv[0]);
- return 1;
- }
- }
-
- nr_threads = test_data.nr_cpus + test_data.nr_devices;
- if (nr_threads > get_nprocs())
- pr_info("WARNING: running %u threads on %d CPUs; performance is degraded.\n",
- nr_threads, get_nprocs());
-
- setup_vm();
-
- run_test();
-
- destroy_vm();
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vpmu_counter_access - Test vPMU event counter access
- *
- * Copyright (c) 2023 Google LLC.
- *
- * This test checks if the guest can see the same number of the PMU event
- * counters (PMCR_EL0.N) that userspace sets, if the guest can access
- * those counters, and if the guest is prevented from accessing any
- * other counters.
- * It also checks if the userspace accesses to the PMU registers honor the
- * PMCR.N value that's set for the guest.
- * This test runs only when KVM_CAP_ARM_PMU_V3 is supported on the host.
- */
-#include <kvm_util.h>
-#include <processor.h>
-#include <test_util.h>
-#include <vgic.h>
-#include <perf/arm_pmuv3.h>
-#include <linux/bitfield.h>
-
-/* The max number of the PMU event counters (excluding the cycle counter) */
-#define ARMV8_PMU_MAX_GENERAL_COUNTERS (ARMV8_PMU_MAX_COUNTERS - 1)
-
-/* The cycle counter bit position that's common among the PMU registers */
-#define ARMV8_PMU_CYCLE_IDX 31
-
-struct vpmu_vm {
- struct kvm_vm *vm;
- struct kvm_vcpu *vcpu;
- int gic_fd;
-};
-
-static struct vpmu_vm vpmu_vm;
-
-struct pmreg_sets {
- uint64_t set_reg_id;
- uint64_t clr_reg_id;
-};
-
-#define PMREG_SET(set, clr) {.set_reg_id = set, .clr_reg_id = clr}
-
-static uint64_t get_pmcr_n(uint64_t pmcr)
-{
- return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr);
-}
-
-static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n)
-{
- u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N);
-}
-
-static uint64_t get_counters_mask(uint64_t n)
-{
- uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX);
-
- if (n)
- mask |= GENMASK(n - 1, 0);
- return mask;
-}
-
-/* Read PMEVTCNTR<n>_EL0 through PMXEVCNTR_EL0 */
-static inline unsigned long read_sel_evcntr(int sel)
-{
- write_sysreg(sel, pmselr_el0);
- isb();
- return read_sysreg(pmxevcntr_el0);
-}
-
-/* Write PMEVTCNTR<n>_EL0 through PMXEVCNTR_EL0 */
-static inline void write_sel_evcntr(int sel, unsigned long val)
-{
- write_sysreg(sel, pmselr_el0);
- isb();
- write_sysreg(val, pmxevcntr_el0);
- isb();
-}
-
-/* Read PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */
-static inline unsigned long read_sel_evtyper(int sel)
-{
- write_sysreg(sel, pmselr_el0);
- isb();
- return read_sysreg(pmxevtyper_el0);
-}
-
-/* Write PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */
-static inline void write_sel_evtyper(int sel, unsigned long val)
-{
- write_sysreg(sel, pmselr_el0);
- isb();
- write_sysreg(val, pmxevtyper_el0);
- isb();
-}
-
-static void pmu_disable_reset(void)
-{
- uint64_t pmcr = read_sysreg(pmcr_el0);
-
- /* Reset all counters, disabling them */
- pmcr &= ~ARMV8_PMU_PMCR_E;
- write_sysreg(pmcr | ARMV8_PMU_PMCR_P, pmcr_el0);
- isb();
-}
-
-#define RETURN_READ_PMEVCNTRN(n) \
- return read_sysreg(pmevcntr##n##_el0)
-static unsigned long read_pmevcntrn(int n)
-{
- PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN);
- return 0;
-}
-
-#define WRITE_PMEVCNTRN(n) \
- write_sysreg(val, pmevcntr##n##_el0)
-static void write_pmevcntrn(int n, unsigned long val)
-{
- PMEVN_SWITCH(n, WRITE_PMEVCNTRN);
- isb();
-}
-
-#define READ_PMEVTYPERN(n) \
- return read_sysreg(pmevtyper##n##_el0)
-static unsigned long read_pmevtypern(int n)
-{
- PMEVN_SWITCH(n, READ_PMEVTYPERN);
- return 0;
-}
-
-#define WRITE_PMEVTYPERN(n) \
- write_sysreg(val, pmevtyper##n##_el0)
-static void write_pmevtypern(int n, unsigned long val)
-{
- PMEVN_SWITCH(n, WRITE_PMEVTYPERN);
- isb();
-}
-
-/*
- * The pmc_accessor structure has pointers to PMEV{CNTR,TYPER}<n>_EL0
- * accessors that test cases will use. Each of the accessors will
- * either directly reads/writes PMEV{CNTR,TYPER}<n>_EL0
- * (i.e. {read,write}_pmev{cnt,type}rn()), or reads/writes them through
- * PMXEV{CNTR,TYPER}_EL0 (i.e. {read,write}_sel_ev{cnt,type}r()).
- *
- * This is used to test that combinations of those accessors provide
- * the consistent behavior.
- */
-struct pmc_accessor {
- /* A function to be used to read PMEVTCNTR<n>_EL0 */
- unsigned long (*read_cntr)(int idx);
- /* A function to be used to write PMEVTCNTR<n>_EL0 */
- void (*write_cntr)(int idx, unsigned long val);
- /* A function to be used to read PMEVTYPER<n>_EL0 */
- unsigned long (*read_typer)(int idx);
- /* A function to be used to write PMEVTYPER<n>_EL0 */
- void (*write_typer)(int idx, unsigned long val);
-};
-
-struct pmc_accessor pmc_accessors[] = {
- /* test with all direct accesses */
- { read_pmevcntrn, write_pmevcntrn, read_pmevtypern, write_pmevtypern },
- /* test with all indirect accesses */
- { read_sel_evcntr, write_sel_evcntr, read_sel_evtyper, write_sel_evtyper },
- /* read with direct accesses, and write with indirect accesses */
- { read_pmevcntrn, write_sel_evcntr, read_pmevtypern, write_sel_evtyper },
- /* read with indirect accesses, and write with direct accesses */
- { read_sel_evcntr, write_pmevcntrn, read_sel_evtyper, write_pmevtypern },
-};
-
-/*
- * Convert a pointer of pmc_accessor to an index in pmc_accessors[],
- * assuming that the pointer is one of the entries in pmc_accessors[].
- */
-#define PMC_ACC_TO_IDX(acc) (acc - &pmc_accessors[0])
-
-#define GUEST_ASSERT_BITMAP_REG(regname, mask, set_expected) \
-{ \
- uint64_t _tval = read_sysreg(regname); \
- \
- if (set_expected) \
- __GUEST_ASSERT((_tval & mask), \
- "tval: 0x%lx; mask: 0x%lx; set_expected: %u", \
- _tval, mask, set_expected); \
- else \
- __GUEST_ASSERT(!(_tval & mask), \
- "tval: 0x%lx; mask: 0x%lx; set_expected: %u", \
- _tval, mask, set_expected); \
-}
-
-/*
- * Check if @mask bits in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers
- * are set or cleared as specified in @set_expected.
- */
-static void check_bitmap_pmu_regs(uint64_t mask, bool set_expected)
-{
- GUEST_ASSERT_BITMAP_REG(pmcntenset_el0, mask, set_expected);
- GUEST_ASSERT_BITMAP_REG(pmcntenclr_el0, mask, set_expected);
- GUEST_ASSERT_BITMAP_REG(pmintenset_el1, mask, set_expected);
- GUEST_ASSERT_BITMAP_REG(pmintenclr_el1, mask, set_expected);
- GUEST_ASSERT_BITMAP_REG(pmovsset_el0, mask, set_expected);
- GUEST_ASSERT_BITMAP_REG(pmovsclr_el0, mask, set_expected);
-}
-
-/*
- * Check if the bit in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers corresponding
- * to the specified counter (@pmc_idx) can be read/written as expected.
- * When @set_op is true, it tries to set the bit for the counter in
- * those registers by writing the SET registers (the bit won't be set
- * if the counter is not implemented though).
- * Otherwise, it tries to clear the bits in the registers by writing
- * the CLR registers.
- * Then, it checks if the values indicated in the registers are as expected.
- */
-static void test_bitmap_pmu_regs(int pmc_idx, bool set_op)
-{
- uint64_t pmcr_n, test_bit = BIT(pmc_idx);
- bool set_expected = false;
-
- if (set_op) {
- write_sysreg(test_bit, pmcntenset_el0);
- write_sysreg(test_bit, pmintenset_el1);
- write_sysreg(test_bit, pmovsset_el0);
-
- /* The bit will be set only if the counter is implemented */
- pmcr_n = get_pmcr_n(read_sysreg(pmcr_el0));
- set_expected = (pmc_idx < pmcr_n) ? true : false;
- } else {
- write_sysreg(test_bit, pmcntenclr_el0);
- write_sysreg(test_bit, pmintenclr_el1);
- write_sysreg(test_bit, pmovsclr_el0);
- }
- check_bitmap_pmu_regs(test_bit, set_expected);
-}
-
-/*
- * Tests for reading/writing registers for the (implemented) event counter
- * specified by @pmc_idx.
- */
-static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
-{
- uint64_t write_data, read_data;
-
- /* Disable all PMCs and reset all PMCs to zero. */
- pmu_disable_reset();
-
- /*
- * Tests for reading/writing {PMCNTEN,PMINTEN,PMOVS}{SET,CLR}_EL1.
- */
-
- /* Make sure that the bit in those registers are set to 0 */
- test_bitmap_pmu_regs(pmc_idx, false);
- /* Test if setting the bit in those registers works */
- test_bitmap_pmu_regs(pmc_idx, true);
- /* Test if clearing the bit in those registers works */
- test_bitmap_pmu_regs(pmc_idx, false);
-
- /*
- * Tests for reading/writing the event type register.
- */
-
- /*
- * Set the event type register to an arbitrary value just for testing
- * of reading/writing the register.
- * Arm ARM says that for the event from 0x0000 to 0x003F,
- * the value indicated in the PMEVTYPER<n>_EL0.evtCount field is
- * the value written to the field even when the specified event
- * is not supported.
- */
- write_data = (ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMUV3_PERFCTR_INST_RETIRED);
- acc->write_typer(pmc_idx, write_data);
- read_data = acc->read_typer(pmc_idx);
- __GUEST_ASSERT(read_data == write_data,
- "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
- pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
-
- /*
- * Tests for reading/writing the event count register.
- */
-
- read_data = acc->read_cntr(pmc_idx);
-
- /* The count value must be 0, as it is disabled and reset */
- __GUEST_ASSERT(read_data == 0,
- "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx",
- pmc_idx, PMC_ACC_TO_IDX(acc), read_data);
-
- write_data = read_data + pmc_idx + 0x12345;
- acc->write_cntr(pmc_idx, write_data);
- read_data = acc->read_cntr(pmc_idx);
- __GUEST_ASSERT(read_data == write_data,
- "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
- pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
-}
-
-#define INVALID_EC (-1ul)
-uint64_t expected_ec = INVALID_EC;
-
-static void guest_sync_handler(struct ex_regs *regs)
-{
- uint64_t esr, ec;
-
- esr = read_sysreg(esr_el1);
- ec = ESR_ELx_EC(esr);
-
- __GUEST_ASSERT(expected_ec == ec,
- "PC: 0x%lx; ESR: 0x%lx; EC: 0x%lx; EC expected: 0x%lx",
- regs->pc, esr, ec, expected_ec);
-
- /* skip the trapping instruction */
- regs->pc += 4;
-
- /* Use INVALID_EC to indicate an exception occurred */
- expected_ec = INVALID_EC;
-}
-
-/*
- * Run the given operation that should trigger an exception with the
- * given exception class. The exception handler (guest_sync_handler)
- * will reset op_end_addr to 0, expected_ec to INVALID_EC, and skip
- * the instruction that trapped.
- */
-#define TEST_EXCEPTION(ec, ops) \
-({ \
- GUEST_ASSERT(ec != INVALID_EC); \
- WRITE_ONCE(expected_ec, ec); \
- dsb(ish); \
- ops; \
- GUEST_ASSERT(expected_ec == INVALID_EC); \
-})
-
-/*
- * Tests for reading/writing registers for the unimplemented event counter
- * specified by @pmc_idx (>= PMCR_EL0.N).
- */
-static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
-{
- /*
- * Reading/writing the event count/type registers should cause
- * an UNDEFINED exception.
- */
- TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_cntr(pmc_idx));
- TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0));
- TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_typer(pmc_idx));
- TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_typer(pmc_idx, 0));
- /*
- * The bit corresponding to the (unimplemented) counter in
- * {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers should be RAZ.
- */
- test_bitmap_pmu_regs(pmc_idx, 1);
- test_bitmap_pmu_regs(pmc_idx, 0);
-}
-
-/*
- * The guest is configured with PMUv3 with @expected_pmcr_n number of
- * event counters.
- * Check if @expected_pmcr_n is consistent with PMCR_EL0.N, and
- * if reading/writing PMU registers for implemented or unimplemented
- * counters works as expected.
- */
-static void guest_code(uint64_t expected_pmcr_n)
-{
- uint64_t pmcr, pmcr_n, unimp_mask;
- int i, pmc;
-
- __GUEST_ASSERT(expected_pmcr_n <= ARMV8_PMU_MAX_GENERAL_COUNTERS,
- "Expected PMCR.N: 0x%lx; ARMv8 general counters: 0x%x",
- expected_pmcr_n, ARMV8_PMU_MAX_GENERAL_COUNTERS);
-
- pmcr = read_sysreg(pmcr_el0);
- pmcr_n = get_pmcr_n(pmcr);
-
- /* Make sure that PMCR_EL0.N indicates the value userspace set */
- __GUEST_ASSERT(pmcr_n == expected_pmcr_n,
- "Expected PMCR.N: 0x%lx, PMCR.N: 0x%lx",
- expected_pmcr_n, pmcr_n);
-
- /*
- * Make sure that (RAZ) bits corresponding to unimplemented event
- * counters in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers are reset
- * to zero.
- * (NOTE: bits for implemented event counters are reset to UNKNOWN)
- */
- unimp_mask = GENMASK_ULL(ARMV8_PMU_MAX_GENERAL_COUNTERS - 1, pmcr_n);
- check_bitmap_pmu_regs(unimp_mask, false);
-
- /*
- * Tests for reading/writing PMU registers for implemented counters.
- * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions.
- */
- for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) {
- for (pmc = 0; pmc < pmcr_n; pmc++)
- test_access_pmc_regs(&pmc_accessors[i], pmc);
- }
-
- /*
- * Tests for reading/writing PMU registers for unimplemented counters.
- * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions.
- */
- for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) {
- for (pmc = pmcr_n; pmc < ARMV8_PMU_MAX_GENERAL_COUNTERS; pmc++)
- test_access_invalid_pmc_regs(&pmc_accessors[i], pmc);
- }
-
- GUEST_DONE();
-}
-
-/* Create a VM that has one vCPU with PMUv3 configured. */
-static void create_vpmu_vm(void *guest_code)
-{
- struct kvm_vcpu_init init;
- uint8_t pmuver, ec;
- uint64_t dfr0, irq = 23;
- struct kvm_device_attr irq_attr = {
- .group = KVM_ARM_VCPU_PMU_V3_CTRL,
- .attr = KVM_ARM_VCPU_PMU_V3_IRQ,
- .addr = (uint64_t)&irq,
- };
- struct kvm_device_attr init_attr = {
- .group = KVM_ARM_VCPU_PMU_V3_CTRL,
- .attr = KVM_ARM_VCPU_PMU_V3_INIT,
- };
-
- /* The test creates the vpmu_vm multiple times. Ensure a clean state */
- memset(&vpmu_vm, 0, sizeof(vpmu_vm));
-
- vpmu_vm.vm = vm_create(1);
- vm_init_descriptor_tables(vpmu_vm.vm);
- for (ec = 0; ec < ESR_ELx_EC_MAX + 1; ec++) {
- vm_install_sync_handler(vpmu_vm.vm, VECTOR_SYNC_CURRENT, ec,
- guest_sync_handler);
- }
-
- /* Create vCPU with PMUv3 */
- vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
- init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
- vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code);
- vcpu_init_descriptor_tables(vpmu_vm.vcpu);
- vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64);
- __TEST_REQUIRE(vpmu_vm.gic_fd >= 0,
- "Failed to create vgic-v3, skipping");
-
- /* Make sure that PMUv3 support is indicated in the ID register */
- dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
- pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0);
- TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF &&
- pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP,
- "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver);
-
- /* Initialize vPMU */
- vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr);
- vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr);
-}
-
-static void destroy_vpmu_vm(void)
-{
- close(vpmu_vm.gic_fd);
- kvm_vm_free(vpmu_vm.vm);
-}
-
-static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n)
-{
- struct ucall uc;
-
- vcpu_args_set(vcpu, 1, pmcr_n);
- vcpu_run(vcpu);
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_DONE:
- break;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- break;
- }
-}
-
-static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail)
-{
- struct kvm_vcpu *vcpu;
- uint64_t pmcr, pmcr_orig;
-
- create_vpmu_vm(guest_code);
- vcpu = vpmu_vm.vcpu;
-
- pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
- pmcr = pmcr_orig;
-
- /*
- * Setting a larger value of PMCR.N should not modify the field, and
- * return a success.
- */
- set_pmcr_n(&pmcr, pmcr_n);
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr);
- pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
-
- if (expect_fail)
- TEST_ASSERT(pmcr_orig == pmcr,
- "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx",
- pmcr, pmcr_n);
- else
- TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr),
- "Failed to update PMCR.N to %lu (received: %lu)",
- pmcr_n, get_pmcr_n(pmcr));
-}
-
-/*
- * Create a guest with one vCPU, set the PMCR_EL0.N for the vCPU to @pmcr_n,
- * and run the test.
- */
-static void run_access_test(uint64_t pmcr_n)
-{
- uint64_t sp;
- struct kvm_vcpu *vcpu;
- struct kvm_vcpu_init init;
-
- pr_debug("Test with pmcr_n %lu\n", pmcr_n);
-
- test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
- vcpu = vpmu_vm.vcpu;
-
- /* Save the initial sp to restore them later to run the guest again */
- sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1));
-
- run_vcpu(vcpu, pmcr_n);
-
- /*
- * Reset and re-initialize the vCPU, and run the guest code again to
- * check if PMCR_EL0.N is preserved.
- */
- vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
- init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
- aarch64_vcpu_setup(vcpu, &init);
- vcpu_init_descriptor_tables(vcpu);
- vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp);
- vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
-
- run_vcpu(vcpu, pmcr_n);
-
- destroy_vpmu_vm();
-}
-
-static struct pmreg_sets validity_check_reg_sets[] = {
- PMREG_SET(SYS_PMCNTENSET_EL0, SYS_PMCNTENCLR_EL0),
- PMREG_SET(SYS_PMINTENSET_EL1, SYS_PMINTENCLR_EL1),
- PMREG_SET(SYS_PMOVSSET_EL0, SYS_PMOVSCLR_EL0),
-};
-
-/*
- * Create a VM, and check if KVM handles the userspace accesses of
- * the PMU register sets in @validity_check_reg_sets[] correctly.
- */
-static void run_pmregs_validity_test(uint64_t pmcr_n)
-{
- int i;
- struct kvm_vcpu *vcpu;
- uint64_t set_reg_id, clr_reg_id, reg_val;
- uint64_t valid_counters_mask, max_counters_mask;
-
- test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
- vcpu = vpmu_vm.vcpu;
-
- valid_counters_mask = get_counters_mask(pmcr_n);
- max_counters_mask = get_counters_mask(ARMV8_PMU_MAX_COUNTERS);
-
- for (i = 0; i < ARRAY_SIZE(validity_check_reg_sets); i++) {
- set_reg_id = validity_check_reg_sets[i].set_reg_id;
- clr_reg_id = validity_check_reg_sets[i].clr_reg_id;
-
- /*
- * Test if the 'set' and 'clr' variants of the registers
- * are initialized based on the number of valid counters.
- */
- reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id));
- TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
- "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
- KVM_ARM64_SYS_REG(set_reg_id), reg_val);
-
- reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id));
- TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
- "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
- KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
-
- /*
- * Using the 'set' variant, force-set the register to the
- * max number of possible counters and test if KVM discards
- * the bits for unimplemented counters as it should.
- */
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), max_counters_mask);
-
- reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id));
- TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
- "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
- KVM_ARM64_SYS_REG(set_reg_id), reg_val);
-
- reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id));
- TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
- "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
- KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
- }
-
- destroy_vpmu_vm();
-}
-
-/*
- * Create a guest with one vCPU, and attempt to set the PMCR_EL0.N for
- * the vCPU to @pmcr_n, which is larger than the host value.
- * The attempt should fail as @pmcr_n is too big to set for the vCPU.
- */
-static void run_error_test(uint64_t pmcr_n)
-{
- pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n);
-
- test_create_vpmu_vm_with_pmcr_n(pmcr_n, true);
- destroy_vpmu_vm();
-}
-
-/*
- * Return the default number of implemented PMU event counters excluding
- * the cycle counter (i.e. PMCR_EL0.N value) for the guest.
- */
-static uint64_t get_pmcr_n_limit(void)
-{
- uint64_t pmcr;
-
- create_vpmu_vm(guest_code);
- pmcr = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
- destroy_vpmu_vm();
- return get_pmcr_n(pmcr);
-}
-
-int main(void)
-{
- uint64_t i, pmcr_n;
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3));
-
- pmcr_n = get_pmcr_n_limit();
- for (i = 0; i <= pmcr_n; i++) {
- run_access_test(i);
- run_pmregs_validity_test(i);
- }
-
- for (i = pmcr_n + 1; i < ARMV8_PMU_MAX_COUNTERS; i++)
- run_error_test(i);
-
- return 0;
-}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * aarch32_id_regs - Test for ID register behavior on AArch64-only systems
+ *
+ * Copyright (c) 2022 Google LLC.
+ *
+ * Test that KVM handles the AArch64 views of the AArch32 ID registers as RAZ
+ * and WI from userspace.
+ */
+
+#include <stdint.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include <linux/bitfield.h>
+
+#define BAD_ID_REG_VAL 0x1badc0deul
+
+#define GUEST_ASSERT_REG_RAZ(reg) GUEST_ASSERT_EQ(read_sysreg_s(reg), 0)
+
+static void guest_main(void)
+{
+ GUEST_ASSERT_REG_RAZ(SYS_ID_PFR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_PFR1_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_DFR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_AFR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR1_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR2_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR3_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR1_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR2_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR3_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR4_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR5_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR4_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR6_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_MVFR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_MVFR1_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_MVFR2_EL1);
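+	/*
+	 * Note: sys_reg(3, 0, 0, 3, 3) and sys_reg(3, 0, 0, 3, 7) below are
+	 * currently unallocated encodings in the AArch32 ID register block,
+	 * which the architecture requires to read as zero as well.
+	 */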
+ GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 3));
+ GUEST_ASSERT_REG_RAZ(SYS_ID_PFR2_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_DFR1_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR5_EL1);
+ GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 7));
+
+ GUEST_DONE();
+}
+
+static void test_guest_raz(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+}
+
+static uint64_t raz_wi_reg_ids[] = {
+ KVM_ARM64_SYS_REG(SYS_ID_PFR0_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_PFR1_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_DFR0_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR0_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR1_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR2_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR3_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR0_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR1_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR2_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR3_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR4_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR5_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR4_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR6_EL1),
+ KVM_ARM64_SYS_REG(SYS_MVFR0_EL1),
+ KVM_ARM64_SYS_REG(SYS_MVFR1_EL1),
+ KVM_ARM64_SYS_REG(SYS_MVFR2_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_PFR2_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR5_EL1),
+};
+
+static void test_user_raz_wi(struct kvm_vcpu *vcpu)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(raz_wi_reg_ids); i++) {
+ uint64_t reg_id = raz_wi_reg_ids[i];
+ uint64_t val;
+
+ val = vcpu_get_reg(vcpu, reg_id);
+ TEST_ASSERT_EQ(val, 0);
+
+ /*
+ * Expect the ioctl to succeed with no effect on the register
+ * value.
+ */
+ vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
+
+ val = vcpu_get_reg(vcpu, reg_id);
+ TEST_ASSERT_EQ(val, 0);
+ }
+}
+
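+/*
+ * Unlike the RAZ/WI registers above, the IDs below are treated as RAZ but
+ * invariant: reads return zero, and a write of a mismatched value such as
+ * BAD_ID_REG_VAL is rejected with EINVAL (see test_user_raz_invariant()).
+ */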
+static uint64_t raz_invariant_reg_ids[] = {
+ KVM_ARM64_SYS_REG(SYS_ID_AFR0_EL1),
+ KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 3)),
+ KVM_ARM64_SYS_REG(SYS_ID_DFR1_EL1),
+ KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 7)),
+};
+
+static void test_user_raz_invariant(struct kvm_vcpu *vcpu)
+{
+ int i, r;
+
+ for (i = 0; i < ARRAY_SIZE(raz_invariant_reg_ids); i++) {
+ uint64_t reg_id = raz_invariant_reg_ids[i];
+ uint64_t val;
+
+ val = vcpu_get_reg(vcpu, reg_id);
+ TEST_ASSERT_EQ(val, 0);
+
+ r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
+
+ val = vcpu_get_reg(vcpu, reg_id);
+ TEST_ASSERT_EQ(val, 0);
+ }
+}
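+
+/*
+ * For reference, vcpu_get_reg()/__vcpu_set_reg() used above are thin wrappers
+ * around the KVM_GET_ONE_REG/KVM_SET_ONE_REG vCPU ioctls; a minimal sketch,
+ * with vcpu_fd standing in for the library-managed vCPU file descriptor:
+ *
+ *	uint64_t val;
+ *	struct kvm_one_reg reg = {
+ *		.id	= KVM_ARM64_SYS_REG(SYS_ID_PFR0_EL1),
+ *		.addr	= (uint64_t)&val,
+ *	};
+ *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
+ */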
+
+static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu)
+{
+ uint64_t val, el0;
+
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+
+ el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
+ return el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY;
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+ TEST_REQUIRE(vcpu_aarch64_only(vcpu));
+
+ test_user_raz_wi(vcpu);
+ test_user_raz_invariant(vcpu);
+ test_guest_raz(vcpu);
+
+ kvm_vm_free(vm);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * The test validates both the virtual and physical timer IRQs using
+ * CVAL and TVAL registers.
+ *
+ * Copyright (c) 2021, Google LLC.
+ */
+#include "arch_timer.h"
+#include "delay.h"
+#include "gic.h"
+#include "processor.h"
+#include "timer_test.h"
+#include "ucall_common.h"
+#include "vgic.h"
+
+enum guest_stage {
+ GUEST_STAGE_VTIMER_CVAL = 1,
+ GUEST_STAGE_VTIMER_TVAL,
+ GUEST_STAGE_PTIMER_CVAL,
+ GUEST_STAGE_PTIMER_TVAL,
+ GUEST_STAGE_MAX,
+};
+
+static int vtimer_irq, ptimer_irq;
+
+static void
+guest_configure_timer_action(struct test_vcpu_shared_data *shared_data)
+{
+ switch (shared_data->guest_stage) {
+ case GUEST_STAGE_VTIMER_CVAL:
+ timer_set_next_cval_ms(VIRTUAL, test_args.timer_period_ms);
+ shared_data->xcnt = timer_get_cntct(VIRTUAL);
+ timer_set_ctl(VIRTUAL, CTL_ENABLE);
+ break;
+ case GUEST_STAGE_VTIMER_TVAL:
+ timer_set_next_tval_ms(VIRTUAL, test_args.timer_period_ms);
+ shared_data->xcnt = timer_get_cntct(VIRTUAL);
+ timer_set_ctl(VIRTUAL, CTL_ENABLE);
+ break;
+ case GUEST_STAGE_PTIMER_CVAL:
+ timer_set_next_cval_ms(PHYSICAL, test_args.timer_period_ms);
+ shared_data->xcnt = timer_get_cntct(PHYSICAL);
+ timer_set_ctl(PHYSICAL, CTL_ENABLE);
+ break;
+ case GUEST_STAGE_PTIMER_TVAL:
+ timer_set_next_tval_ms(PHYSICAL, test_args.timer_period_ms);
+ shared_data->xcnt = timer_get_cntct(PHYSICAL);
+ timer_set_ctl(PHYSICAL, CTL_ENABLE);
+ break;
+ default:
+ GUEST_ASSERT(0);
+ }
+}
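+/*
+ * For reference: CVAL programs an absolute compare value while TVAL programs
+ * an offset from the current count, so each CVAL/TVAL pair of stages above is
+ * roughly equivalent to the sketch below, where t and period_ms stand for the
+ * timer under test and test_args.timer_period_ms, assuming the
+ * msec_to_cycles() helper from arch_timer.h:
+ *
+ *	timer_set_cval(t, timer_get_cntct(t) + msec_to_cycles(period_ms));
+ *	timer_set_tval(t, msec_to_cycles(period_ms));
+ */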
+
+static void guest_validate_irq(unsigned int intid,
+ struct test_vcpu_shared_data *shared_data)
+{
+ enum guest_stage stage = shared_data->guest_stage;
+ uint64_t xcnt = 0, xcnt_diff_us, cval = 0;
+ unsigned long xctl = 0;
+ unsigned int timer_irq = 0;
+ unsigned int accessor;
+
+ if (intid == IAR_SPURIOUS)
+ return;
+
+ switch (stage) {
+ case GUEST_STAGE_VTIMER_CVAL:
+ case GUEST_STAGE_VTIMER_TVAL:
+ accessor = VIRTUAL;
+ timer_irq = vtimer_irq;
+ break;
+ case GUEST_STAGE_PTIMER_CVAL:
+ case GUEST_STAGE_PTIMER_TVAL:
+ accessor = PHYSICAL;
+ timer_irq = ptimer_irq;
+ break;
+ default:
+ GUEST_ASSERT(0);
+ return;
+ }
+
+ xctl = timer_get_ctl(accessor);
+ if ((xctl & CTL_IMASK) || !(xctl & CTL_ENABLE))
+ return;
+
+ timer_set_ctl(accessor, CTL_IMASK);
+ xcnt = timer_get_cntct(accessor);
+ cval = timer_get_cval(accessor);
+
+ xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
+
+ /* Make sure we are dealing with the correct timer IRQ */
+ GUEST_ASSERT_EQ(intid, timer_irq);
+
+ /* Basic 'timer condition met' check */
+ __GUEST_ASSERT(xcnt >= cval,
+ "xcnt = 0x%lx, cval = 0x%lx, xcnt_diff_us = 0x%lx",
+ xcnt, cval, xcnt_diff_us);
+ __GUEST_ASSERT(xctl & CTL_ISTATUS, "xctl = 0x%lx", xctl);
+
+ WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ unsigned int intid = gic_get_and_ack_irq();
+ uint32_t cpu = guest_get_vcpuid();
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ guest_validate_irq(intid, shared_data);
+
+ gic_set_eoi(intid);
+}
+
+static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
+ enum guest_stage stage)
+{
+ uint32_t irq_iter, config_iter;
+
+ shared_data->guest_stage = stage;
+ shared_data->nr_iter = 0;
+
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+		/* Set up the next interrupt */
+ guest_configure_timer_action(shared_data);
+
+		/* Wait long enough for the timer interrupt to arrive */
+ udelay(msecs_to_usecs(test_args.timer_period_ms) +
+ test_args.timer_err_margin_us);
+
+ irq_iter = READ_ONCE(shared_data->nr_iter);
+ __GUEST_ASSERT(config_iter + 1 == irq_iter,
+ "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
+ " Guest timer interrupt was not triggered within the specified\n"
+			       " interval; try increasing the error margin with the -e option.\n",
+ config_iter + 1, irq_iter);
+ }
+}
+
+static void guest_code(void)
+{
+ uint32_t cpu = guest_get_vcpuid();
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ local_irq_disable();
+
+ gic_init(GIC_V3, test_args.nr_vcpus);
+
+ timer_set_ctl(VIRTUAL, CTL_IMASK);
+ timer_set_ctl(PHYSICAL, CTL_IMASK);
+
+ gic_irq_enable(vtimer_irq);
+ gic_irq_enable(ptimer_irq);
+ local_irq_enable();
+
+ guest_run_stage(shared_data, GUEST_STAGE_VTIMER_CVAL);
+ guest_run_stage(shared_data, GUEST_STAGE_VTIMER_TVAL);
+ guest_run_stage(shared_data, GUEST_STAGE_PTIMER_CVAL);
+ guest_run_stage(shared_data, GUEST_STAGE_PTIMER_TVAL);
+
+ GUEST_DONE();
+}
+
+static void test_init_timer_irq(struct kvm_vm *vm)
+{
+	/* The timer intid should be the same for all vCPUs, so query only vCPU-0 */
+ vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
+ KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
+ vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
+ KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
+
+ sync_global_to_guest(vm, ptimer_irq);
+ sync_global_to_guest(vm, vtimer_irq);
+
+ pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
+}
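+/*
+ * The vcpu_device_attr_get() calls above map onto KVM_GET_DEVICE_ATTR ioctls
+ * on the vCPU fd; sketch only (vcpu_fd is a stand-in for the library-managed
+ * file descriptor):
+ *
+ *	struct kvm_device_attr attr = {
+ *		.group	= KVM_ARM_VCPU_TIMER_CTRL,
+ *		.attr	= KVM_ARM_VCPU_TIMER_IRQ_PTIMER,
+ *		.addr	= (uint64_t)&ptimer_irq,
+ *	};
+ *	ioctl(vcpu_fd, KVM_GET_DEVICE_ATTR, &attr);
+ */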
+
+static int gic_fd;
+
+struct kvm_vm *test_vm_create(void)
+{
+ struct kvm_vm *vm;
+ unsigned int i;
+ int nr_vcpus = test_args.nr_vcpus;
+
+ vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+
+ vm_init_descriptor_tables(vm);
+ vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
+
+ if (!test_args.reserved) {
+ if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET)) {
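+			/*
+			 * A single VM-wide offset is applied to the guest's
+			 * view of the counters.
+			 */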
+ struct kvm_arm_counter_offset offset = {
+ .counter_offset = test_args.counter_offset,
+ .reserved = 0,
+ };
+ vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &offset);
+ } else
+ TEST_FAIL("no support for global offset");
+ }
+
+ for (i = 0; i < nr_vcpus; i++)
+ vcpu_init_descriptor_tables(vcpus[i]);
+
+ test_init_timer_irq(vm);
+ gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);
+ __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3");
+
+ /* Make all the test's cmdline args visible to the guest */
+ sync_global_to_guest(vm, test_args);
+
+ return vm;
+}
+
+void test_vm_cleanup(struct kvm_vm *vm)
+{
+ close(gic_fd);
+ kvm_vm_free(vm);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * arch_timer_edge_cases.c - Tests the aarch64 timer IRQ functionality.
+ *
+ * The test validates some edge cases related to the arch-timer:
+ * - timers above the max TVAL value.
+ * - timers in the past.
+ * - moving counters ahead of and behind pending timers.
+ * - reprogramming timers.
+ * - timers firing multiple times.
+ * - masking/unmasking using the timer control mask.
+ *
+ * Copyright (c) 2021, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+#include <pthread.h>
+#include <sys/sysinfo.h>
+
+#include "arch_timer.h"
+#include "gic.h"
+#include "vgic.h"
+
+static const uint64_t CVAL_MAX = ~0ULL;
+/* tval is a signed 32-bit int. */
+static const int32_t TVAL_MAX = INT32_MAX;
+static const int32_t TVAL_MIN = INT32_MIN;
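+
+/*
+ * A TVAL write programs CVAL = CNT + SignExtend(TVAL), so a negative TVAL
+ * places the compare value in the past and the timer condition is met as
+ * soon as the timer is enabled.
+ */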
+
+/* How long to wait before concluding that no IRQ will arrive. */
+static const uint32_t TIMEOUT_NO_IRQ_US = 50000;
+
+/* A nice counter value to use as the starting one for most tests. */
+static const uint64_t DEF_CNT = (CVAL_MAX / 2);
+
+/* Number of runs. */
+static const uint32_t NR_TEST_ITERS_DEF = 5;
+
+/* Default wait test time in ms. */
+static const uint32_t WAIT_TEST_MS = 10;
+
+/* Default "long" wait test time in ms. */
+static const uint32_t LONG_WAIT_TEST_MS = 100;
+
+/* Shared with IRQ handler. */
+struct test_vcpu_shared_data {
+ atomic_t handled;
+ atomic_t spurious;
+} shared_data;
+
+struct test_args {
+ /* Virtual or physical timer and counter tests. */
+ enum arch_timer timer;
+ /* Delay used for most timer tests. */
+ uint64_t wait_ms;
+ /* Delay used in the test_long_timer_delays test. */
+ uint64_t long_wait_ms;
+ /* Number of iterations. */
+ int iterations;
+ /* Whether to test the physical timer. */
+ bool test_physical;
+ /* Whether to test the virtual timer. */
+ bool test_virtual;
+};
+
+struct test_args test_args = {
+ .wait_ms = WAIT_TEST_MS,
+ .long_wait_ms = LONG_WAIT_TEST_MS,
+ .iterations = NR_TEST_ITERS_DEF,
+ .test_physical = true,
+ .test_virtual = true,
+};
+
+static int vtimer_irq, ptimer_irq;
+
+enum sync_cmd {
+ SET_COUNTER_VALUE,
+ USERSPACE_USLEEP,
+ USERSPACE_SCHED_YIELD,
+ USERSPACE_MIGRATE_SELF,
+ NO_USERSPACE_CMD,
+};
+
+typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec);
+
+static void sleep_poll(enum arch_timer timer, uint64_t usec);
+static void sleep_sched_poll(enum arch_timer timer, uint64_t usec);
+static void sleep_in_userspace(enum arch_timer timer, uint64_t usec);
+static void sleep_migrate(enum arch_timer timer, uint64_t usec);
+
+sleep_method_t sleep_method[] = {
+ sleep_poll,
+ sleep_sched_poll,
+ sleep_migrate,
+ sleep_in_userspace,
+};
+
+typedef void (*irq_wait_method_t)(void);
+
+static void wait_for_non_spurious_irq(void);
+static void wait_poll_for_irq(void);
+static void wait_sched_poll_for_irq(void);
+static void wait_migrate_poll_for_irq(void);
+
+irq_wait_method_t irq_wait_method[] = {
+ wait_for_non_spurious_irq,
+ wait_poll_for_irq,
+ wait_sched_poll_for_irq,
+ wait_migrate_poll_for_irq,
+};
+
+enum timer_view {
+ TIMER_CVAL,
+ TIMER_TVAL,
+};
+
+static void assert_irqs_handled(uint32_t n)
+{
+ int h = atomic_read(&shared_data.handled);
+
+	__GUEST_ASSERT(h == n, "Handled %d IRQs but expected %d", h, n);
+}
+
+static void userspace_cmd(uint64_t cmd)
+{
+ GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0);
+}
+
+static void userspace_migrate_vcpu(void)
+{
+ userspace_cmd(USERSPACE_MIGRATE_SELF);
+}
+
+static void userspace_sleep(uint64_t usecs)
+{
+ GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0);
+}
+
+static void set_counter(enum arch_timer timer, uint64_t counter)
+{
+ GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0);
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ unsigned int intid = gic_get_and_ack_irq();
+ enum arch_timer timer;
+ uint64_t cnt, cval;
+ uint32_t ctl;
+ bool timer_condition, istatus;
+
+ if (intid == IAR_SPURIOUS) {
+ atomic_inc(&shared_data.spurious);
+ goto out;
+ }
+
+ if (intid == ptimer_irq)
+ timer = PHYSICAL;
+ else if (intid == vtimer_irq)
+ timer = VIRTUAL;
+ else
+ goto out;
+
+ ctl = timer_get_ctl(timer);
+ cval = timer_get_cval(timer);
+ cnt = timer_get_cntct(timer);
+ timer_condition = cnt >= cval;
+ istatus = (ctl & CTL_ISTATUS) && (ctl & CTL_ENABLE);
+ GUEST_ASSERT_EQ(timer_condition, istatus);
+
+ /* Disable and mask the timer. */
+ timer_set_ctl(timer, CTL_IMASK);
+
+ atomic_inc(&shared_data.handled);
+
+out:
+ gic_set_eoi(intid);
+}
+
+static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles,
+ uint32_t ctl)
+{
+ atomic_set(&shared_data.handled, 0);
+ atomic_set(&shared_data.spurious, 0);
+ timer_set_cval(timer, cval_cycles);
+ timer_set_ctl(timer, ctl);
+}
+
+static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles,
+ uint32_t ctl)
+{
+ atomic_set(&shared_data.handled, 0);
+ atomic_set(&shared_data.spurious, 0);
+ timer_set_ctl(timer, ctl);
+ timer_set_tval(timer, tval_cycles);
+}
+
+static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl,
+ enum timer_view tv)
+{
+ switch (tv) {
+ case TIMER_CVAL:
+ set_cval_irq(timer, xval, ctl);
+ break;
+ case TIMER_TVAL:
+ set_tval_irq(timer, xval, ctl);
+ break;
+ default:
+		GUEST_FAIL("Unexpected timer view for timer %d", timer);
+ }
+}
+
+/*
+ * Note that this can theoretically hang forever, so we rely on having
+ * a timeout mechanism in the "runner", like:
+ * tools/testing/selftests/kselftest/runner.sh.
+ */
+static void wait_for_non_spurious_irq(void)
+{
+ int h;
+
+ local_irq_disable();
+
+ for (h = atomic_read(&shared_data.handled); h == atomic_read(&shared_data.handled);) {
+ wfi();
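+		/*
+		 * Briefly unmask IRQs so the pending interrupt can be taken
+		 * and handled, then mask again so the handled count is
+		 * sampled with IRQs disabled.
+		 */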
+ local_irq_enable();
+ isb(); /* handle IRQ */
+ local_irq_disable();
+ }
+}
+
+/*
+ * Wait for a non-spurious IRQ by polling in the guest or in
+ * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD).
+ *
+ * Note that this can theoretically hang forever, so we rely on having
+ * a timeout mechanism in the "runner", like:
+ * tools/testing/selftests/kselftest/runner.sh.
+ */
+static void poll_for_non_spurious_irq(enum sync_cmd usp_cmd)
+{
+ int h;
+
+ local_irq_disable();
+
+ h = atomic_read(&shared_data.handled);
+
+ local_irq_enable();
+ while (h == atomic_read(&shared_data.handled)) {
+ if (usp_cmd == NO_USERSPACE_CMD)
+ cpu_relax();
+ else
+ userspace_cmd(usp_cmd);
+ }
+ local_irq_disable();
+}
+
+static void wait_poll_for_irq(void)
+{
+ poll_for_non_spurious_irq(NO_USERSPACE_CMD);
+}
+
+static void wait_sched_poll_for_irq(void)
+{
+ poll_for_non_spurious_irq(USERSPACE_SCHED_YIELD);
+}
+
+static void wait_migrate_poll_for_irq(void)
+{
+ poll_for_non_spurious_irq(USERSPACE_MIGRATE_SELF);
+}
+
+/*
+ * Sleep for usec microseconds by polling in the guest or in
+ * userspace (e.g. userspace_cmd=USERSPACE_SCHEDULE).
+ */
+static void guest_poll(enum arch_timer test_timer, uint64_t usec,
+ enum sync_cmd usp_cmd)
+{
+ uint64_t cycles = usec_to_cycles(usec);
+ /* Whichever timer we are testing with, sleep with the other. */
+ enum arch_timer sleep_timer = 1 - test_timer;
+ uint64_t start = timer_get_cntct(sleep_timer);
+
+ while ((timer_get_cntct(sleep_timer) - start) < cycles) {
+ if (usp_cmd == NO_USERSPACE_CMD)
+ cpu_relax();
+ else
+ userspace_cmd(usp_cmd);
+ }
+}
+
+static void sleep_poll(enum arch_timer timer, uint64_t usec)
+{
+ guest_poll(timer, usec, NO_USERSPACE_CMD);
+}
+
+static void sleep_sched_poll(enum arch_timer timer, uint64_t usec)
+{
+ guest_poll(timer, usec, USERSPACE_SCHED_YIELD);
+}
+
+static void sleep_migrate(enum arch_timer timer, uint64_t usec)
+{
+ guest_poll(timer, usec, USERSPACE_MIGRATE_SELF);
+}
+
+static void sleep_in_userspace(enum arch_timer timer, uint64_t usec)
+{
+ userspace_sleep(usec);
+}
+
+/*
+ * Reset the timer state to some nice values like the counter not being close
+ * to the edge, and the control register masked and disabled.
+ */
+static void reset_timer_state(enum arch_timer timer, uint64_t cnt)
+{
+ set_counter(timer, cnt);
+ timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_timer_xval(enum arch_timer timer, uint64_t xval,
+ enum timer_view tv, irq_wait_method_t wm, bool reset_state,
+ uint64_t reset_cnt)
+{
+ local_irq_disable();
+
+ if (reset_state)
+ reset_timer_state(timer, reset_cnt);
+
+ set_xval_irq(timer, xval, CTL_ENABLE, tv);
+
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wm();
+
+ assert_irqs_handled(1);
+ local_irq_enable();
+}
+
+/*
+ * The test_timer_* functions will program the timer, wait for it, and assert
+ * the firing of the correct IRQ.
+ *
+ * These functions don't have a timeout and return as soon as they receive an
+ * IRQ. They can hang (forever), so we rely on having a timeout mechanism in
+ * the "runner", like: tools/testing/selftests/kselftest/runner.sh.
+ */
+
+static void test_timer_cval(enum arch_timer timer, uint64_t cval,
+ irq_wait_method_t wm, bool reset_state,
+ uint64_t reset_cnt)
+{
+ test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt);
+}
+
+static void test_timer_tval(enum arch_timer timer, int32_t tval,
+ irq_wait_method_t wm, bool reset_state,
+ uint64_t reset_cnt)
+{
+ test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state,
+ reset_cnt);
+}
+
+static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval,
+ uint64_t usec, enum timer_view timer_view,
+ sleep_method_t guest_sleep)
+{
+ local_irq_disable();
+
+ set_xval_irq(timer, xval, CTL_ENABLE | CTL_IMASK, timer_view);
+ guest_sleep(timer, usec);
+
+ local_irq_enable();
+ isb();
+
+ /* Assume success (no IRQ) after waiting usec microseconds */
+ assert_irqs_handled(0);
+}
+
+static void test_cval_no_irq(enum arch_timer timer, uint64_t cval,
+ uint64_t usec, sleep_method_t wm)
+{
+ test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm);
+}
+
+static void test_tval_no_irq(enum arch_timer timer, int32_t tval, uint64_t usec,
+ sleep_method_t wm)
+{
+ /* tval will be cast to an int32_t in test_xval_check_no_irq */
+ test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm);
+}
+
+/* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */
+static void test_timer_control_mask_then_unmask(enum arch_timer timer)
+{
+ reset_timer_state(timer, DEF_CNT);
+ set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
+
+ /* Unmask the timer, and then get an IRQ. */
+ local_irq_disable();
+ timer_set_ctl(timer, CTL_ENABLE);
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wait_for_non_spurious_irq();
+
+ assert_irqs_handled(1);
+ local_irq_enable();
+}
+
+/* Check that timer control masks actually mask a timer being fired. */
+static void test_timer_control_masks(enum arch_timer timer)
+{
+ reset_timer_state(timer, DEF_CNT);
+
+ /* Local IRQs are not masked at this point. */
+
+ set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
+
+ /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
+ sleep_poll(timer, TIMEOUT_NO_IRQ_US);
+
+ assert_irqs_handled(0);
+ timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_fire_a_timer_multiple_times(enum arch_timer timer,
+ irq_wait_method_t wm, int num)
+{
+ int i;
+
+ local_irq_disable();
+ reset_timer_state(timer, DEF_CNT);
+
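+ /* A tval of 0 makes the timer condition true immediately (CVAL = CNT). */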
+ set_tval_irq(timer, 0, CTL_ENABLE);
+
+ for (i = 1; i <= num; i++) {
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wm();
+
+ /* The IRQ handler masked and disabled the timer.
+ * Enable and unmask it again.
+ */
+ timer_set_ctl(timer, CTL_ENABLE);
+
+ assert_irqs_handled(i);
+ }
+
+ local_irq_enable();
+}
+
+static void test_timers_fired_multiple_times(enum arch_timer timer)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++)
+ test_fire_a_timer_multiple_times(timer, irq_wait_method[i], 10);
+}
+
+/*
+ * Set a timer for tval=delta_1_ms then reprogram it to
+ * tval=delta_2_ms. Check that the timer still fires. There is no
+ * timeout for the wait: as with the other wait methods, we rely on
+ * the runner's timeout.
+ */
+static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm,
+ int32_t delta_1_ms, int32_t delta_2_ms)
+{
+ local_irq_disable();
+ reset_timer_state(timer, DEF_CNT);
+
+ /* Program the timer to DEF_CNT + delta_1_ms. */
+ set_tval_irq(timer, msec_to_cycles(delta_1_ms), CTL_ENABLE);
+
+ /* Reprogram the timer to DEF_CNT + delta_2_ms. */
+ timer_set_tval(timer, msec_to_cycles(delta_2_ms));
+
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wm();
+
+ /* The IRQ should arrive at DEF_CNT + delta_2_ms (or after). */
+ GUEST_ASSERT(timer_get_cntct(timer) >=
+ DEF_CNT + msec_to_cycles(delta_2_ms));
+
+ local_irq_enable();
+ assert_irqs_handled(1);
+}
+
+static void test_reprogram_timers(enum arch_timer timer)
+{
+ int i;
+ uint64_t base_wait = test_args.wait_ms;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ /*
+ * Ensure reprogramming works whether going from a
+ * longer time to a shorter or vice versa.
+ */
+ test_reprogramming_timer(timer, irq_wait_method[i], 2 * base_wait,
+ base_wait);
+ test_reprogramming_timer(timer, irq_wait_method[i], base_wait,
+ 2 * base_wait);
+ }
+}
+
+static void test_basic_functionality(enum arch_timer timer)
+{
+ int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms);
+ uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ irq_wait_method_t wm = irq_wait_method[i];
+
+ test_timer_cval(timer, cval, wm, true, DEF_CNT);
+ test_timer_tval(timer, tval, wm, true, DEF_CNT);
+ }
+}
+
+/*
+ * This test checks basic timer behavior without actually firing timers, things
+ * like: the relationship between cval and tval, tval down-counting.
+ */
+static void timers_sanity_checks(enum arch_timer timer, bool use_sched)
+{
+ reset_timer_state(timer, DEF_CNT);
+
+ local_irq_disable();
+
+ /* cval in the past */
+ timer_set_cval(timer,
+ timer_get_cntct(timer) -
+ msec_to_cycles(test_args.wait_ms));
+ if (use_sched)
+ userspace_migrate_vcpu();
+ GUEST_ASSERT(timer_get_tval(timer) < 0);
+
+ /* tval in the past */
+ timer_set_tval(timer, -1);
+ if (use_sched)
+ userspace_migrate_vcpu();
+ GUEST_ASSERT(timer_get_cval(timer) < timer_get_cntct(timer));
+
+ /* tval larger than TVAL_MAX. This requires programming with
+ * timer_set_cval instead so the value is expressible
+ */
+ timer_set_cval(timer,
+ timer_get_cntct(timer) + TVAL_MAX +
+ msec_to_cycles(test_args.wait_ms));
+ if (use_sched)
+ userspace_migrate_vcpu();
+ GUEST_ASSERT(timer_get_tval(timer) <= 0);
+
+ /*
+ * tval larger than 2 * TVAL_MAX.
+ * Twice the TVAL_MAX completely loops around the TVAL.
+ */
+ timer_set_cval(timer,
+ timer_get_cntct(timer) + 2ULL * TVAL_MAX +
+ msec_to_cycles(test_args.wait_ms));
+ if (use_sched)
+ userspace_migrate_vcpu();
+ GUEST_ASSERT(timer_get_tval(timer) <=
+ msec_to_cycles(test_args.wait_ms));
+
+ /* Negative tval that rolls over from 0. */
+ set_counter(timer, msec_to_cycles(1));
+ timer_set_tval(timer, -1 * msec_to_cycles(test_args.wait_ms));
+ if (use_sched)
+ userspace_migrate_vcpu();
+ GUEST_ASSERT(timer_get_cval(timer) >= (CVAL_MAX - msec_to_cycles(test_args.wait_ms)));
+
+ /* tval should keep down-counting from 0 to -1. */
+ timer_set_tval(timer, 0);
+ sleep_poll(timer, 1);
+ GUEST_ASSERT(timer_get_tval(timer) < 0);
+
+ local_irq_enable();
+
+ /* Mask and disable any pending timer. */
+ timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_timers_sanity_checks(enum arch_timer timer)
+{
+ timers_sanity_checks(timer, false);
+ /* Check how KVM saves/restores these edge-case values. */
+ timers_sanity_checks(timer, true);
+}
+
+static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t wm)
+{
+ local_irq_disable();
+ reset_timer_state(timer, DEF_CNT);
+
+ set_cval_irq(timer,
+ (uint64_t) TVAL_MAX +
+ msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE);
+
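+ /* Jump the counter to TVAL_MAX so the timer fires roughly wait_ms/2 later. */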
+ set_counter(timer, TVAL_MAX);
+
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wm();
+
+ assert_irqs_handled(1);
+ local_irq_enable();
+}
+
+/* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */
+static void test_timers_above_tval_max(enum arch_timer timer)
+{
+ uint64_t cval;
+ int i;
+
+ /*
+ * Test that the system is not implementing cval in terms of
+ * tval. If that was the case, setting a cval to "cval = now
+ * + TVAL_MAX + wait_ms" would wrap to "cval = now +
+ * wait_ms", and the timer would fire immediately. Test that it
+ * doesn't.
+ */
+ for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+ reset_timer_state(timer, DEF_CNT);
+ cval = timer_get_cntct(timer) + TVAL_MAX +
+ msec_to_cycles(test_args.wait_ms);
+ test_cval_no_irq(timer, cval,
+ msecs_to_usecs(test_args.wait_ms) +
+ TIMEOUT_NO_IRQ_US, sleep_method[i]);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ /* Get the IRQ by moving the counter forward. */
+ test_set_cnt_after_tval_max(timer, irq_wait_method[i]);
+ }
+}
+
+/*
+ * Template function to be used by the test_move_counter_ahead_* tests. It
+ * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and
+ * then waits for an IRQ.
+ */
+static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1,
+ uint64_t xval, uint64_t cnt_2,
+ irq_wait_method_t wm, enum timer_view tv)
+{
+ local_irq_disable();
+
+ set_counter(timer, cnt_1);
+ timer_set_ctl(timer, CTL_IMASK);
+
+ set_xval_irq(timer, xval, CTL_ENABLE, tv);
+ set_counter(timer, cnt_2);
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wm();
+
+ assert_irqs_handled(1);
+ local_irq_enable();
+}
+
+/*
+ * Template function to be used by the test_move_counters_* tests. It
+ * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and
+ * then sleeps for TIMEOUT_NO_IRQ_US and checks that no IRQ fired.
+ */
+static void test_set_cnt_after_xval_no_irq(enum arch_timer timer,
+ uint64_t cnt_1, uint64_t xval,
+ uint64_t cnt_2,
+ sleep_method_t guest_sleep,
+ enum timer_view tv)
+{
+ local_irq_disable();
+
+ set_counter(timer, cnt_1);
+ timer_set_ctl(timer, CTL_IMASK);
+
+ set_xval_irq(timer, xval, CTL_ENABLE, tv);
+ set_counter(timer, cnt_2);
+ guest_sleep(timer, TIMEOUT_NO_IRQ_US);
+
+ local_irq_enable();
+ isb();
+
+ /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
+ assert_irqs_handled(0);
+ timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1,
+ int32_t tval, uint64_t cnt_2,
+ irq_wait_method_t wm)
+{
+ test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL);
+}
+
+static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1,
+ uint64_t cval, uint64_t cnt_2,
+ irq_wait_method_t wm)
+{
+ test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL);
+}
+
+static void test_set_cnt_after_tval_no_irq(enum arch_timer timer,
+ uint64_t cnt_1, int32_t tval,
+ uint64_t cnt_2, sleep_method_t wm)
+{
+ test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm,
+ TIMER_TVAL);
+}
+
+static void test_set_cnt_after_cval_no_irq(enum arch_timer timer,
+ uint64_t cnt_1, uint64_t cval,
+ uint64_t cnt_2, sleep_method_t wm)
+{
+ test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm,
+ TIMER_CVAL);
+}
+
+/* Set a timer and then move the counter ahead of it. */
+static void test_move_counters_ahead_of_timers(enum arch_timer timer)
+{
+ int i;
+ int32_t tval;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ irq_wait_method_t wm = irq_wait_method[i];
+
+ test_set_cnt_after_cval(timer, 0, DEF_CNT, DEF_CNT + 1, wm);
+ test_set_cnt_after_cval(timer, CVAL_MAX, 1, 2, wm);
+
+ /* Move counter ahead of negative tval. */
+ test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm);
+ test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm);
+ tval = TVAL_MAX;
+ test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1,
+ wm);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+ sleep_method_t sm = sleep_method[i];
+
+ test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm);
+ }
+}
+
+/*
+ * Program a timer, mask it, and then change the tval or counter to cancel it.
+ * Unmask it and check that nothing fires.
+ */
+static void test_move_counters_behind_timers(enum arch_timer timer)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+ sleep_method_t sm = sleep_method[i];
+
+ test_set_cnt_after_cval_no_irq(timer, DEF_CNT, DEF_CNT - 1, 0,
+ sm);
+ test_set_cnt_after_tval_no_irq(timer, DEF_CNT, -1, 0, sm);
+ }
+}
+
+static void test_timers_in_the_past(enum arch_timer timer)
+{
+ int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms);
+ uint64_t cval;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ irq_wait_method_t wm = irq_wait_method[i];
+
+ /* Set a timer wait_ms in the past. */
+ cval = DEF_CNT - msec_to_cycles(test_args.wait_ms);
+ test_timer_cval(timer, cval, wm, true, DEF_CNT);
+ test_timer_tval(timer, tval, wm, true, DEF_CNT);
+
+ /* Set a timer to counter=0 (in the past) */
+ test_timer_cval(timer, 0, wm, true, DEF_CNT);
+
+ /* Set a timer for tval=0 (now). */
+ test_timer_tval(timer, 0, wm, true, DEF_CNT);
+
+ /* Set a timer to as far in the past as possible */
+ test_timer_tval(timer, TVAL_MIN, wm, true, DEF_CNT);
+ }
+
+ /*
+ * Set the counter to wait_ms, and a tval to -wait_ms. There should be no
+ * IRQ as that tval means cval=CVAL_MAX-wait_ms.
+ */
+ for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+ sleep_method_t sm = sleep_method[i];
+
+ set_counter(timer, msec_to_cycles(test_args.wait_ms));
+ test_tval_no_irq(timer, tval, TIMEOUT_NO_IRQ_US, sm);
+ }
+}
+
+static void test_long_timer_delays(enum arch_timer timer)
+{
+ int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms);
+ uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ irq_wait_method_t wm = irq_wait_method[i];
+
+ test_timer_cval(timer, cval, wm, true, DEF_CNT);
+ test_timer_tval(timer, tval, wm, true, DEF_CNT);
+ }
+}
+
+static void guest_run_iteration(enum arch_timer timer)
+{
+ test_basic_functionality(timer);
+ test_timers_sanity_checks(timer);
+
+ test_timers_above_tval_max(timer);
+ test_timers_in_the_past(timer);
+
+ test_move_counters_ahead_of_timers(timer);
+ test_move_counters_behind_timers(timer);
+ test_reprogram_timers(timer);
+
+ test_timers_fired_multiple_times(timer);
+
+ test_timer_control_mask_then_unmask(timer);
+ test_timer_control_masks(timer);
+}
+
+static void guest_code(enum arch_timer timer)
+{
+ int i;
+
+ local_irq_disable();
+
+ gic_init(GIC_V3, 1);
+
+ timer_set_ctl(VIRTUAL, CTL_IMASK);
+ timer_set_ctl(PHYSICAL, CTL_IMASK);
+
+ gic_irq_enable(vtimer_irq);
+ gic_irq_enable(ptimer_irq);
+ local_irq_enable();
+
+ for (i = 0; i < test_args.iterations; i++) {
+ GUEST_SYNC(i);
+ guest_run_iteration(timer);
+ }
+
+ test_long_timer_delays(timer);
+ GUEST_DONE();
+}
+
+static uint32_t next_pcpu(void)
+{
+ uint32_t max = get_nprocs();
+ uint32_t cur = sched_getcpu();
+ uint32_t next = cur;
+ cpu_set_t cpuset;
+
+ TEST_ASSERT(max > 1, "Need at least two physical cpus");
+
+ sched_getaffinity(0, sizeof(cpuset), &cpuset);
+
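+ /* Pick the next CPU in this task's affinity mask, wrapping around. */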
+ do {
+ next = (next + 1) % CPU_SETSIZE;
+ } while (!CPU_ISSET(next, &cpuset));
+
+ return next;
+}
+
+static void migrate_self(uint32_t new_pcpu)
+{
+ int ret;
+ cpu_set_t cpuset;
+ pthread_t thread;
+
+ thread = pthread_self();
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(new_pcpu, &cpuset);
+
+ pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu);
+
+ ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
+
+ TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n",
+ new_pcpu, ret);
+}
+
+static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt,
+ enum arch_timer timer)
+{
+ if (timer == PHYSICAL)
+ vcpu_set_reg(vcpu, KVM_REG_ARM_PTIMER_CNT, cnt);
+ else
+ vcpu_set_reg(vcpu, KVM_REG_ARM_TIMER_CNT, cnt);
+}
+
+static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
+{
+ enum sync_cmd cmd = uc->args[1];
+ uint64_t val = uc->args[2];
+ enum arch_timer timer = uc->args[3];
+
+ switch (cmd) {
+ case SET_COUNTER_VALUE:
+ kvm_set_cntxct(vcpu, val, timer);
+ break;
+ case USERSPACE_USLEEP:
+ usleep(val);
+ break;
+ case USERSPACE_SCHED_YIELD:
+ sched_yield();
+ break;
+ case USERSPACE_MIGRATE_SELF:
+ migrate_self(next_pcpu());
+ break;
+ default:
+ break;
+ }
+}
+
+static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ /* Start on CPU 0 */
+ migrate_self(0);
+
+ while (true) {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ handle_sync(vcpu, &uc);
+ break;
+ case UCALL_DONE:
+ goto out;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ goto out;
+ default:
+ TEST_FAIL("Unexpected guest exit\n");
+ }
+ }
+
+ out:
+ return;
+}
+
+static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
+ KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
+ vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
+ KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
+
+ sync_global_to_guest(vm, ptimer_irq);
+ sync_global_to_guest(vm, vtimer_irq);
+
+ pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
+}
+
+static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
+ enum arch_timer timer)
+{
+ *vm = vm_create_with_one_vcpu(vcpu, guest_code);
+ TEST_ASSERT(*vm, "Failed to create the test VM\n");
+
+ vm_init_descriptor_tables(*vm);
+ vm_install_exception_handler(*vm, VECTOR_IRQ_CURRENT,
+ guest_irq_handler);
+
+ vcpu_init_descriptor_tables(*vcpu);
+ vcpu_args_set(*vcpu, 1, timer);
+
+ test_init_timer_irq(*vm, *vcpu);
+ vgic_v3_setup(*vm, 1, 64);
+ sync_global_to_guest(*vm, test_args);
+}
+
+static void test_print_help(char *name)
+{
+ pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v]\n"
+ , name);
+ pr_info("\t-i: Number of iterations (default: %u)\n",
+ NR_TEST_ITERS_DEF);
+ pr_info("\t-b: Test both physical and virtual timers (default: true)\n");
+ pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n",
+ LONG_WAIT_TEST_MS);
+ pr_info("\t-l: Delta (in ms) used for wait times (default: %u)\n",
+ WAIT_TEST_MS);
+ pr_info("\t-p: Test physical timer (default: true)\n");
+ pr_info("\t-v: Test virtual timer (default: true)\n");
+ pr_info("\t-h: Print this help message\n");
+}
+
+static bool parse_args(int argc, char *argv[])
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "bhi:l:pvw:")) != -1) {
+ switch (opt) {
+ case 'b':
+ test_args.test_physical = true;
+ test_args.test_virtual = true;
+ break;
+ case 'i':
+ test_args.iterations =
+ atoi_positive("Number of iterations", optarg);
+ break;
+ case 'l':
+ test_args.long_wait_ms =
+ atoi_positive("Long wait time", optarg);
+ break;
+ case 'p':
+ test_args.test_physical = true;
+ test_args.test_virtual = false;
+ break;
+ case 'v':
+ test_args.test_virtual = true;
+ test_args.test_physical = false;
+ break;
+ case 'w':
+ test_args.wait_ms = atoi_positive("Wait time", optarg);
+ break;
+ case 'h':
+ default:
+ goto err;
+ }
+ }
+
+ return true;
+
+ err:
+ test_print_help(argv[0]);
+ return false;
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ if (!parse_args(argc, argv))
+ exit(KSFT_SKIP);
+
+ if (test_args.test_virtual) {
+ test_vm_create(&vm, &vcpu, VIRTUAL);
+ test_run(vm, vcpu);
+ kvm_vm_free(vm);
+ }
+
+ if (test_args.test_physical) {
+ test_vm_create(&vm, &vcpu, PHYSICAL);
+ test_run(vm, vcpu);
+ kvm_vm_free(vm);
+ }
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+#include <linux/bitfield.h>
+
+#define MDSCR_KDE (1 << 13)
+#define MDSCR_MDE (1 << 15)
+#define MDSCR_SS (1 << 0)
+
+#define DBGBCR_LEN8 (0xff << 5)
+#define DBGBCR_EXEC (0x0 << 3)
+#define DBGBCR_EL1 (0x1 << 1)
+#define DBGBCR_E (0x1 << 0)
+#define DBGBCR_LBN_SHIFT 16
+#define DBGBCR_BT_SHIFT 20
+#define DBGBCR_BT_ADDR_LINK_CTX (0x1 << DBGBCR_BT_SHIFT)
+#define DBGBCR_BT_CTX_LINK (0x3 << DBGBCR_BT_SHIFT)
+
+#define DBGWCR_LEN8 (0xff << 5)
+#define DBGWCR_RD (0x1 << 3)
+#define DBGWCR_WR (0x2 << 3)
+#define DBGWCR_EL1 (0x1 << 1)
+#define DBGWCR_E (0x1 << 0)
+#define DBGWCR_LBN_SHIFT 16
+#define DBGWCR_WT_SHIFT 20
+#define DBGWCR_WT_LINK (0x1 << DBGWCR_WT_SHIFT)
+
+#define SPSR_D (1 << 9)
+#define SPSR_SS (1 << 21)
+
+extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start, hw_bp_ctx;
+extern unsigned char iter_ss_begin, iter_ss_end;
+static volatile uint64_t sw_bp_addr, hw_bp_addr;
+static volatile uint64_t wp_addr, wp_data_addr;
+static volatile uint64_t svc_addr;
+static volatile uint64_t ss_addr[4], ss_idx;
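+
+/* PC(sym) yields the address of a symbol (asm label or variable) as a uint64_t. */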
+#define PC(v) ((uint64_t)&(v))
+
+#define GEN_DEBUG_WRITE_REG(reg_name) \
+static void write_##reg_name(int num, uint64_t val) \
+{ \
+ switch (num) { \
+ case 0: \
+ write_sysreg(val, reg_name##0_el1); \
+ break; \
+ case 1: \
+ write_sysreg(val, reg_name##1_el1); \
+ break; \
+ case 2: \
+ write_sysreg(val, reg_name##2_el1); \
+ break; \
+ case 3: \
+ write_sysreg(val, reg_name##3_el1); \
+ break; \
+ case 4: \
+ write_sysreg(val, reg_name##4_el1); \
+ break; \
+ case 5: \
+ write_sysreg(val, reg_name##5_el1); \
+ break; \
+ case 6: \
+ write_sysreg(val, reg_name##6_el1); \
+ break; \
+ case 7: \
+ write_sysreg(val, reg_name##7_el1); \
+ break; \
+ case 8: \
+ write_sysreg(val, reg_name##8_el1); \
+ break; \
+ case 9: \
+ write_sysreg(val, reg_name##9_el1); \
+ break; \
+ case 10: \
+ write_sysreg(val, reg_name##10_el1); \
+ break; \
+ case 11: \
+ write_sysreg(val, reg_name##11_el1); \
+ break; \
+ case 12: \
+ write_sysreg(val, reg_name##12_el1); \
+ break; \
+ case 13: \
+ write_sysreg(val, reg_name##13_el1); \
+ break; \
+ case 14: \
+ write_sysreg(val, reg_name##14_el1); \
+ break; \
+ case 15: \
+ write_sysreg(val, reg_name##15_el1); \
+ break; \
+ default: \
+ GUEST_ASSERT(0); \
+ } \
+}
+
+/* Define write_dbgbcr()/write_dbgbvr()/write_dbgwcr()/write_dbgwvr() */
+GEN_DEBUG_WRITE_REG(dbgbcr)
+GEN_DEBUG_WRITE_REG(dbgbvr)
+GEN_DEBUG_WRITE_REG(dbgwcr)
+GEN_DEBUG_WRITE_REG(dbgwvr)
+
+static void reset_debug_state(void)
+{
+ uint8_t brps, wrps, i;
+ uint64_t dfr0;
+
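+ /* Mask debug exceptions (PSTATE.D) while the debug state is reset. */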
+ asm volatile("msr daifset, #8");
+
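+ /* Clear the OS Double Lock and OS Lock so debug exceptions aren't blocked. */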
+ write_sysreg(0, osdlr_el1);
+ write_sysreg(0, oslar_el1);
+ isb();
+
+ write_sysreg(0, mdscr_el1);
+ write_sysreg(0, contextidr_el1);
+
+ /* Reset all bcr/bvr/wcr/wvr registers */
+ dfr0 = read_sysreg(id_aa64dfr0_el1);
+ brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0);
+ for (i = 0; i <= brps; i++) {
+ write_dbgbcr(i, 0);
+ write_dbgbvr(i, 0);
+ }
+ wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0);
+ for (i = 0; i <= wrps; i++) {
+ write_dbgwcr(i, 0);
+ write_dbgwvr(i, 0);
+ }
+
+ isb();
+}
+
+static void enable_os_lock(void)
+{
+ write_sysreg(1, oslar_el1);
+ isb();
+
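+ /* OSLSR_EL1.OSLK (bit 1) confirms that the OS Lock is now set. */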
+ GUEST_ASSERT(read_sysreg(oslsr_el1) & 2);
+}
+
+static void enable_monitor_debug_exceptions(void)
+{
+ uint32_t mdscr;
+
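+ /* Unmask debug exceptions by clearing PSTATE.D. */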
+ asm volatile("msr daifclr, #8");
+
+ mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
+ write_sysreg(mdscr, mdscr_el1);
+ isb();
+}
+
+static void install_wp(uint8_t wpn, uint64_t addr)
+{
+ uint32_t wcr;
+
+ wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
+ write_dbgwcr(wpn, wcr);
+ write_dbgwvr(wpn, addr);
+
+ isb();
+
+ enable_monitor_debug_exceptions();
+}
+
+static void install_hw_bp(uint8_t bpn, uint64_t addr)
+{
+ uint32_t bcr;
+
+ bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
+ write_dbgbcr(bpn, bcr);
+ write_dbgbvr(bpn, addr);
+ isb();
+
+ enable_monitor_debug_exceptions();
+}
+
+static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr,
+ uint64_t ctx)
+{
+ uint32_t wcr;
+ uint64_t ctx_bcr;
+
+ /* Setup a context-aware breakpoint for Linked Context ID Match */
+ ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
+ DBGBCR_BT_CTX_LINK;
+ write_dbgbcr(ctx_bp, ctx_bcr);
+ write_dbgbvr(ctx_bp, ctx);
+
+ /* Setup a linked watchpoint (linked to the context-aware breakpoint) */
+ wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E |
+ DBGWCR_WT_LINK | ((uint32_t)ctx_bp << DBGWCR_LBN_SHIFT);
+ write_dbgwcr(addr_wp, wcr);
+ write_dbgwvr(addr_wp, addr);
+ isb();
+
+ enable_monitor_debug_exceptions();
+}
+
+void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr,
+ uint64_t ctx)
+{
+ uint32_t addr_bcr, ctx_bcr;
+
+ /* Setup a context-aware breakpoint for Linked Context ID Match */
+ ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
+ DBGBCR_BT_CTX_LINK;
+ write_dbgbcr(ctx_bp, ctx_bcr);
+ write_dbgbvr(ctx_bp, ctx);
+
+ /*
+ * Setup a normal breakpoint for Linked Address Match, and link it
+ * to the context-aware breakpoint.
+ */
+ addr_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
+ DBGBCR_BT_ADDR_LINK_CTX |
+ ((uint32_t)ctx_bp << DBGBCR_LBN_SHIFT);
+ write_dbgbcr(addr_bp, addr_bcr);
+ write_dbgbvr(addr_bp, addr);
+ isb();
+
+ enable_monitor_debug_exceptions();
+}
+
+static void install_ss(void)
+{
+ uint32_t mdscr;
+
+ asm volatile("msr daifclr, #8");
+
+ mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS;
+ write_sysreg(mdscr, mdscr_el1);
+ isb();
+}
+
+static volatile char write_data;
+
+static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
+{
+ uint64_t ctx = 0xabcdef; /* a random context number */
+
+ /* Software-breakpoint */
+ reset_debug_state();
+ asm volatile("sw_bp: brk #0");
+ GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp));
+
+ /* Hardware-breakpoint */
+ reset_debug_state();
+ install_hw_bp(bpn, PC(hw_bp));
+ asm volatile("hw_bp: nop");
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp));
+
+ /* Hardware-breakpoint + svc */
+ reset_debug_state();
+ install_hw_bp(bpn, PC(bp_svc));
+ asm volatile("bp_svc: svc #0");
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc));
+ GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4);
+
+ /* Hardware-breakpoint + software-breakpoint */
+ reset_debug_state();
+ install_hw_bp(bpn, PC(bp_brk));
+ asm volatile("bp_brk: brk #0");
+ GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk));
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk));
+
+ /* Watchpoint */
+ reset_debug_state();
+ install_wp(wpn, PC(write_data));
+ write_data = 'x';
+ GUEST_ASSERT_EQ(write_data, 'x');
+ GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
+
+ /* Single-step */
+ reset_debug_state();
+ install_ss();
+ ss_idx = 0;
+ asm volatile("ss_start:\n"
+ "mrs x0, esr_el1\n"
+ "add x0, x0, #1\n"
+ "msr daifset, #8\n"
+ : : : "x0");
+ GUEST_ASSERT_EQ(ss_addr[0], PC(ss_start));
+ GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4);
+ GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8);
+
+ /* OS Lock does not block software-breakpoint */
+ reset_debug_state();
+ enable_os_lock();
+ sw_bp_addr = 0;
+ asm volatile("sw_bp2: brk #0");
+ GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp2));
+
+ /* OS Lock blocking hardware-breakpoint */
+ reset_debug_state();
+ enable_os_lock();
+ install_hw_bp(bpn, PC(hw_bp2));
+ hw_bp_addr = 0;
+ asm volatile("hw_bp2: nop");
+ GUEST_ASSERT_EQ(hw_bp_addr, 0);
+
+ /* OS Lock blocking watchpoint */
+ reset_debug_state();
+ enable_os_lock();
+ write_data = '\0';
+ wp_data_addr = 0;
+ install_wp(wpn, PC(write_data));
+ write_data = 'x';
+ GUEST_ASSERT_EQ(write_data, 'x');
+ GUEST_ASSERT_EQ(wp_data_addr, 0);
+
+ /* OS Lock blocking single-step */
+ reset_debug_state();
+ enable_os_lock();
+ ss_addr[0] = 0;
+ install_ss();
+ ss_idx = 0;
+ asm volatile("mrs x0, esr_el1\n\t"
+ "add x0, x0, #1\n\t"
+ "msr daifset, #8\n\t"
+ : : : "x0");
+ GUEST_ASSERT_EQ(ss_addr[0], 0);
+
+ /* Linked hardware-breakpoint */
+ hw_bp_addr = 0;
+ reset_debug_state();
+ install_hw_bp_ctx(bpn, ctx_bpn, PC(hw_bp_ctx), ctx);
+ /* Set context id */
+ write_sysreg(ctx, contextidr_el1);
+ isb();
+ asm volatile("hw_bp_ctx: nop");
+ write_sysreg(0, contextidr_el1);
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp_ctx));
+
+ /* Linked watchpoint */
+ reset_debug_state();
+ install_wp_ctx(wpn, ctx_bpn, PC(write_data), ctx);
+ /* Set context id */
+ write_sysreg(ctx, contextidr_el1);
+ isb();
+ write_data = 'x';
+ GUEST_ASSERT_EQ(write_data, 'x');
+ GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
+
+ GUEST_DONE();
+}
+
+static void guest_sw_bp_handler(struct ex_regs *regs)
+{
+ sw_bp_addr = regs->pc;
+ regs->pc += 4;
+}
+
+static void guest_hw_bp_handler(struct ex_regs *regs)
+{
+ hw_bp_addr = regs->pc;
+ regs->pstate |= SPSR_D;
+}
+
+static void guest_wp_handler(struct ex_regs *regs)
+{
+ wp_data_addr = read_sysreg(far_el1);
+ wp_addr = regs->pc;
+ regs->pstate |= SPSR_D;
+}
+
+static void guest_ss_handler(struct ex_regs *regs)
+{
+ __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%lu'", ss_idx);
+ ss_addr[ss_idx++] = regs->pc;
+ regs->pstate |= SPSR_SS;
+}
+
+static void guest_svc_handler(struct ex_regs *regs)
+{
+ svc_addr = regs->pc;
+}
+
+static void guest_code_ss(int test_cnt)
+{
+ uint64_t i;
+ uint64_t bvr, wvr, w_bvr, w_wvr;
+
+ for (i = 0; i < test_cnt; i++) {
+ /* Bits [1:0] of dbg{b,w}vr are RES0 */
+ w_bvr = i << 2;
+ w_wvr = i << 2;
+
+ /*
+ * Enable Single Step execution. Note! This _must_ be a bare
+ * ucall as the ucall() path uses atomic operations to manage
+ * the ucall structures, and the built-in "atomics" are usually
+ * implemented via exclusive access instructions. The exclusive
+ * monitor is cleared on ERET, and so taking debug exceptions
+ * during a LDREX=>STREX sequence will prevent forward progress
+ * and hang the guest/test.
+ */
+ GUEST_UCALL_NONE();
+
+ /*
+ * The userspace will verify that the pc is as expected during
+ * single step execution between iter_ss_begin and iter_ss_end.
+ */
+ asm volatile("iter_ss_begin:nop\n");
+
+ write_sysreg(w_bvr, dbgbvr0_el1);
+ write_sysreg(w_wvr, dbgwvr0_el1);
+ bvr = read_sysreg(dbgbvr0_el1);
+ wvr = read_sysreg(dbgwvr0_el1);
+
+ /* Userspace disables Single Step when the end is nigh. */
+ asm volatile("iter_ss_end:\n");
+
+ GUEST_ASSERT_EQ(bvr, w_bvr);
+ GUEST_ASSERT_EQ(wvr, w_wvr);
+ }
+ GUEST_DONE();
+}
+
+static int debug_version(uint64_t id_aa64dfr0)
+{
+ return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0);
+}
+
+static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_BRK64, guest_sw_bp_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_BREAKPT_CUR, guest_hw_bp_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_WATCHPT_CUR, guest_wp_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_SOFTSTP_CUR, guest_ss_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_SVC64, guest_svc_handler);
+
+ /* Specify bpn/wpn/ctx_bpn to be tested */
+ vcpu_args_set(vcpu, 3, bpn, wpn, ctx_bpn);
+ pr_debug("Use bpn#%d, wpn#%d and ctx_bpn#%d\n", bpn, wpn, ctx_bpn);
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+done:
+ kvm_vm_free(vm);
+}
+
+void test_single_step_from_userspace(int test_cnt)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ struct kvm_run *run;
+ uint64_t pc, cmd;
+ uint64_t test_pc = 0;
+ bool ss_enable = false;
+ struct kvm_guest_debug debug = {};
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_ss);
+ run = vcpu->run;
+ vcpu_args_set(vcpu, 1, test_cnt);
+
+ while (1) {
+ vcpu_run(vcpu);
+ if (run->exit_reason != KVM_EXIT_DEBUG) {
+ cmd = get_ucall(vcpu, &uc);
+ if (cmd == UCALL_ABORT) {
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ } else if (cmd == UCALL_DONE) {
+ break;
+ }
+
+ TEST_ASSERT(cmd == UCALL_NONE,
+ "Unexpected ucall cmd 0x%lx", cmd);
+
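+ /* The guest issued the bare ucall that starts an iteration;
+ * enable single-stepping before resuming it.
+ */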
+ debug.control = KVM_GUESTDBG_ENABLE |
+ KVM_GUESTDBG_SINGLESTEP;
+ ss_enable = true;
+ vcpu_guest_debug_set(vcpu, &debug);
+ continue;
+ }
+
+ TEST_ASSERT(ss_enable, "Unexpected KVM_EXIT_DEBUG");
+
+ /* Check if the current pc is expected. */
+ pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
+ TEST_ASSERT(!test_pc || pc == test_pc,
+ "Unexpected pc 0x%lx (expected 0x%lx)",
+ pc, test_pc);
+
+ if ((pc + 4) == (uint64_t)&iter_ss_end) {
+ test_pc = 0;
+ debug.control = KVM_GUESTDBG_ENABLE;
+ ss_enable = false;
+ vcpu_guest_debug_set(vcpu, &debug);
+ continue;
+ }
+
+ /*
+ * If the current pc is between iter_ss_begin and
+ * iter_ss_end, the pc for the next KVM_EXIT_DEBUG should
+ * be the current pc + 4.
+ */
+ if ((pc >= (uint64_t)&iter_ss_begin) &&
+ (pc < (uint64_t)&iter_ss_end))
+ test_pc = pc + 4;
+ else
+ test_pc = 0;
+ }
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Run debug testing using the various breakpoint#, watchpoint# and
+ * context-aware breakpoint# with the given ID_AA64DFR0_EL1 configuration.
+ */
+void test_guest_debug_exceptions_all(uint64_t aa64dfr0)
+{
+ uint8_t brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base;
+ int b, w, c;
+
+ /* Number of breakpoints */
+ brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1;
+ __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required");
+
+ /* Number of watchpoints */
+ wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1;
+
+ /* Number of context aware breakpoints */
+ ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1;
+
+ pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__,
+ brp_num, wrp_num, ctx_brp_num);
+
+ /* Number of normal (non-context aware) breakpoints */
+ normal_brp_num = brp_num - ctx_brp_num;
+
+ /* Lowest context aware breakpoint number */
+ ctx_brp_base = normal_brp_num;
+
+ /* Run tests with all supported breakpoints/watchpoints */
+ for (c = ctx_brp_base; c < ctx_brp_base + ctx_brp_num; c++) {
+ for (b = 0; b < normal_brp_num; b++) {
+ for (w = 0; w < wrp_num; w++)
+ test_guest_debug_exceptions(b, w, c);
+ }
+ }
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("Usage: %s [-h] [-i iterations of the single step test]\n", name);
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int opt;
+ int ss_iteration = 10000;
+ uint64_t aa64dfr0;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ aa64dfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
+ __TEST_REQUIRE(debug_version(aa64dfr0) >= 6,
+ "Armv8 debug architecture not supported.");
+ kvm_vm_free(vm);
+
+ while ((opt = getopt(argc, argv, "i:")) != -1) {
+ switch (opt) {
+ case 'i':
+ ss_iteration = atoi_positive("Number of iterations", optarg);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ test_guest_debug_exceptions_all(aa64dfr0);
+ test_single_step_from_userspace(ss_iteration);
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check for KVM_GET_REG_LIST regressions.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ *
+ * While the blessed list should be created from the oldest possible
+ * kernel, we can't go older than v5.2, though, because that's the first
+ * release which includes df205b5c6328 ("KVM: arm64: Filter out invalid
+ * core register IDs in KVM_GET_REG_LIST"). Without that commit the core
+ * registers won't match expectations.
+ */
+#include <stdio.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+
+struct feature_id_reg {
+ __u64 reg;
+ __u64 id_reg;
+ __u64 feat_shift;
+ __u64 feat_min;
+};
+
+static struct feature_id_reg feat_id_regs[] = {
+ {
+ ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
+ 0,
+ 1
+ },
+ {
+ ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
+ 8,
+ 1
+ },
+ {
+ ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
+ 8,
+ 1
+ },
+ {
+ ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
+ 16,
+ 1
+ },
+ {
+ ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */
+ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
+ 16,
+ 1
+ }
+};
+
+bool filter_reg(__u64 reg)
+{
+ /*
+ * DEMUX register presence depends on the host's CLIDR_EL1.
+ * This means there's no set of them that we can bless.
+ */
+ if ((reg & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+ return true;
+
+ return false;
+}
+
+static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg)
+{
+ int i, ret;
+ __u64 data, feat_val;
+
+ for (i = 0; i < ARRAY_SIZE(feat_id_regs); i++) {
+ if (feat_id_regs[i].reg == reg) {
+ ret = __vcpu_get_reg(vcpu, feat_id_regs[i].id_reg, &data);
+ if (ret < 0)
+ return false;
+
+ feat_val = ((data >> feat_id_regs[i].feat_shift) & 0xf);
+ return feat_val >= feat_id_regs[i].feat_min;
+ }
+ }
+
+ return true;
+}
+
+bool check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg)
+{
+ return check_supported_feat_reg(vcpu, reg);
+}
+
+bool check_reject_set(int err)
+{
+ return err == EPERM;
+}
+
+void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
+{
+ struct vcpu_reg_sublist *s;
+ int feature;
+
+ for_each_sublist(c, s) {
+ if (s->finalize) {
+ feature = s->feature;
+ vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature);
+ }
+ }
+}
+
+#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
+
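+/*
+ * KVM_REG_ARM_CORE_REG() offsets count 32-bit words: a 64-bit GPR/SPSR
+ * entry spans 2 words and a 128-bit FP vector register spans 4.
+ */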
+#define CORE_REGS_XX_NR_WORDS 2
+#define CORE_SPSR_XX_NR_WORDS 2
+#define CORE_FPREGS_XX_NR_WORDS 4
+
+static const char *core_id_to_str(const char *prefix, __u64 id)
+{
+ __u64 core_off = id & ~REG_MASK, idx;
+
+ /*
+ * core_off is the offset into struct kvm_regs
+ */
+ switch (core_off) {
+ case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
+ KVM_REG_ARM_CORE_REG(regs.regs[30]):
+ idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
+ TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", prefix, idx);
+ return strdup_printf("KVM_REG_ARM_CORE_REG(regs.regs[%lld])", idx);
+ case KVM_REG_ARM_CORE_REG(regs.sp):
+ return "KVM_REG_ARM_CORE_REG(regs.sp)";
+ case KVM_REG_ARM_CORE_REG(regs.pc):
+ return "KVM_REG_ARM_CORE_REG(regs.pc)";
+ case KVM_REG_ARM_CORE_REG(regs.pstate):
+ return "KVM_REG_ARM_CORE_REG(regs.pstate)";
+ case KVM_REG_ARM_CORE_REG(sp_el1):
+ return "KVM_REG_ARM_CORE_REG(sp_el1)";
+ case KVM_REG_ARM_CORE_REG(elr_el1):
+ return "KVM_REG_ARM_CORE_REG(elr_el1)";
+ case KVM_REG_ARM_CORE_REG(spsr[0]) ...
+ KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
+ idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
+ TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", prefix, idx);
+ return strdup_printf("KVM_REG_ARM_CORE_REG(spsr[%lld])", idx);
+ case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
+ KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
+ idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
+ TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", prefix, idx);
+ return strdup_printf("KVM_REG_ARM_CORE_REG(fp_regs.vregs[%lld])", idx);
+ case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
+ return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
+ case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
+ return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
+ }
+
+ TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id);
+ return NULL;
+}
+
+static const char *sve_id_to_str(const char *prefix, __u64 id)
+{
+ __u64 sve_off, n, i;
+
+ if (id == KVM_REG_ARM64_SVE_VLS)
+ return "KVM_REG_ARM64_SVE_VLS";
+
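+ /* Bits [4:0] of an SVE register id encode the slice index. */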
+ sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
+ i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);
+
+ TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", prefix, id);
+
+ switch (sve_off) {
+ case KVM_REG_ARM64_SVE_ZREG_BASE ...
+ KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
+ n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
+ TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
+ "%s: Unexpected bits set in SVE ZREG id: 0x%llx", prefix, id);
+ return strdup_printf("KVM_REG_ARM64_SVE_ZREG(%lld, 0)", n);
+ case KVM_REG_ARM64_SVE_PREG_BASE ...
+ KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
+ n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
+ TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
+ "%s: Unexpected bits set in SVE PREG id: 0x%llx", prefix, id);
+ return strdup_printf("KVM_REG_ARM64_SVE_PREG(%lld, 0)", n);
+ case KVM_REG_ARM64_SVE_FFR_BASE:
+ TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
+ "%s: Unexpected bits set in SVE FFR id: 0x%llx", prefix, id);
+ return "KVM_REG_ARM64_SVE_FFR(0)";
+ }
+
+ return NULL;
+}
+
+void print_reg(const char *prefix, __u64 id)
+{
+ unsigned op0, op1, crn, crm, op2;
+ const char *reg_size = NULL;
+
+ TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
+ "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", prefix, id);
+
+ switch (id & KVM_REG_SIZE_MASK) {
+ case KVM_REG_SIZE_U8:
+ reg_size = "KVM_REG_SIZE_U8";
+ break;
+ case KVM_REG_SIZE_U16:
+ reg_size = "KVM_REG_SIZE_U16";
+ break;
+ case KVM_REG_SIZE_U32:
+ reg_size = "KVM_REG_SIZE_U32";
+ break;
+ case KVM_REG_SIZE_U64:
+ reg_size = "KVM_REG_SIZE_U64";
+ break;
+ case KVM_REG_SIZE_U128:
+ reg_size = "KVM_REG_SIZE_U128";
+ break;
+ case KVM_REG_SIZE_U256:
+ reg_size = "KVM_REG_SIZE_U256";
+ break;
+ case KVM_REG_SIZE_U512:
+ reg_size = "KVM_REG_SIZE_U512";
+ break;
+ case KVM_REG_SIZE_U1024:
+ reg_size = "KVM_REG_SIZE_U1024";
+ break;
+ case KVM_REG_SIZE_U2048:
+ reg_size = "KVM_REG_SIZE_U2048";
+ break;
+ default:
+ TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
+ prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+ }
+
+ switch (id & KVM_REG_ARM_COPROC_MASK) {
+ case KVM_REG_ARM_CORE:
+ printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(prefix, id));
+ break;
+ case KVM_REG_ARM_DEMUX:
+ TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
+ "%s: Unexpected bits set in DEMUX reg id: 0x%llx", prefix, id);
+ printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
+ reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
+ break;
+ case KVM_REG_ARM64_SYSREG:
+ op0 = (id & KVM_REG_ARM64_SYSREG_OP0_MASK) >> KVM_REG_ARM64_SYSREG_OP0_SHIFT;
+ op1 = (id & KVM_REG_ARM64_SYSREG_OP1_MASK) >> KVM_REG_ARM64_SYSREG_OP1_SHIFT;
+ crn = (id & KVM_REG_ARM64_SYSREG_CRN_MASK) >> KVM_REG_ARM64_SYSREG_CRN_SHIFT;
+ crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
+ op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
+ TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
+ "%s: Unexpected bits set in SYSREG reg id: 0x%llx", prefix, id);
+ printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
+ break;
+ case KVM_REG_ARM_FW:
+ TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
+ "%s: Unexpected bits set in FW reg id: 0x%llx", prefix, id);
+ printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
+ break;
+ case KVM_REG_ARM_FW_FEAT_BMAP:
+ TEST_ASSERT(id == KVM_REG_ARM_FW_FEAT_BMAP_REG(id & 0xffff),
+ "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", prefix, id);
+ printf("\tKVM_REG_ARM_FW_FEAT_BMAP_REG(%lld),\n", id & 0xffff);
+ break;
+ case KVM_REG_ARM64_SVE:
+ printf("\t%s,\n", sve_id_to_str(prefix, id));
+ break;
+ default:
+ TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx",
+ prefix, (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
+ }
+}
+
+/*
+ * The original blessed list was primed with the output of kernel version
+ * v4.15 with --core-reg-fixup and then later updated with new registers.
+ * (The --core-reg-fixup option and its fixup function have been removed
+ * from the test, as it's unlikely to use this type of test on a kernel
+ * older than v5.2.)
+ *
+ * The blessed list is up to date with kernel version v6.4 (or so we hope)
+ */
+static __u64 base_regs[] = {
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[3]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[4]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[5]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[6]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[7]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[8]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[9]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[10]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[11]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[12]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[13]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[14]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[15]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[16]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[17]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[18]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[19]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[20]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[21]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[22]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[23]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[24]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[25]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[26]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[27]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[28]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[29]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[30]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.sp),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pstate),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(elr_el1),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[0]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[1]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr),
+ KVM_REG_ARM_FW_REG(0), /* KVM_REG_ARM_PSCI_VERSION */
+ KVM_REG_ARM_FW_REG(1), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 */
+ KVM_REG_ARM_FW_REG(2), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 */
+ KVM_REG_ARM_FW_REG(3), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 */
+ KVM_REG_ARM_FW_FEAT_BMAP_REG(0), /* KVM_REG_ARM_STD_BMAP */
+ KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */
+ KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */
+ ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 0, 2),
+ ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */
+ ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */
+ ARM64_SYS_REG(3, 1, 0, 0, 7), /* AIDR_EL1 */
+ ARM64_SYS_REG(3, 3, 0, 0, 1), /* CTR_EL0 */
+ ARM64_SYS_REG(2, 0, 0, 0, 4),
+ ARM64_SYS_REG(2, 0, 0, 0, 5),
+ ARM64_SYS_REG(2, 0, 0, 0, 6),
+ ARM64_SYS_REG(2, 0, 0, 0, 7),
+ ARM64_SYS_REG(2, 0, 0, 1, 4),
+ ARM64_SYS_REG(2, 0, 0, 1, 5),
+ ARM64_SYS_REG(2, 0, 0, 1, 6),
+ ARM64_SYS_REG(2, 0, 0, 1, 7),
+ ARM64_SYS_REG(2, 0, 0, 2, 0), /* MDCCINT_EL1 */
+ ARM64_SYS_REG(2, 0, 0, 2, 2), /* MDSCR_EL1 */
+ ARM64_SYS_REG(2, 0, 0, 2, 4),
+ ARM64_SYS_REG(2, 0, 0, 2, 5),
+ ARM64_SYS_REG(2, 0, 0, 2, 6),
+ ARM64_SYS_REG(2, 0, 0, 2, 7),
+ ARM64_SYS_REG(2, 0, 0, 3, 4),
+ ARM64_SYS_REG(2, 0, 0, 3, 5),
+ ARM64_SYS_REG(2, 0, 0, 3, 6),
+ ARM64_SYS_REG(2, 0, 0, 3, 7),
+ ARM64_SYS_REG(2, 0, 0, 4, 4),
+ ARM64_SYS_REG(2, 0, 0, 4, 5),
+ ARM64_SYS_REG(2, 0, 0, 4, 6),
+ ARM64_SYS_REG(2, 0, 0, 4, 7),
+ ARM64_SYS_REG(2, 0, 0, 5, 4),
+ ARM64_SYS_REG(2, 0, 0, 5, 5),
+ ARM64_SYS_REG(2, 0, 0, 5, 6),
+ ARM64_SYS_REG(2, 0, 0, 5, 7),
+ ARM64_SYS_REG(2, 0, 0, 6, 4),
+ ARM64_SYS_REG(2, 0, 0, 6, 5),
+ ARM64_SYS_REG(2, 0, 0, 6, 6),
+ ARM64_SYS_REG(2, 0, 0, 6, 7),
+ ARM64_SYS_REG(2, 0, 0, 7, 4),
+ ARM64_SYS_REG(2, 0, 0, 7, 5),
+ ARM64_SYS_REG(2, 0, 0, 7, 6),
+ ARM64_SYS_REG(2, 0, 0, 7, 7),
+ ARM64_SYS_REG(2, 0, 0, 8, 4),
+ ARM64_SYS_REG(2, 0, 0, 8, 5),
+ ARM64_SYS_REG(2, 0, 0, 8, 6),
+ ARM64_SYS_REG(2, 0, 0, 8, 7),
+ ARM64_SYS_REG(2, 0, 0, 9, 4),
+ ARM64_SYS_REG(2, 0, 0, 9, 5),
+ ARM64_SYS_REG(2, 0, 0, 9, 6),
+ ARM64_SYS_REG(2, 0, 0, 9, 7),
+ ARM64_SYS_REG(2, 0, 0, 10, 4),
+ ARM64_SYS_REG(2, 0, 0, 10, 5),
+ ARM64_SYS_REG(2, 0, 0, 10, 6),
+ ARM64_SYS_REG(2, 0, 0, 10, 7),
+ ARM64_SYS_REG(2, 0, 0, 11, 4),
+ ARM64_SYS_REG(2, 0, 0, 11, 5),
+ ARM64_SYS_REG(2, 0, 0, 11, 6),
+ ARM64_SYS_REG(2, 0, 0, 11, 7),
+ ARM64_SYS_REG(2, 0, 0, 12, 4),
+ ARM64_SYS_REG(2, 0, 0, 12, 5),
+ ARM64_SYS_REG(2, 0, 0, 12, 6),
+ ARM64_SYS_REG(2, 0, 0, 12, 7),
+ ARM64_SYS_REG(2, 0, 0, 13, 4),
+ ARM64_SYS_REG(2, 0, 0, 13, 5),
+ ARM64_SYS_REG(2, 0, 0, 13, 6),
+ ARM64_SYS_REG(2, 0, 0, 13, 7),
+ ARM64_SYS_REG(2, 0, 0, 14, 4),
+ ARM64_SYS_REG(2, 0, 0, 14, 5),
+ ARM64_SYS_REG(2, 0, 0, 14, 6),
+ ARM64_SYS_REG(2, 0, 0, 14, 7),
+ ARM64_SYS_REG(2, 0, 0, 15, 4),
+ ARM64_SYS_REG(2, 0, 0, 15, 5),
+ ARM64_SYS_REG(2, 0, 0, 15, 6),
+ ARM64_SYS_REG(2, 0, 0, 15, 7),
+ ARM64_SYS_REG(2, 0, 1, 1, 4), /* OSLSR_EL1 */
+ ARM64_SYS_REG(2, 4, 0, 7, 0), /* DBGVCR32_EL2 */
+ ARM64_SYS_REG(3, 0, 0, 0, 5), /* MPIDR_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 0), /* ID_PFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 1), /* ID_PFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 2), /* ID_DFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 3), /* ID_AFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 4), /* ID_MMFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 5), /* ID_MMFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 6), /* ID_MMFR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 7), /* ID_MMFR3_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 0), /* ID_ISAR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 1), /* ID_ISAR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 2), /* ID_ISAR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 3), /* ID_ISAR3_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 4), /* ID_ISAR4_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 5), /* ID_ISAR5_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 6), /* ID_MMFR4_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 7), /* ID_ISAR6_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 0), /* MVFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 1), /* MVFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 2), /* MVFR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 3),
+ ARM64_SYS_REG(3, 0, 0, 3, 4), /* ID_PFR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 5), /* ID_DFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 6), /* ID_MMFR5_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 7),
+ ARM64_SYS_REG(3, 0, 0, 4, 0), /* ID_AA64PFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 4, 1), /* ID_AA64PFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 4, 2), /* ID_AA64PFR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 4, 3),
+ ARM64_SYS_REG(3, 0, 0, 4, 4), /* ID_AA64ZFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 4, 5), /* ID_AA64SMFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 4, 6),
+ ARM64_SYS_REG(3, 0, 0, 4, 7),
+ ARM64_SYS_REG(3, 0, 0, 5, 0), /* ID_AA64DFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 5, 1), /* ID_AA64DFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 5, 2),
+ ARM64_SYS_REG(3, 0, 0, 5, 3),
+ ARM64_SYS_REG(3, 0, 0, 5, 4), /* ID_AA64AFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 5, 5), /* ID_AA64AFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 5, 6),
+ ARM64_SYS_REG(3, 0, 0, 5, 7),
+ ARM64_SYS_REG(3, 0, 0, 6, 0), /* ID_AA64ISAR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 6, 1), /* ID_AA64ISAR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 6, 2), /* ID_AA64ISAR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 6, 3),
+ ARM64_SYS_REG(3, 0, 0, 6, 4),
+ ARM64_SYS_REG(3, 0, 0, 6, 5),
+ ARM64_SYS_REG(3, 0, 0, 6, 6),
+ ARM64_SYS_REG(3, 0, 0, 6, 7),
+ ARM64_SYS_REG(3, 0, 0, 7, 0), /* ID_AA64MMFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 1), /* ID_AA64MMFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 2), /* ID_AA64MMFR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 4), /* ID_AA64MMFR4_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 5),
+ ARM64_SYS_REG(3, 0, 0, 7, 6),
+ ARM64_SYS_REG(3, 0, 0, 7, 7),
+ ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */
+ ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */
+ ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */
+ ARM64_SYS_REG(3, 0, 5, 1, 0), /* AFSR0_EL1 */
+ ARM64_SYS_REG(3, 0, 5, 1, 1), /* AFSR1_EL1 */
+ ARM64_SYS_REG(3, 0, 5, 2, 0), /* ESR_EL1 */
+ ARM64_SYS_REG(3, 0, 6, 0, 0), /* FAR_EL1 */
+ ARM64_SYS_REG(3, 0, 7, 4, 0), /* PAR_EL1 */
+ ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */
+ ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */
+ ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */
+ ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */
+ ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */
+ ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */
+ ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */
+ ARM64_SYS_REG(3, 0, 13, 0, 1), /* CONTEXTIDR_EL1 */
+ ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */
+ ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */
+ ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */
+ ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */
+ ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */
+ ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 0, 1), /* CNTPCT_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 2, 1), /* CNTP_CTL_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 2, 2), /* CNTP_CVAL_EL0 */
+ ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */
+ ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */
+ ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */
+};
+
+static __u64 pmu_regs[] = {
+ ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */
+ ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */
+ ARM64_SYS_REG(3, 3, 9, 12, 0), /* PMCR_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 12, 1), /* PMCNTENSET_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 12, 2), /* PMCNTENCLR_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 12, 3), /* PMOVSCLR_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 12, 4), /* PMSWINC_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 12, 5), /* PMSELR_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 13, 0), /* PMCCNTR_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 14, 0), /* PMUSERENR_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 14, 3), /* PMOVSSET_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 8, 0),
+ ARM64_SYS_REG(3, 3, 14, 8, 1),
+ ARM64_SYS_REG(3, 3, 14, 8, 2),
+ ARM64_SYS_REG(3, 3, 14, 8, 3),
+ ARM64_SYS_REG(3, 3, 14, 8, 4),
+ ARM64_SYS_REG(3, 3, 14, 8, 5),
+ ARM64_SYS_REG(3, 3, 14, 8, 6),
+ ARM64_SYS_REG(3, 3, 14, 8, 7),
+ ARM64_SYS_REG(3, 3, 14, 9, 0),
+ ARM64_SYS_REG(3, 3, 14, 9, 1),
+ ARM64_SYS_REG(3, 3, 14, 9, 2),
+ ARM64_SYS_REG(3, 3, 14, 9, 3),
+ ARM64_SYS_REG(3, 3, 14, 9, 4),
+ ARM64_SYS_REG(3, 3, 14, 9, 5),
+ ARM64_SYS_REG(3, 3, 14, 9, 6),
+ ARM64_SYS_REG(3, 3, 14, 9, 7),
+ ARM64_SYS_REG(3, 3, 14, 10, 0),
+ ARM64_SYS_REG(3, 3, 14, 10, 1),
+ ARM64_SYS_REG(3, 3, 14, 10, 2),
+ ARM64_SYS_REG(3, 3, 14, 10, 3),
+ ARM64_SYS_REG(3, 3, 14, 10, 4),
+ ARM64_SYS_REG(3, 3, 14, 10, 5),
+ ARM64_SYS_REG(3, 3, 14, 10, 6),
+ ARM64_SYS_REG(3, 3, 14, 10, 7),
+ ARM64_SYS_REG(3, 3, 14, 11, 0),
+ ARM64_SYS_REG(3, 3, 14, 11, 1),
+ ARM64_SYS_REG(3, 3, 14, 11, 2),
+ ARM64_SYS_REG(3, 3, 14, 11, 3),
+ ARM64_SYS_REG(3, 3, 14, 11, 4),
+ ARM64_SYS_REG(3, 3, 14, 11, 5),
+ ARM64_SYS_REG(3, 3, 14, 11, 6),
+ ARM64_SYS_REG(3, 3, 14, 12, 0),
+ ARM64_SYS_REG(3, 3, 14, 12, 1),
+ ARM64_SYS_REG(3, 3, 14, 12, 2),
+ ARM64_SYS_REG(3, 3, 14, 12, 3),
+ ARM64_SYS_REG(3, 3, 14, 12, 4),
+ ARM64_SYS_REG(3, 3, 14, 12, 5),
+ ARM64_SYS_REG(3, 3, 14, 12, 6),
+ ARM64_SYS_REG(3, 3, 14, 12, 7),
+ ARM64_SYS_REG(3, 3, 14, 13, 0),
+ ARM64_SYS_REG(3, 3, 14, 13, 1),
+ ARM64_SYS_REG(3, 3, 14, 13, 2),
+ ARM64_SYS_REG(3, 3, 14, 13, 3),
+ ARM64_SYS_REG(3, 3, 14, 13, 4),
+ ARM64_SYS_REG(3, 3, 14, 13, 5),
+ ARM64_SYS_REG(3, 3, 14, 13, 6),
+ ARM64_SYS_REG(3, 3, 14, 13, 7),
+ ARM64_SYS_REG(3, 3, 14, 14, 0),
+ ARM64_SYS_REG(3, 3, 14, 14, 1),
+ ARM64_SYS_REG(3, 3, 14, 14, 2),
+ ARM64_SYS_REG(3, 3, 14, 14, 3),
+ ARM64_SYS_REG(3, 3, 14, 14, 4),
+ ARM64_SYS_REG(3, 3, 14, 14, 5),
+ ARM64_SYS_REG(3, 3, 14, 14, 6),
+ ARM64_SYS_REG(3, 3, 14, 14, 7),
+ ARM64_SYS_REG(3, 3, 14, 15, 0),
+ ARM64_SYS_REG(3, 3, 14, 15, 1),
+ ARM64_SYS_REG(3, 3, 14, 15, 2),
+ ARM64_SYS_REG(3, 3, 14, 15, 3),
+ ARM64_SYS_REG(3, 3, 14, 15, 4),
+ ARM64_SYS_REG(3, 3, 14, 15, 5),
+ ARM64_SYS_REG(3, 3, 14, 15, 6),
+ ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */
+};
+
+static __u64 vregs[] = {
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]),
+};
+
+static __u64 sve_regs[] = {
+ KVM_REG_ARM64_SVE_VLS,
+ KVM_REG_ARM64_SVE_ZREG(0, 0),
+ KVM_REG_ARM64_SVE_ZREG(1, 0),
+ KVM_REG_ARM64_SVE_ZREG(2, 0),
+ KVM_REG_ARM64_SVE_ZREG(3, 0),
+ KVM_REG_ARM64_SVE_ZREG(4, 0),
+ KVM_REG_ARM64_SVE_ZREG(5, 0),
+ KVM_REG_ARM64_SVE_ZREG(6, 0),
+ KVM_REG_ARM64_SVE_ZREG(7, 0),
+ KVM_REG_ARM64_SVE_ZREG(8, 0),
+ KVM_REG_ARM64_SVE_ZREG(9, 0),
+ KVM_REG_ARM64_SVE_ZREG(10, 0),
+ KVM_REG_ARM64_SVE_ZREG(11, 0),
+ KVM_REG_ARM64_SVE_ZREG(12, 0),
+ KVM_REG_ARM64_SVE_ZREG(13, 0),
+ KVM_REG_ARM64_SVE_ZREG(14, 0),
+ KVM_REG_ARM64_SVE_ZREG(15, 0),
+ KVM_REG_ARM64_SVE_ZREG(16, 0),
+ KVM_REG_ARM64_SVE_ZREG(17, 0),
+ KVM_REG_ARM64_SVE_ZREG(18, 0),
+ KVM_REG_ARM64_SVE_ZREG(19, 0),
+ KVM_REG_ARM64_SVE_ZREG(20, 0),
+ KVM_REG_ARM64_SVE_ZREG(21, 0),
+ KVM_REG_ARM64_SVE_ZREG(22, 0),
+ KVM_REG_ARM64_SVE_ZREG(23, 0),
+ KVM_REG_ARM64_SVE_ZREG(24, 0),
+ KVM_REG_ARM64_SVE_ZREG(25, 0),
+ KVM_REG_ARM64_SVE_ZREG(26, 0),
+ KVM_REG_ARM64_SVE_ZREG(27, 0),
+ KVM_REG_ARM64_SVE_ZREG(28, 0),
+ KVM_REG_ARM64_SVE_ZREG(29, 0),
+ KVM_REG_ARM64_SVE_ZREG(30, 0),
+ KVM_REG_ARM64_SVE_ZREG(31, 0),
+ KVM_REG_ARM64_SVE_PREG(0, 0),
+ KVM_REG_ARM64_SVE_PREG(1, 0),
+ KVM_REG_ARM64_SVE_PREG(2, 0),
+ KVM_REG_ARM64_SVE_PREG(3, 0),
+ KVM_REG_ARM64_SVE_PREG(4, 0),
+ KVM_REG_ARM64_SVE_PREG(5, 0),
+ KVM_REG_ARM64_SVE_PREG(6, 0),
+ KVM_REG_ARM64_SVE_PREG(7, 0),
+ KVM_REG_ARM64_SVE_PREG(8, 0),
+ KVM_REG_ARM64_SVE_PREG(9, 0),
+ KVM_REG_ARM64_SVE_PREG(10, 0),
+ KVM_REG_ARM64_SVE_PREG(11, 0),
+ KVM_REG_ARM64_SVE_PREG(12, 0),
+ KVM_REG_ARM64_SVE_PREG(13, 0),
+ KVM_REG_ARM64_SVE_PREG(14, 0),
+ KVM_REG_ARM64_SVE_PREG(15, 0),
+ KVM_REG_ARM64_SVE_FFR(0),
+ ARM64_SYS_REG(3, 0, 1, 2, 0), /* ZCR_EL1 */
+};
+
+static __u64 sve_rejects_set[] = {
+ KVM_REG_ARM64_SVE_VLS,
+};
+
+static __u64 pauth_addr_regs[] = {
+ ARM64_SYS_REG(3, 0, 2, 1, 0), /* APIAKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 1, 1), /* APIAKEYHI_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 1, 2), /* APIBKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 1, 3), /* APIBKEYHI_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 0), /* APDAKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 1), /* APDAKEYHI_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 2), /* APDBKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 3) /* APDBKEYHI_EL1 */
+};
+
+static __u64 pauth_generic_regs[] = {
+ ARM64_SYS_REG(3, 0, 2, 3, 0), /* APGAKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */
+};
+
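+/*
+ * Each vcpu_reg_list config below is assembled from these sublists and is
+ * terminated by an empty {0} entry. Sublists that set .capability/.feature
+ * are only expected to be exercised when the corresponding KVM capability
+ * and vCPU feature are available (and, for SVE, finalized).
+ */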
+#define BASE_SUBLIST \
+ { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
+#define VREGS_SUBLIST \
+ { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), }
+#define PMU_SUBLIST \
+ { "pmu", .capability = KVM_CAP_ARM_PMU_V3, .feature = KVM_ARM_VCPU_PMU_V3, \
+ .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
+#define SVE_SUBLIST \
+ { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
+ .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \
+ .rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), }
+#define PAUTH_SUBLIST \
+ { \
+ .name = "pauth_address", \
+ .capability = KVM_CAP_ARM_PTRAUTH_ADDRESS, \
+ .feature = KVM_ARM_VCPU_PTRAUTH_ADDRESS, \
+ .regs = pauth_addr_regs, \
+ .regs_n = ARRAY_SIZE(pauth_addr_regs), \
+ }, \
+ { \
+ .name = "pauth_generic", \
+ .capability = KVM_CAP_ARM_PTRAUTH_GENERIC, \
+ .feature = KVM_ARM_VCPU_PTRAUTH_GENERIC, \
+ .regs = pauth_generic_regs, \
+ .regs_n = ARRAY_SIZE(pauth_generic_regs), \
+ }
+
+static struct vcpu_reg_list vregs_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_reg_list vregs_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_reg_list sve_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ SVE_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_reg_list sve_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ SVE_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_reg_list pauth_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_reg_list pauth_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+struct vcpu_reg_list *vcpu_configs[] = {
+ &vregs_config,
+ &vregs_pmu_config,
+ &sve_config,
+ &sve_pmu_config,
+ &pauth_config,
+ &pauth_pmu_config,
+};
+int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* hypercalls: Check the ARM64 pseudo-firmware bitmap register interface.
+ *
+ * The test validates the basic hypercall functionality exposed
+ * via the pseudo-firmware bitmap registers. This includes the registers'
+ * read/write behavior before and after the VM has started, and if the
+ * hypercalls are properly masked or unmasked to the guest when disabled or
+ * enabled from the KVM userspace, respectively.
+ */
+#include <errno.h>
+#include <linux/arm-smccc.h>
+#include <asm/kvm.h>
+#include <kvm_util.h>
+
+#include "processor.h"
+
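+/* All feature bits up to and including max_feat_bit, e.g. GENMASK(1, 0) == 0x3 */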
+#define FW_REG_ULIMIT_VAL(max_feat_bit) (GENMASK(max_feat_bit, 0))
+
+/* Last valid bits of the bitmapped firmware registers */
+#define KVM_REG_ARM_STD_BMAP_BIT_MAX 0
+#define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX 0
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX 1
+
+struct kvm_fw_reg_info {
+ uint64_t reg; /* Register definition */
+ uint64_t max_feat_bit; /* Bit that represents the upper limit of the feature-map */
+};
+
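+/*
+ * FW_REG_INFO(KVM_REG_ARM_STD_BMAP), for example, pairs the register with
+ * the KVM_REG_ARM_STD_BMAP_BIT_MAX limit defined above via token pasting.
+ */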
+#define FW_REG_INFO(r) \
+ { \
+ .reg = r, \
+ .max_feat_bit = r##_BIT_MAX, \
+ }
+
+static const struct kvm_fw_reg_info fw_reg_info[] = {
+ FW_REG_INFO(KVM_REG_ARM_STD_BMAP),
+ FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP),
+ FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP),
+};
+
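+/*
+ * The test walks these stages in order: register checks around VM start,
+ * hypercalls with the features disabled, hypercalls with the (default)
+ * features enabled on a fresh VM, and finally mismatched feature queries.
+ */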
+enum test_stage {
+ TEST_STAGE_REG_IFACE,
+ TEST_STAGE_HVC_IFACE_FEAT_DISABLED,
+ TEST_STAGE_HVC_IFACE_FEAT_ENABLED,
+ TEST_STAGE_HVC_IFACE_FALSE_INFO,
+ TEST_STAGE_END,
+};
+
+static int stage = TEST_STAGE_REG_IFACE;
+
+struct test_hvc_info {
+ uint32_t func_id;
+ uint64_t arg1;
+};
+
+#define TEST_HVC_INFO(f, a1) \
+ { \
+ .func_id = f, \
+ .arg1 = a1, \
+ }
+
+static const struct test_hvc_info hvc_info[] = {
+ /* KVM_REG_ARM_STD_BMAP */
+ TEST_HVC_INFO(ARM_SMCCC_TRNG_VERSION, 0),
+ TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_TRNG_RND64),
+ TEST_HVC_INFO(ARM_SMCCC_TRNG_GET_UUID, 0),
+ TEST_HVC_INFO(ARM_SMCCC_TRNG_RND32, 0),
+ TEST_HVC_INFO(ARM_SMCCC_TRNG_RND64, 0),
+
+ /* KVM_REG_ARM_STD_HYP_BMAP */
+ TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_HV_PV_TIME_FEATURES),
+ TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_HV_PV_TIME_ST),
+ TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_ST, 0),
+
+ /* KVM_REG_ARM_VENDOR_HYP_BMAP */
+ TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID,
+ ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID),
+ TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0),
+ TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID, KVM_PTP_VIRT_COUNTER),
+};
+
+/* Feed false hypercall info to test the KVM behavior */
+static const struct test_hvc_info false_hvc_info[] = {
+ /* Feature support check against a different family of hypercalls */
+ TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID),
+ TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_TRNG_RND64),
+ TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_TRNG_RND64),
+};
+
+static void guest_test_hvc(const struct test_hvc_info *hc_info)
+{
+ unsigned int i;
+ struct arm_smccc_res res;
+ unsigned int hvc_info_arr_sz;
+
+ hvc_info_arr_sz =
+ hc_info == hvc_info ? ARRAY_SIZE(hvc_info) : ARRAY_SIZE(false_hvc_info);
+
+ for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) {
+ memset(&res, 0, sizeof(res));
+ smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res);
+
+ switch (stage) {
+ case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
+ case TEST_STAGE_HVC_IFACE_FALSE_INFO:
+ __GUEST_ASSERT(res.a0 == SMCCC_RET_NOT_SUPPORTED,
+ "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u",
+ res.a0, hc_info->func_id, hc_info->arg1, stage);
+ break;
+ case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
+ __GUEST_ASSERT(res.a0 != SMCCC_RET_NOT_SUPPORTED,
+ "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u",
+ res.a0, hc_info->func_id, hc_info->arg1, stage);
+ break;
+ default:
+ GUEST_FAIL("Unexpected stage = %u", stage);
+ }
+ }
+}
+
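+/* The host advances 'stage' via sync_global_to_guest() after each GUEST_SYNC. */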
+static void guest_code(void)
+{
+ while (stage != TEST_STAGE_END) {
+ switch (stage) {
+ case TEST_STAGE_REG_IFACE:
+ break;
+ case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
+ case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
+ guest_test_hvc(hvc_info);
+ break;
+ case TEST_STAGE_HVC_IFACE_FALSE_INFO:
+ guest_test_hvc(false_hvc_info);
+ break;
+ default:
+ GUEST_FAIL("Unexpected stage = %u", stage);
+ }
+
+ GUEST_SYNC(stage);
+ }
+
+ GUEST_DONE();
+}
+
+struct st_time {
+ uint32_t rev;
+ uint32_t attr;
+ uint64_t st_time;
+};
+
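+/* Rounds sizeof(struct st_time) (16 bytes) up to a 64-byte multiple, i.e. 64 bytes */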
+#define STEAL_TIME_SIZE ((sizeof(struct st_time) + 63) & ~63)
+#define ST_GPA_BASE (1 << 30)
+
+static void steal_time_init(struct kvm_vcpu *vcpu)
+{
+ uint64_t st_ipa = (ulong)ST_GPA_BASE;
+ unsigned int gpages;
+
+ gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE);
+ vm_userspace_mem_region_add(vcpu->vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
+
+ vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PVTIME_CTRL,
+ KVM_ARM_VCPU_PVTIME_IPA, &st_ipa);
+}
+
+static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu)
+{
+ uint64_t val;
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
+ const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
+
+ /* First 'read' should be an upper limit of the features supported */
+ val = vcpu_get_reg(vcpu, reg_info->reg);
+ TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit),
+ "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx",
+ reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val);
+
+ /* Test a 'write' by disabling all the features of the register map */
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
+ TEST_ASSERT(ret == 0,
+ "Failed to clear all the features of reg: 0x%lx; ret: %d",
+ reg_info->reg, errno);
+
+ val = vcpu_get_reg(vcpu, reg_info->reg);
+ TEST_ASSERT(val == 0,
+ "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg);
+
+ /*
+ * Test enabling a feature that's not supported.
+ * Avoid this check if all the bits are occupied.
+ */
+ if (reg_info->max_feat_bit < 63) {
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, BIT(reg_info->max_feat_bit + 1));
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx",
+ errno, reg_info->reg);
+ }
+ }
+}
+
+static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu)
+{
+ uint64_t val;
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
+ const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
+
+ /*
+ * Before starting the VM, the test clears all the bits.
+ * Check if that's still the case.
+ */
+ val = vcpu_get_reg(vcpu, reg_info->reg);
+ TEST_ASSERT(val == 0,
+ "Expected all the features to be cleared for reg: 0x%lx",
+ reg_info->reg);
+
+ /*
+ * Since the VM has run at least once, KVM shouldn't allow modification of
+ * the registers and should return EBUSY. Set the registers and check for
+ * the expected errno.
+ */
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit));
+ TEST_ASSERT(ret != 0 && errno == EBUSY,
+ "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx",
+ errno, reg_info->reg);
+ }
+}
+
+static struct kvm_vm *test_vm_create(struct kvm_vcpu **vcpu)
+{
+ struct kvm_vm *vm;
+
+ vm = vm_create_with_one_vcpu(vcpu, guest_code);
+
+ steal_time_init(*vcpu);
+
+ return vm;
+}
+
+static void test_guest_stage(struct kvm_vm **vm, struct kvm_vcpu **vcpu)
+{
+ int prev_stage = stage;
+
+ pr_debug("Stage: %d\n", prev_stage);
+
+ /* Sync the stage early, the VM might be freed below. */
+ stage++;
+ sync_global_to_guest(*vm, stage);
+
+ switch (prev_stage) {
+ case TEST_STAGE_REG_IFACE:
+ test_fw_regs_after_vm_start(*vcpu);
+ break;
+ case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
+ /* Start a new VM so that all the features are now enabled by default */
+ kvm_vm_free(*vm);
+ *vm = test_vm_create(vcpu);
+ break;
+ case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
+ case TEST_STAGE_HVC_IFACE_FALSE_INFO:
+ break;
+ default:
+ TEST_FAIL("Unknown test stage: %d", prev_stage);
+ }
+}
+
+static void test_run(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ bool guest_done = false;
+
+ vm = test_vm_create(&vcpu);
+
+ test_fw_regs_before_vm_start(vcpu);
+
+ while (!guest_done) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ test_guest_stage(&vm, &vcpu);
+ break;
+ case UCALL_DONE:
+ guest_done = true;
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unexpected guest exit");
+ }
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ test_run();
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * mmio_abort - Tests for userspace MMIO abort injection
+ *
+ * Copyright (c) 2024 Google LLC
+ */
+#include "processor.h"
+#include "test_util.h"
+
+#define MMIO_ADDR 0x8000000ULL
+
+static u64 expected_abort_pc;
+
+static void expect_sea_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+
+ GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+
+ GUEST_DONE();
+}
+
+static void unexpected_dabt_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc);
+}
+
+static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code,
+ handler_fn dabt_handler)
+{
+ struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(*vcpu);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler);
+
+ virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1);
+
+ return vm;
+}
+
+static void vcpu_inject_extabt(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events = {};
+
+ events.exception.ext_dabt_pending = true;
+ vcpu_events_set(vcpu, &events);
+}
+
+static void vcpu_run_expect_done(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+}
+
+extern char test_mmio_abort_insn;
+
+static void test_mmio_abort_guest(void)
+{
+ WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn);
+
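+	/*
+	 * A plain LDR has a valid syndrome (ESR_EL2.ISV = 1), so the access
+	 * exits to userspace as a normal KVM_EXIT_MMIO.
+	 */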
+ asm volatile("test_mmio_abort_insn:\n\t"
+ "ldr x0, [%0]\n\t"
+ : : "r" (MMIO_ADDR) : "x0", "memory");
+
+ GUEST_FAIL("MMIO instruction should not retire");
+}
+
+/*
+ * Test that KVM doesn't complete MMIO emulation when userspace has made an
+ * external abort pending for the instruction.
+ */
+static void test_mmio_abort(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest,
+ expect_sea_handler);
+ struct kvm_run *run = vcpu->run;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
+ TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
+ TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
+ TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
+
+ vcpu_inject_extabt(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+extern char test_mmio_nisv_insn;
+
+static void test_mmio_nisv_guest(void)
+{
+ WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn);
+
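+	/*
+	 * The post-indexed LDR has no valid syndrome (ESR_EL2.ISV = 0), so KVM
+	 * cannot emulate it and must involve userspace via KVM_EXIT_ARM_NISV.
+	 */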
+ asm volatile("test_mmio_nisv_insn:\n\t"
+ "ldr x0, [%0], #8\n\t"
+ : : "r" (MMIO_ADDR) : "x0", "memory");
+
+ GUEST_FAIL("MMIO instruction should not retire");
+}
+
+/*
+ * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace
+ * hasn't enabled KVM_CAP_ARM_NISV_TO_USER.
+ */
+static void test_mmio_nisv(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
+ unexpected_dabt_handler);
+
+ TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN");
+ TEST_ASSERT_EQ(errno, ENOSYS);
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA
+ * reaches the guest.
+ */
+static void test_mmio_nisv_abort(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
+ expect_sea_handler);
+ struct kvm_run *run = vcpu->run;
+
+ vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV);
+ TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR);
+
+ vcpu_inject_extabt(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ test_mmio_abort();
+ test_mmio_nisv();
+ test_mmio_nisv_abort();
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+// Check that, on a GICv3 system, not configuring GICv3 correctly
+// results in all of the sysregs generating an UNDEF exception.
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+static volatile bool handled;
+
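+/*
+ * Each accessor below clears 'handled' before the access; the guest UNDEF
+ * handler sets it and skips the faulting instruction, so a set flag means
+ * the access trapped as expected.
+ */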
+#define __check_sr_read(r) \
+ ({ \
+ uint64_t val; \
+ \
+ handled = false; \
+ dsb(sy); \
+ val = read_sysreg_s(SYS_ ## r); \
+ val; \
+ })
+
+#define __check_sr_write(r) \
+ do { \
+ handled = false; \
+ dsb(sy); \
+ write_sysreg_s(0, SYS_ ## r); \
+ isb(); \
+ } while(0)
+
+/* Fatal checks */
+#define check_sr_read(r) \
+ do { \
+ __check_sr_read(r); \
+ __GUEST_ASSERT(handled, #r " no read trap"); \
+ } while(0)
+
+#define check_sr_write(r) \
+ do { \
+ __check_sr_write(r); \
+ __GUEST_ASSERT(handled, #r " no write trap"); \
+ } while(0)
+
+#define check_sr_rw(r) \
+ do { \
+ check_sr_read(r); \
+ check_sr_write(r); \
+ } while(0)
+
+static void guest_code(void)
+{
+ uint64_t val;
+
+ /*
+ * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having
+ * hidden the feature at runtime without any other userspace action.
+ */
+ __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC),
+ read_sysreg(id_aa64pfr0_el1)) == 0,
+ "GICv3 wrongly advertised");
+
+ /*
+ * Access all GICv3 registers, and fail if we don't get an UNDEF.
+ * Note that we happily access all the APxRn registers without
+	 * checking their existence, as all we want to see is a failure.
+ */
+ check_sr_rw(ICC_PMR_EL1);
+ check_sr_read(ICC_IAR0_EL1);
+ check_sr_write(ICC_EOIR0_EL1);
+ check_sr_rw(ICC_HPPIR0_EL1);
+ check_sr_rw(ICC_BPR0_EL1);
+ check_sr_rw(ICC_AP0R0_EL1);
+ check_sr_rw(ICC_AP0R1_EL1);
+ check_sr_rw(ICC_AP0R2_EL1);
+ check_sr_rw(ICC_AP0R3_EL1);
+ check_sr_rw(ICC_AP1R0_EL1);
+ check_sr_rw(ICC_AP1R1_EL1);
+ check_sr_rw(ICC_AP1R2_EL1);
+ check_sr_rw(ICC_AP1R3_EL1);
+ check_sr_write(ICC_DIR_EL1);
+ check_sr_read(ICC_RPR_EL1);
+ check_sr_write(ICC_SGI1R_EL1);
+ check_sr_write(ICC_ASGI1R_EL1);
+ check_sr_write(ICC_SGI0R_EL1);
+ check_sr_read(ICC_IAR1_EL1);
+ check_sr_write(ICC_EOIR1_EL1);
+ check_sr_rw(ICC_HPPIR1_EL1);
+ check_sr_rw(ICC_BPR1_EL1);
+ check_sr_rw(ICC_CTLR_EL1);
+ check_sr_rw(ICC_IGRPEN0_EL1);
+ check_sr_rw(ICC_IGRPEN1_EL1);
+
+ /*
+ * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can
+ * be RAO/WI. Engage in non-fatal accesses, starting with a
+ * write of 0 to try and disable SRE, and let's see if it
+ * sticks.
+ */
+ __check_sr_write(ICC_SRE_EL1);
+ if (!handled)
+ GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n");
+
+ val = __check_sr_read(ICC_SRE_EL1);
+ if (!handled) {
+ __GUEST_ASSERT((val & BIT(0)),
+ "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n");
+ GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n");
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_undef_handler(struct ex_regs *regs)
+{
+ /* Success, we've gracefully exploded! */
+ handled = true;
+ regs->pc += 4;
+}
+
+static void test_run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ do {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_PRINTF:
+ printf("%s", uc.buffer);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ } while (uc.cmd != UCALL_DONE);
+}
+
+static void test_guest_no_gicv3(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /* Create a VM without a GICv3 */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_UNKNOWN, guest_undef_handler);
+
+ test_run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ uint64_t pfr0;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+ pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+ __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0),
+ "GICv3 not supported.");
+ kvm_vm_free(vm);
+
+ test_guest_no_gicv3();
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * page_fault_test.c - Test stage 2 faults.
+ *
+ * This test tries different combinations of guest accesses (e.g., write,
+ * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on
+ * hugetlbfs with a hole). It checks that the expected handling method is
+ * called (e.g., uffd faults with the right address and write/read flag).
+ */
+#include <linux/bitmap.h>
+#include <fcntl.h>
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+#include <asm/sysreg.h>
+#include <linux/bitfield.h>
+#include "guest_modes.h"
+#include "userfaultfd_util.h"
+
+/* Guest virtual addresses that point to the test page and its PTE. */
+#define TEST_GVA 0xc0000000
+#define TEST_EXEC_GVA (TEST_GVA + 0x8)
+#define TEST_PTE_GVA 0xb0000000
+#define TEST_DATA 0x0123456789ABCDEF
+
+static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA;
+
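+/* Commands the guest passes to the host via GUEST_SYNC; all but CMD_SKIP_TEST can be OR'd together. */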
+#define CMD_NONE (0)
+#define CMD_SKIP_TEST (1ULL << 1)
+#define CMD_HOLE_PT (1ULL << 2)
+#define CMD_HOLE_DATA (1ULL << 3)
+#define CMD_CHECK_WRITE_IN_DIRTY_LOG (1ULL << 4)
+#define CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG (1ULL << 5)
+#define CMD_CHECK_NO_WRITE_IN_DIRTY_LOG (1ULL << 6)
+#define CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG (1ULL << 7)
+#define CMD_SET_PTE_AF (1ULL << 8)
+
+#define PREPARE_FN_NR 10
+#define CHECK_FN_NR 10
+
+static struct event_cnt {
+ int mmio_exits;
+ int fail_vcpu_runs;
+ int uffd_faults;
+ /* uffd_faults is incremented from multiple threads. */
+ pthread_mutex_t uffd_faults_mutex;
+} events;
+
+struct test_desc {
+ const char *name;
+ uint64_t mem_mark_cmd;
+ /* Skip the test if any prepare function returns false */
+ bool (*guest_prepare[PREPARE_FN_NR])(void);
+ void (*guest_test)(void);
+ void (*guest_test_check[CHECK_FN_NR])(void);
+ uffd_handler_t uffd_pt_handler;
+ uffd_handler_t uffd_data_handler;
+ void (*dabt_handler)(struct ex_regs *regs);
+ void (*iabt_handler)(struct ex_regs *regs);
+ void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run);
+ void (*fail_vcpu_run_handler)(int ret);
+ uint32_t pt_memslot_flags;
+ uint32_t data_memslot_flags;
+ bool skip;
+ struct event_cnt expected_events;
+};
+
+struct test_params {
+ enum vm_mem_backing_src_type src_type;
+ struct test_desc *test_desc;
+};
+
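+/*
+ * Invalidate the stage-1 TLB entry for vaddr: TLBI VAAE1IS takes the VA page
+ * number (VA[55:12]), hence the shift by 12.
+ */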
+static inline void flush_tlb_page(uint64_t vaddr)
+{
+ uint64_t page = vaddr >> 12;
+
+ dsb(ishst);
+ asm volatile("tlbi vaae1is, %0" :: "r" (page));
+ dsb(ish);
+ isb();
+}
+
+static void guest_write64(void)
+{
+ uint64_t val;
+
+ WRITE_ONCE(*guest_test_memory, TEST_DATA);
+ val = READ_ONCE(*guest_test_memory);
+ GUEST_ASSERT_EQ(val, TEST_DATA);
+}
+
+/* Check the system for atomic instructions. */
+static bool guest_check_lse(void)
+{
+ uint64_t isar0 = read_sysreg(id_aa64isar0_el1);
+ uint64_t atomic;
+
+ atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0);
+ return atomic >= 2;
+}
+
+static bool guest_check_dc_zva(void)
+{
+ uint64_t dczid = read_sysreg(dczid_el0);
+ uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid);
+
+ return dzp == 0;
+}
+
+/* Compare and swap instruction. */
+static void guest_cas(void)
+{
+ uint64_t val;
+
+ GUEST_ASSERT(guest_check_lse());
+ asm volatile(".arch_extension lse\n"
+ "casal %0, %1, [%2]\n"
+ :: "r" (0ul), "r" (TEST_DATA), "r" (guest_test_memory));
+ val = READ_ONCE(*guest_test_memory);
+ GUEST_ASSERT_EQ(val, TEST_DATA);
+}
+
+static void guest_read64(void)
+{
+ uint64_t val;
+
+ val = READ_ONCE(*guest_test_memory);
+ GUEST_ASSERT_EQ(val, 0);
+}
+
+/* Address translation instruction */
+static void guest_at(void)
+{
+ uint64_t par;
+
+ asm volatile("at s1e1r, %0" :: "r" (guest_test_memory));
+ isb();
+ par = read_sysreg(par_el1);
+
+	/* PAR_EL1.F (bit 0) is clear when the address translation succeeded */
+ GUEST_ASSERT_EQ(par & 1, 0);
+}
+
+/*
+ * The size of the block written by "dc zva" is guaranteed to be between (2 <<
+ * 0) and (2 << 9), which is safe in our case as we need the write to happen
+ * for at least a word, and not more than a page.
+ */
+static void guest_dc_zva(void)
+{
+ uint16_t val;
+
+ asm volatile("dc zva, %0" :: "r" (guest_test_memory));
+ dsb(ish);
+ val = READ_ONCE(*guest_test_memory);
+ GUEST_ASSERT_EQ(val, 0);
+}
+
+/*
+ * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0).
+ * And that's special because KVM must take special care with those: they
+ * should still count as accesses for dirty logging or user-faulting, but
+ * should be handled differently on mmio.
+ */
+static void guest_ld_preidx(void)
+{
+ uint64_t val;
+ uint64_t addr = TEST_GVA - 8;
+
+ /*
+ * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is
+ * in a gap between memslots not backing by anything.
+	 * in a gap between memslots not backed by anything.
+ asm volatile("ldr %0, [%1, #8]!"
+ : "=r" (val), "+r" (addr));
+ GUEST_ASSERT_EQ(val, 0);
+ GUEST_ASSERT_EQ(addr, TEST_GVA);
+}
+
+static void guest_st_preidx(void)
+{
+ uint64_t val = TEST_DATA;
+ uint64_t addr = TEST_GVA - 8;
+
+ asm volatile("str %0, [%1, #8]!"
+ : "+r" (val), "+r" (addr));
+
+ GUEST_ASSERT_EQ(addr, TEST_GVA);
+ val = READ_ONCE(*guest_test_memory);
+}
+
+static bool guest_set_ha(void)
+{
+ uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1);
+ uint64_t hadbs, tcr;
+
+ /* Skip if HA is not supported. */
+ hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1);
+ if (hadbs == 0)
+ return false;
+
+ tcr = read_sysreg(tcr_el1) | TCR_EL1_HA;
+ write_sysreg(tcr, tcr_el1);
+ isb();
+
+ return true;
+}
+
+static bool guest_clear_pte_af(void)
+{
+ *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF;
+ flush_tlb_page(TEST_GVA);
+
+ return true;
+}
+
+static void guest_check_pte_af(void)
+{
+ dsb(ish);
+ GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF);
+}
+
+static void guest_check_write_in_dirty_log(void)
+{
+ GUEST_SYNC(CMD_CHECK_WRITE_IN_DIRTY_LOG);
+}
+
+static void guest_check_no_write_in_dirty_log(void)
+{
+ GUEST_SYNC(CMD_CHECK_NO_WRITE_IN_DIRTY_LOG);
+}
+
+static void guest_check_s1ptw_wr_in_dirty_log(void)
+{
+ GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG);
+}
+
+static void guest_check_no_s1ptw_wr_in_dirty_log(void)
+{
+ GUEST_SYNC(CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG);
+}
+
+static void guest_exec(void)
+{
+ int (*code)(void) = (int (*)(void))TEST_EXEC_GVA;
+ int ret;
+
+ ret = code();
+ GUEST_ASSERT_EQ(ret, 0x77);
+}
+
+static bool guest_prepare(struct test_desc *test)
+{
+ bool (*prepare_fn)(void);
+ int i;
+
+ for (i = 0; i < PREPARE_FN_NR; i++) {
+ prepare_fn = test->guest_prepare[i];
+ if (prepare_fn && !prepare_fn())
+ return false;
+ }
+
+ return true;
+}
+
+static void guest_test_check(struct test_desc *test)
+{
+ void (*check_fn)(void);
+ int i;
+
+ for (i = 0; i < CHECK_FN_NR; i++) {
+ check_fn = test->guest_test_check[i];
+ if (check_fn)
+ check_fn();
+ }
+}
+
+static void guest_code(struct test_desc *test)
+{
+ if (!guest_prepare(test))
+ GUEST_SYNC(CMD_SKIP_TEST);
+
+ GUEST_SYNC(test->mem_mark_cmd);
+
+ if (test->guest_test)
+ test->guest_test();
+
+ guest_test_check(test);
+ GUEST_DONE();
+}
+
+static void no_dabt_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Unexpected dabt, far_el1 = 0x%lx", read_sysreg(far_el1));
+}
+
+static void no_iabt_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Unexpected iabt, pc = 0x%lx", regs->pc);
+}
+
+static struct uffd_args {
+ char *copy;
+ void *hva;
+ uint64_t paging_size;
+} pt_args, data_args;
+
+/*
+ * Handle a MISSING userfaultfd fault by copying the saved contents back in.
+ * Returns 0 on success, or -1 if the UFFDIO_COPY failed.
+ */
+static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg,
+ struct uffd_args *args)
+{
+ uint64_t addr = msg->arg.pagefault.address;
+ uint64_t flags = msg->arg.pagefault.flags;
+ struct uffdio_copy copy;
+ int ret;
+
+ TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING,
+ "The only expected UFFD mode is MISSING");
+ TEST_ASSERT_EQ(addr, (uint64_t)args->hva);
+
+ pr_debug("uffd fault: addr=%p write=%d\n",
+ (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE));
+
+ copy.src = (uint64_t)args->copy;
+ copy.dst = addr;
+ copy.len = args->paging_size;
+ copy.mode = 0;
+
+	ret = ioctl(uffd, UFFDIO_COPY, &copy);
+ if (ret == -1) {
+ pr_info("Failed UFFDIO_COPY in 0x%lx with errno: %d\n",
+ addr, errno);
+ return ret;
+ }
+
+ pthread_mutex_lock(&events.uffd_faults_mutex);
+ events.uffd_faults += 1;
+ pthread_mutex_unlock(&events.uffd_faults_mutex);
+ return 0;
+}
+
+static int uffd_pt_handler(int mode, int uffd, struct uffd_msg *msg)
+{
+ return uffd_generic_handler(mode, uffd, msg, &pt_args);
+}
+
+static int uffd_data_handler(int mode, int uffd, struct uffd_msg *msg)
+{
+ return uffd_generic_handler(mode, uffd, msg, &data_args);
+}
+
+static void setup_uffd_args(struct userspace_mem_region *region,
+ struct uffd_args *args)
+{
+ args->hva = (void *)region->region.userspace_addr;
+ args->paging_size = region->region.memory_size;
+
+ args->copy = malloc(args->paging_size);
+ TEST_ASSERT(args->copy, "Failed to allocate data copy.");
+ memcpy(args->copy, args->hva, args->paging_size);
+}
+
+static void setup_uffd(struct kvm_vm *vm, struct test_params *p,
+ struct uffd_desc **pt_uffd, struct uffd_desc **data_uffd)
+{
+ struct test_desc *test = p->test_desc;
+ int uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
+
+ setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_PT), &pt_args);
+ setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_TEST_DATA), &data_args);
+
+ *pt_uffd = NULL;
+ if (test->uffd_pt_handler)
+ *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0,
+ pt_args.hva,
+ pt_args.paging_size,
+ 1, test->uffd_pt_handler);
+
+ *data_uffd = NULL;
+ if (test->uffd_data_handler)
+ *data_uffd = uffd_setup_demand_paging(uffd_mode, 0,
+ data_args.hva,
+ data_args.paging_size,
+ 1, test->uffd_data_handler);
+}
+
+static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd,
+ struct uffd_desc *data_uffd)
+{
+ if (test->uffd_pt_handler)
+ uffd_stop_demand_paging(pt_uffd);
+ if (test->uffd_data_handler)
+ uffd_stop_demand_paging(data_uffd);
+
+ free(pt_args.copy);
+ free(data_args.copy);
+}
+
+static int uffd_no_handler(int mode, int uffd, struct uffd_msg *msg)
+{
+ TEST_FAIL("There was no UFFD fault expected.");
+ return -1;
+}
+
+/* Returns false if the test should be skipped. */
+static bool punch_hole_in_backing_store(struct kvm_vm *vm,
+ struct userspace_mem_region *region)
+{
+ void *hva = (void *)region->region.userspace_addr;
+ uint64_t paging_size = region->region.memory_size;
+ int ret, fd = region->fd;
+
+ if (fd != -1) {
+ ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ 0, paging_size);
+ TEST_ASSERT(ret == 0, "fallocate failed");
+ } else {
+ ret = madvise(hva, paging_size, MADV_DONTNEED);
+ TEST_ASSERT(ret == 0, "madvise failed");
+ }
+
+ return true;
+}
+
+static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run)
+{
+ struct userspace_mem_region *region;
+ void *hva;
+
+ region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+ hva = (void *)region->region.userspace_addr;
+
+ TEST_ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr);
+
+ memcpy(hva, run->mmio.data, run->mmio.len);
+ events.mmio_exits += 1;
+}
+
+static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run)
+{
+ uint64_t data;
+
+ memcpy(&data, run->mmio.data, sizeof(data));
+ pr_debug("addr=%lld len=%d w=%d data=%lx\n",
+ run->mmio.phys_addr, run->mmio.len,
+ run->mmio.is_write, data);
+ TEST_FAIL("There was no MMIO exit expected.");
+}
+
+static bool check_write_in_dirty_log(struct kvm_vm *vm,
+ struct userspace_mem_region *region,
+ uint64_t host_pg_nr)
+{
+ unsigned long *bmap;
+ bool first_page_dirty;
+ uint64_t size = region->region.memory_size;
+
+	/* getpagesize() is not always equal to vm->page_size */
+ bmap = bitmap_zalloc(size / getpagesize());
+ kvm_vm_get_dirty_log(vm, region->region.slot, bmap);
+ first_page_dirty = test_bit(host_pg_nr, bmap);
+ free(bmap);
+ return first_page_dirty;
+}
+
+/* Returns true to continue the test, and false if it should be skipped. */
+static bool handle_cmd(struct kvm_vm *vm, int cmd)
+{
+ struct userspace_mem_region *data_region, *pt_region;
+ bool continue_test = true;
+ uint64_t pte_gpa, pte_pg;
+
+ data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+ pt_region = vm_get_mem_region(vm, MEM_REGION_PT);
+ pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
+ pte_pg = (pte_gpa - pt_region->region.guest_phys_addr) / getpagesize();
+
+ if (cmd == CMD_SKIP_TEST)
+ continue_test = false;
+
+ if (cmd & CMD_HOLE_PT)
+ continue_test = punch_hole_in_backing_store(vm, pt_region);
+ if (cmd & CMD_HOLE_DATA)
+ continue_test = punch_hole_in_backing_store(vm, data_region);
+ if (cmd & CMD_CHECK_WRITE_IN_DIRTY_LOG)
+ TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0),
+ "Missing write in dirty log");
+ if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG)
+ TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, pte_pg),
+ "Missing s1ptw write in dirty log");
+ if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG)
+ TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0),
+ "Unexpected write in dirty log");
+ if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG)
+ TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, pte_pg),
+ "Unexpected s1ptw write in dirty log");
+
+ return continue_test;
+}
+
+void fail_vcpu_run_no_handler(int ret)
+{
+ TEST_FAIL("Unexpected vcpu run failure");
+}
+
+void fail_vcpu_run_mmio_no_syndrome_handler(int ret)
+{
+ TEST_ASSERT(errno == ENOSYS,
+ "The mmio handler should have returned not implemented.");
+ events.fail_vcpu_runs += 1;
+}
+
+typedef uint32_t aarch64_insn_t;
+extern aarch64_insn_t __exec_test[2];
+
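+/*
+ * __exec_test provides the two instructions (mov x0, #0x77; ret) that
+ * load_exec_code_for_test() copies into the test-data region for
+ * guest_exec() to call through TEST_EXEC_GVA.
+ */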
+noinline void __return_0x77(void)
+{
+ asm volatile("__exec_test: mov x0, #0x77\n"
+ "ret\n");
+}
+
+/*
+ * Note that this function runs on the host before the test VM starts: there's
+ * no need to sync the D$ and I$ caches.
+ */
+static void load_exec_code_for_test(struct kvm_vm *vm)
+{
+ uint64_t *code;
+ struct userspace_mem_region *region;
+ void *hva;
+
+ region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+ hva = (void *)region->region.userspace_addr;
+
+ assert(TEST_EXEC_GVA > TEST_GVA);
+ code = hva + TEST_EXEC_GVA - TEST_GVA;
+ memcpy(code, __exec_test, sizeof(__exec_test));
+}
+
+static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+ struct test_desc *test)
+{
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_DABT_CUR, no_dabt_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_IABT_CUR, no_iabt_handler);
+}
+
+static void setup_gva_maps(struct kvm_vm *vm)
+{
+ struct userspace_mem_region *region;
+ uint64_t pte_gpa;
+
+ region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+ /* Map TEST_GVA first. This will install a new PTE. */
+ virt_pg_map(vm, TEST_GVA, region->region.guest_phys_addr);
+ /* Then map TEST_PTE_GVA to the above PTE. */
+ pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
+ virt_pg_map(vm, TEST_PTE_GVA, pte_gpa);
+}
+
+enum pf_test_memslots {
+ CODE_AND_DATA_MEMSLOT,
+ PAGE_TABLE_MEMSLOT,
+ TEST_DATA_MEMSLOT,
+};
+
+/*
+ * Create a memslot for code and data at pfn=0, and test-data and PT ones
+ * at max_gfn.
+ */
+static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
+{
+ uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type);
+ uint64_t guest_page_size = vm->page_size;
+ uint64_t max_gfn = vm_compute_max_gfn(vm);
+ /* Enough for 2M of code when using 4K guest pages. */
+ uint64_t code_npages = 512;
+ uint64_t pt_size, data_size, data_gpa;
+
+ /*
+ * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using
+	 * VM_MODE_P48V48_4K. Note that the .text takes ~1.6MB. That's 13
+ * pages. VM_MODE_P48V48_4K is the mode with most PT pages; let's use
+ * twice that just in case.
+ */
+ pt_size = 26 * guest_page_size;
+
+ /* memslot sizes and gpa's must be aligned to the backing page size */
+ pt_size = align_up(pt_size, backing_src_pagesz);
+ data_size = align_up(guest_page_size, backing_src_pagesz);
+ data_gpa = (max_gfn * guest_page_size) - data_size;
+ data_gpa = align_down(data_gpa, backing_src_pagesz);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0,
+ CODE_AND_DATA_MEMSLOT, code_npages, 0);
+ vm->memslots[MEM_REGION_CODE] = CODE_AND_DATA_MEMSLOT;
+ vm->memslots[MEM_REGION_DATA] = CODE_AND_DATA_MEMSLOT;
+
+ vm_userspace_mem_region_add(vm, p->src_type, data_gpa - pt_size,
+ PAGE_TABLE_MEMSLOT, pt_size / guest_page_size,
+ p->test_desc->pt_memslot_flags);
+ vm->memslots[MEM_REGION_PT] = PAGE_TABLE_MEMSLOT;
+
+ vm_userspace_mem_region_add(vm, p->src_type, data_gpa, TEST_DATA_MEMSLOT,
+ data_size / guest_page_size,
+ p->test_desc->data_memslot_flags);
+ vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
+}
+
+static void setup_ucall(struct kvm_vm *vm)
+{
+ struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+
+ ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size);
+}
+
+static void setup_default_handlers(struct test_desc *test)
+{
+ if (!test->mmio_handler)
+ test->mmio_handler = mmio_no_handler;
+
+ if (!test->fail_vcpu_run_handler)
+ test->fail_vcpu_run_handler = fail_vcpu_run_no_handler;
+}
+
+static void check_event_counts(struct test_desc *test)
+{
+ TEST_ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);
+ TEST_ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits);
+ TEST_ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs);
+}
+
+static void print_test_banner(enum vm_guest_mode mode, struct test_params *p)
+{
+ struct test_desc *test = p->test_desc;
+
+ pr_debug("Test: %s\n", test->name);
+ pr_debug("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+ pr_debug("Testing memory backing src type: %s\n",
+ vm_mem_backing_src_alias(p->src_type)->name);
+}
+
+static void reset_event_counts(void)
+{
+ memset(&events, 0, sizeof(events));
+}
+
+/*
+ * This function either succeeds, skips the test (after setting test->skip), or
+ * fails with a TEST_FAIL that aborts all tests.
+ */
+static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+ struct test_desc *test)
+{
+ struct kvm_run *run;
+ struct ucall uc;
+ int ret;
+
+ run = vcpu->run;
+
+ for (;;) {
+ ret = _vcpu_run(vcpu);
+ if (ret) {
+ test->fail_vcpu_run_handler(ret);
+ goto done;
+ }
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ if (!handle_cmd(vm, uc.args[1])) {
+ test->skip = true;
+ goto done;
+ }
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ goto done;
+ case UCALL_NONE:
+ if (run->exit_reason == KVM_EXIT_MMIO)
+ test->mmio_handler(vm, run);
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ pr_debug(test->skip ? "Skipped.\n" : "Done.\n");
+}
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+ struct test_params *p = (struct test_params *)arg;
+ struct test_desc *test = p->test_desc;
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ struct uffd_desc *pt_uffd, *data_uffd;
+
+ print_test_banner(mode, p);
+
+ vm = ____vm_create(VM_SHAPE(mode));
+ setup_memslots(vm, p);
+ kvm_vm_elf_load(vm, program_invocation_name);
+ setup_ucall(vm);
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ setup_gva_maps(vm);
+
+ reset_event_counts();
+
+ /*
+ * Set some code in the data memslot for the guest to execute (only
+ * applicable to the EXEC tests). This has to be done before
+ * setup_uffd() as that function copies the memslot data for the uffd
+ * handler.
+ */
+ load_exec_code_for_test(vm);
+ setup_uffd(vm, p, &pt_uffd, &data_uffd);
+ setup_abort_handlers(vm, vcpu, test);
+ setup_default_handlers(test);
+ vcpu_args_set(vcpu, 1, test);
+
+ vcpu_run_loop(vm, vcpu, test);
+
+ kvm_vm_free(vm);
+ free_uffd(test, pt_uffd, data_uffd);
+
+ /*
+ * Make sure we check the events after the uffd threads have exited,
+ * which means they updated their respective event counters.
+ */
+ if (!test->skip)
+ check_event_counts(test);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-s mem-type]\n", name);
+ puts("");
+ guest_modes_help();
+ backing_src_help("-s");
+ puts("");
+}
+
+#define SNAME(s) #s
+#define SCAT2(a, b) SNAME(a ## _ ## b)
+#define SCAT3(a, b, c) SCAT2(a, SCAT2(b, c))
+#define SCAT4(a, b, c, d) SCAT2(a, SCAT3(b, c, d))
+
+#define _CHECK(_test) _CHECK_##_test
+#define _PREPARE(_test) _PREPARE_##_test
+#define _PREPARE_guest_read64 NULL
+#define _PREPARE_guest_ld_preidx NULL
+#define _PREPARE_guest_write64 NULL
+#define _PREPARE_guest_st_preidx NULL
+#define _PREPARE_guest_exec NULL
+#define _PREPARE_guest_at NULL
+#define _PREPARE_guest_dc_zva guest_check_dc_zva
+#define _PREPARE_guest_cas guest_check_lse
+
+/* With or without access flag checks */
+#define _PREPARE_with_af guest_set_ha, guest_clear_pte_af
+#define _PREPARE_no_af NULL
+#define _CHECK_with_af guest_check_pte_af
+#define _CHECK_no_af NULL
+
+/* Performs an access and checks that no faults were triggered. */
+#define TEST_ACCESS(_access, _with_af, _mark_cmd) \
+{ \
+ .name = SCAT3(_access, _with_af, #_mark_cmd), \
+ .guest_prepare = { _PREPARE(_with_af), \
+ _PREPARE(_access) }, \
+ .mem_mark_cmd = _mark_cmd, \
+ .guest_test = _access, \
+ .guest_test_check = { _CHECK(_with_af) }, \
+ .expected_events = { 0 }, \
+}
+
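+/*
+ * Performs an access after applying _mark_cmd (typically punching holes in
+ * the backing stores), expecting _uffd_faults userfaultfd faults in total.
+ */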
+#define TEST_UFFD(_access, _with_af, _mark_cmd, \
+ _uffd_data_handler, _uffd_pt_handler, _uffd_faults) \
+{ \
+ .name = SCAT4(uffd, _access, _with_af, #_mark_cmd), \
+ .guest_prepare = { _PREPARE(_with_af), \
+ _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .mem_mark_cmd = _mark_cmd, \
+ .guest_test_check = { _CHECK(_with_af) }, \
+ .uffd_data_handler = _uffd_data_handler, \
+ .uffd_pt_handler = _uffd_pt_handler, \
+ .expected_events = { .uffd_faults = _uffd_faults, }, \
+}
+
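+/*
+ * Performs an access with the data and PT memslots tracked for dirty
+ * logging, then runs the given dirty-log checks in the guest.
+ */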
+#define TEST_DIRTY_LOG(_access, _with_af, _test_check, _pt_check) \
+{ \
+ .name = SCAT3(dirty_log, _access, _with_af), \
+ .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
+ .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
+ .guest_prepare = { _PREPARE(_with_af), \
+ _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \
+ .expected_events = { 0 }, \
+}
+
+#define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler, \
+ _uffd_faults, _test_check, _pt_check) \
+{ \
+ .name = SCAT3(uffd_and_dirty_log, _access, _with_af), \
+ .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
+ .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
+ .guest_prepare = { _PREPARE(_with_af), \
+ _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
+ .guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \
+ .uffd_data_handler = _uffd_data_handler, \
+ .uffd_pt_handler = uffd_pt_handler, \
+ .expected_events = { .uffd_faults = _uffd_faults, }, \
+}
+
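+/*
+ * Performs an access with both memslots marked read-only; writes with a
+ * valid syndrome are expected to exit as MMIO.
+ */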
+#define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits) \
+{ \
+ .name = SCAT2(ro_memslot, _access), \
+ .data_memslot_flags = KVM_MEM_READONLY, \
+ .pt_memslot_flags = KVM_MEM_READONLY, \
+ .guest_prepare = { _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .mmio_handler = _mmio_handler, \
+ .expected_events = { .mmio_exits = _mmio_exits }, \
+}
+
+#define TEST_RO_MEMSLOT_NO_SYNDROME(_access) \
+{ \
+ .name = SCAT2(ro_memslot_no_syndrome, _access), \
+ .data_memslot_flags = KVM_MEM_READONLY, \
+ .pt_memslot_flags = KVM_MEM_READONLY, \
+ .guest_prepare = { _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
+ .expected_events = { .fail_vcpu_runs = 1 }, \
+}
+
+#define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits, \
+ _test_check) \
+{ \
+ .name = SCAT2(ro_memslot, _access), \
+ .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
+ .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
+ .guest_prepare = { _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .guest_test_check = { _test_check }, \
+ .mmio_handler = _mmio_handler, \
+ .expected_events = { .mmio_exits = _mmio_exits}, \
+}
+
+#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(_access, _test_check) \
+{ \
+ .name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \
+ .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
+ .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
+ .guest_prepare = { _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .guest_test_check = { _test_check }, \
+ .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
+ .expected_events = { .fail_vcpu_runs = 1 }, \
+}
+
+#define TEST_RO_MEMSLOT_AND_UFFD(_access, _mmio_handler, _mmio_exits, \
+ _uffd_data_handler, _uffd_faults) \
+{ \
+ .name = SCAT2(ro_memslot_uffd, _access), \
+ .data_memslot_flags = KVM_MEM_READONLY, \
+ .pt_memslot_flags = KVM_MEM_READONLY, \
+ .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
+ .guest_prepare = { _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .uffd_data_handler = _uffd_data_handler, \
+ .uffd_pt_handler = uffd_pt_handler, \
+ .mmio_handler = _mmio_handler, \
+ .expected_events = { .mmio_exits = _mmio_exits, \
+ .uffd_faults = _uffd_faults }, \
+}
+
+#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(_access, _uffd_data_handler, \
+ _uffd_faults) \
+{ \
+ .name = SCAT2(ro_memslot_no_syndrome, _access), \
+ .data_memslot_flags = KVM_MEM_READONLY, \
+ .pt_memslot_flags = KVM_MEM_READONLY, \
+ .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
+ .guest_prepare = { _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .uffd_data_handler = _uffd_data_handler, \
+ .uffd_pt_handler = uffd_pt_handler, \
+ .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
+ .expected_events = { .fail_vcpu_runs = 1, \
+ .uffd_faults = _uffd_faults }, \
+}
+
+static struct test_desc tests[] = {
+
+ /* Check that HW is setting the Access Flag (AF) (sanity checks). */
+ TEST_ACCESS(guest_read64, with_af, CMD_NONE),
+ TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE),
+ TEST_ACCESS(guest_cas, with_af, CMD_NONE),
+ TEST_ACCESS(guest_write64, with_af, CMD_NONE),
+ TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE),
+ TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE),
+ TEST_ACCESS(guest_exec, with_af, CMD_NONE),
+
+ /*
+ * Punch a hole in the data backing store, and then try multiple
+	 * accesses: reads should return zeroes, and writes should
+	 * re-populate the page. Moreover, the test also checks that no
+ * exception was generated in the guest. Note that this
+ * reading/writing behavior is the same as reading/writing a
+ * punched page (with fallocate(FALLOC_FL_PUNCH_HOLE)) from
+ * userspace.
+ */
+ TEST_ACCESS(guest_read64, no_af, CMD_HOLE_DATA),
+ TEST_ACCESS(guest_cas, no_af, CMD_HOLE_DATA),
+ TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_DATA),
+ TEST_ACCESS(guest_write64, no_af, CMD_HOLE_DATA),
+ TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_DATA),
+ TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA),
+ TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA),
+
+ /*
+ * Punch holes in the data and PT backing stores and mark them for
+ * userfaultfd handling. This should result in 2 faults: the access
+ * on the data backing store, and its respective S1 page table walk
+ * (S1PTW).
+ */
+ TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ /*
+ * Can't test guest_at with_af as it's IMPDEF whether the AF is set.
+ * The S1PTW fault should still be marked as a write.
+ */
+ TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_no_handler, uffd_pt_handler, 1),
+ TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+
+ /*
+ * Try accesses when the data and PT memory regions are both
+ * tracked for dirty logging.
+ */
+ TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log,
+ guest_check_no_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_ld_preidx, with_af,
+ guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log,
+ guest_check_no_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+
+ /*
+ * Access when the data and PT memory regions are both marked for
+ * dirty logging and UFFD at the same time. The expected result is
+ * that writes should mark the dirty log and trigger a userfaultfd
+ * write fault. Reads/execs should result in a read userfaultfd
+ * fault, and nothing in the dirty log. Any S1PTW should result in
+ * a write in the dirty log and a userfaultfd write.
+ */
+ TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af,
+ uffd_data_handler, 2,
+ guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af,
+ uffd_data_handler, 2,
+ guest_check_no_write_in_dirty_log,
+ guest_check_no_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af,
+ uffd_data_handler,
+ 2, guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, uffd_no_handler, 1,
+ guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af,
+ uffd_data_handler, 2,
+ guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af,
+ uffd_data_handler,
+ 2, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af,
+ uffd_data_handler, 2,
+ guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af,
+ uffd_data_handler,
+ 2, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af,
+ uffd_data_handler, 2,
+ guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ /*
+ * Access when both the PT and data regions are marked read-only
+ * (with KVM_MEM_READONLY). Writes with a syndrome result in an
+ * MMIO exit, writes with no syndrome (e.g., CAS) result in a
+	 * failed vcpu run, and reads/execs with and without syndromes do
+ * not fault.
+ */
+ TEST_RO_MEMSLOT(guest_read64, 0, 0),
+ TEST_RO_MEMSLOT(guest_ld_preidx, 0, 0),
+ TEST_RO_MEMSLOT(guest_at, 0, 0),
+ TEST_RO_MEMSLOT(guest_exec, 0, 0),
+ TEST_RO_MEMSLOT(guest_write64, mmio_on_test_gpa_handler, 1),
+ TEST_RO_MEMSLOT_NO_SYNDROME(guest_dc_zva),
+ TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas),
+ TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx),
+
+ /*
+ * The PT and data regions are both read-only and marked
+ * for dirty logging at the same time. The expected result is that
+ * for writes there should be no write in the dirty log. The
+ * readonly handling is the same as if the memslot was not marked
+ * for dirty logging: writes with a syndrome result in an MMIO
+ * exit, and writes with no syndrome result in a failed vcpu run.
+ */
+ TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_read64, 0, 0,
+ guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_ld_preidx, 0, 0,
+ guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_at, 0, 0,
+ guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_exec, 0, 0,
+ guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_write64, mmio_on_test_gpa_handler,
+ 1, guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_dc_zva,
+ guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_cas,
+ guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_st_preidx,
+ guest_check_no_write_in_dirty_log),
+
+ /*
+ * The PT and data regions are both read-only and punched with
+ * holes tracked with userfaultfd. The expected result is the
+ * union of both userfaultfd and read-only behaviors. For example,
+ * write accesses result in a userfaultfd write fault and an MMIO
+ * exit. Writes with no syndrome result in a failed vcpu run and
+ * no userfaultfd write fault. Reads result in userfaultfd getting
+ * triggered.
+ */
+ TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, uffd_data_handler, 2),
+ TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, uffd_data_handler, 2),
+ TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, uffd_no_handler, 1),
+ TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, uffd_data_handler, 2),
+ TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1,
+ uffd_data_handler, 2),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, uffd_data_handler, 2),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, uffd_no_handler, 1),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, uffd_no_handler, 1),
+
+ { 0 }
+};
+
+static void for_each_test_and_guest_mode(enum vm_mem_backing_src_type src_type)
+{
+ struct test_desc *t;
+
+ for (t = &tests[0]; t->name; t++) {
+ if (t->skip)
+ continue;
+
+ struct test_params p = {
+ .src_type = src_type,
+ .test_desc = t,
+ };
+
+ for_each_guest_mode(run_test, &p);
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ enum vm_mem_backing_src_type src_type;
+ int opt;
+
+ src_type = DEFAULT_VM_MEM_SRC;
+
+ while ((opt = getopt(argc, argv, "hm:s:")) != -1) {
+ switch (opt) {
+ case 'm':
+ guest_modes_cmdline(optarg);
+ break;
+ case 's':
+ src_type = parse_backing_src_type(optarg);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ exit(0);
+ }
+ }
+
+ for_each_test_and_guest_mode(src_type);
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * psci_test - Tests relating to KVM's PSCI implementation.
+ *
+ * Copyright (c) 2021 Google LLC.
+ *
+ * This test includes:
+ * - A regression test for a race between KVM servicing the PSCI CPU_ON call
+ * and userspace reading the targeted vCPU's registers.
+ * - A test for KVM's handling of PSCI SYSTEM_SUSPEND and the associated
+ * KVM_SYSTEM_EVENT_SUSPEND UAPI.
+ */
+
+#include <linux/kernel.h>
+#include <linux/psci.h>
+#include <asm/cputype.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define CPU_ON_ENTRY_ADDR 0xfeedf00dul
+#define CPU_ON_CONTEXT_ID 0xdeadc0deul
+
+static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr,
+ uint64_t context_id)
+{
+ struct arm_smccc_res res;
+
+ smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id,
+ 0, 0, 0, 0, &res);
+
+ return res.a0;
+}
+
+static uint64_t psci_affinity_info(uint64_t target_affinity,
+ uint64_t lowest_affinity_level)
+{
+ struct arm_smccc_res res;
+
+ smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level,
+ 0, 0, 0, 0, 0, &res);
+
+ return res.a0;
+}
+
+static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id)
+{
+ struct arm_smccc_res res;
+
+ smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id,
+ 0, 0, 0, 0, 0, &res);
+
+ return res.a0;
+}
+
+static uint64_t psci_system_off2(uint64_t type, uint64_t cookie)
+{
+ struct arm_smccc_res res;
+
+ smccc_hvc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res);
+
+ return res.a0;
+}
+
+static uint64_t psci_features(uint32_t func_id)
+{
+ struct arm_smccc_res res;
+
+ smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res);
+
+ return res.a0;
+}
+
+static void vcpu_power_off(struct kvm_vcpu *vcpu)
+{
+ struct kvm_mp_state mp_state = {
+ .mp_state = KVM_MP_STATE_STOPPED,
+ };
+
+ vcpu_mp_state_set(vcpu, &mp_state);
+}
+
+static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source,
+ struct kvm_vcpu **target)
+{
+ struct kvm_vcpu_init init;
+ struct kvm_vm *vm;
+
+ vm = vm_create(2);
+
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
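+	/*
+	 * Opt in to PSCI v0.2 (and later) so the guest can issue the PSCI
+	 * calls exercised by these tests.
+	 */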
+ init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
+
+ *source = aarch64_vcpu_add(vm, 0, &init, guest_code);
+ *target = aarch64_vcpu_add(vm, 1, &init, guest_code);
+
+ return vm;
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ if (get_ucall(vcpu, &uc) == UCALL_ABORT)
+ REPORT_GUEST_ASSERT(uc);
+}
+
+static void assert_vcpu_reset(struct kvm_vcpu *vcpu)
+{
+ uint64_t obs_pc, obs_x0;
+
+ obs_pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
+ obs_x0 = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0]));
+
+ TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR,
+ "unexpected target cpu pc: %lx (expected: %lx)",
+ obs_pc, CPU_ON_ENTRY_ADDR);
+ TEST_ASSERT(obs_x0 == CPU_ON_CONTEXT_ID,
+ "unexpected target context id: %lx (expected: %lx)",
+ obs_x0, CPU_ON_CONTEXT_ID);
+}
+
+static void guest_test_cpu_on(uint64_t target_cpu)
+{
+ uint64_t target_state;
+
+ GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID));
+
+ do {
+ target_state = psci_affinity_info(target_cpu, 0);
+
+ GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) ||
+ (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF));
+ } while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON);
+
+ GUEST_DONE();
+}
+
+static void host_test_cpu_on(void)
+{
+ struct kvm_vcpu *source, *target;
+ uint64_t target_mpidr;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ vm = setup_vm(guest_test_cpu_on, &source, &target);
+
+	/* Make sure the target is already off when executing the test. */
+ vcpu_power_off(target);
+
+ target_mpidr = vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1));
+ vcpu_args_set(source, 1, target_mpidr & MPIDR_HWID_BITMASK);
+ enter_guest(source);
+
+ if (get_ucall(source, &uc) != UCALL_DONE)
+ TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
+
+ assert_vcpu_reset(target);
+ kvm_vm_free(vm);
+}
+
+static void guest_test_system_suspend(void)
+{
+ uint64_t ret;
+
+ /* assert that SYSTEM_SUSPEND is discoverable */
+ GUEST_ASSERT(!psci_features(PSCI_1_0_FN_SYSTEM_SUSPEND));
+ GUEST_ASSERT(!psci_features(PSCI_1_0_FN64_SYSTEM_SUSPEND));
+
+ ret = psci_system_suspend(CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID);
+ GUEST_SYNC(ret);
+}
+
+static void host_test_system_suspend(void)
+{
+ struct kvm_vcpu *source, *target;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+
+ vm = setup_vm(guest_test_system_suspend, &source, &target);
+ vm_enable_cap(vm, KVM_CAP_ARM_SYSTEM_SUSPEND, 0);
+
+ vcpu_power_off(target);
+ run = source->run;
+
+ enter_guest(source);
+
+ TEST_ASSERT_KVM_EXIT_REASON(source, KVM_EXIT_SYSTEM_EVENT);
+ TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SUSPEND,
+ "Unhandled system event: %u (expected: %u)",
+ run->system_event.type, KVM_SYSTEM_EVENT_SUSPEND);
+
+ kvm_vm_free(vm);
+}
+
+static void guest_test_system_off2(void)
+{
+ uint64_t ret;
+
+ /* assert that SYSTEM_OFF2 is discoverable */
+ GUEST_ASSERT(psci_features(PSCI_1_3_FN_SYSTEM_OFF2) &
+ PSCI_1_3_OFF_TYPE_HIBERNATE_OFF);
+ GUEST_ASSERT(psci_features(PSCI_1_3_FN64_SYSTEM_OFF2) &
+ PSCI_1_3_OFF_TYPE_HIBERNATE_OFF);
+
+ /* With non-zero 'cookie' field, it should fail */
+ ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 1);
+ GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS);
+
+ /*
+ * This would normally never return, so KVM sets the return value
+ * to PSCI_RET_INTERNAL_FAILURE. The test case *does* return, so
+ * that it can test both values for HIBERNATE_OFF.
+ */
+ ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 0);
+ GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE);
+
+ /*
+ * Revision F.b of the PSCI v1.3 specification documents zero as an
+ * alias for HIBERNATE_OFF, since that's the value used in earlier
+ * revisions of the spec and some implementations in the field.
+ */
+ ret = psci_system_off2(0, 1);
+ GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS);
+
+ ret = psci_system_off2(0, 0);
+ GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE);
+
+ GUEST_DONE();
+}
+
+static void host_test_system_off2(void)
+{
+ struct kvm_vcpu *source, *target;
+ struct kvm_mp_state mps;
+ uint64_t psci_version = 0;
+ int nr_shutdowns = 0;
+ struct kvm_run *run;
+ struct ucall uc;
+
+ setup_vm(guest_test_system_off2, &source, &target);
+
+ psci_version = vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION);
+
+ TEST_ASSERT(psci_version >= PSCI_VERSION(1, 3),
+ "Unexpected PSCI version %lu.%lu",
+ PSCI_VERSION_MAJOR(psci_version),
+ PSCI_VERSION_MINOR(psci_version));
+
+ vcpu_power_off(target);
+ run = source->run;
+
+ enter_guest(source);
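+	/*
+	 * The guest invokes SYSTEM_OFF2 twice (once with an explicit
+	 * HIBERNATE_OFF type and once with its zero alias), so two shutdown
+	 * exits are expected before the final UCALL_DONE.
+	 */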
+ while (run->exit_reason == KVM_EXIT_SYSTEM_EVENT) {
+ TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SHUTDOWN,
+ "Unhandled system event: %u (expected: %u)",
+ run->system_event.type, KVM_SYSTEM_EVENT_SHUTDOWN);
+ TEST_ASSERT(run->system_event.ndata >= 1,
+			    "Unexpected amount of system event data: %u (expected >= 1)",
+ run->system_event.ndata);
+ TEST_ASSERT(run->system_event.data[0] & KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2,
+ "PSCI_OFF2 flag not set. Flags %llu (expected %llu)",
+ run->system_event.data[0], KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2);
+
+ nr_shutdowns++;
+
+ /* Restart the vCPU */
+ mps.mp_state = KVM_MP_STATE_RUNNABLE;
+ vcpu_mp_state_set(source, &mps);
+
+ enter_guest(source);
+ }
+
+ TEST_ASSERT(get_ucall(source, &uc) == UCALL_DONE, "Guest did not exit cleanly");
+ TEST_ASSERT(nr_shutdowns == 2, "Two shutdown events were expected, but saw %d", nr_shutdowns);
+}
+
+int main(void)
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SYSTEM_SUSPEND));
+
+ host_test_cpu_on();
+ host_test_system_suspend();
+ host_test_system_off2();
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * set_id_regs - Test for setting ID registers from userspace.
+ *
+ * Copyright (c) 2023 Google LLC.
+ *
+ * Test that KVM supports setting ID registers from userspace and handles the
+ * feature set correctly.
+ */
+
+#include <stdint.h>
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include <linux/bitfield.h>
+
+enum ftr_type {
+ FTR_EXACT, /* Use a predefined safe value */
+ FTR_LOWER_SAFE, /* Smaller value is safe */
+ FTR_HIGHER_SAFE, /* Bigger value is safe */
+ FTR_HIGHER_OR_ZERO_SAFE, /* Bigger value is safe, but 0 is biggest */
+ FTR_END, /* Mark the last ftr bits */
+};
+
+#define FTR_SIGNED true /* Value should be treated as signed */
+#define FTR_UNSIGNED false /* Value should be treated as unsigned */
+
+struct reg_ftr_bits {
+ char *name;
+ bool sign;
+ enum ftr_type type;
+ uint8_t shift;
+ uint64_t mask;
+ /*
+ * For FTR_EXACT, safe_val is used as the exact safe value.
+ * For FTR_LOWER_SAFE, safe_val is used as the minimal safe value.
+ */
+ int64_t safe_val;
+};
+
+struct test_feature_reg {
+ uint32_t reg;
+ const struct reg_ftr_bits *ftr_bits;
+};
+
+#define __REG_FTR_BITS(NAME, SIGNED, TYPE, SHIFT, MASK, SAFE_VAL) \
+ { \
+ .name = #NAME, \
+ .sign = SIGNED, \
+ .type = TYPE, \
+ .shift = SHIFT, \
+ .mask = MASK, \
+ .safe_val = SAFE_VAL, \
+ }
+
+#define REG_FTR_BITS(type, reg, field, safe_val) \
+ __REG_FTR_BITS(reg##_##field, FTR_UNSIGNED, type, reg##_##field##_SHIFT, \
+ reg##_##field##_MASK, safe_val)
+
+#define S_REG_FTR_BITS(type, reg, field, safe_val) \
+ __REG_FTR_BITS(reg##_##field, FTR_SIGNED, type, reg##_##field##_SHIFT, \
+ reg##_##field##_MASK, safe_val)
+
+#define REG_FTR_END \
+ { \
+ .type = FTR_END, \
+ }
+
+static const struct reg_ftr_bits ftr_id_aa64dfr0_el1[] = {
+ S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DoubleLock, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, WRPs, 0),
+ S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, PMUVer, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, ID_AA64DFR0_EL1_DebugVer_IMP),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_dfr0_el1[] = {
+ S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, PerfMon, ID_DFR0_EL1_PerfMon_PMUv3),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, CopDbg, ID_DFR0_EL1_CopDbg_Armv8),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar0_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RNDR, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TLB, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, FHM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, DP, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM4, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM3, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA3, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RDM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TME, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, ATOMIC, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, CRC32, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA2, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA1, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, AES, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar1_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LS64, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, XS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, I8MM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DGH, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, BF16, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SPECRES, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SB, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FRINTTS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LRCPC, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FCMA, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, JSCVT, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DPB, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, BC, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, RPRES, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, WFxT, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0),
+ REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0),
+ S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0),
+ S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGENDEL0, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, SNSMEM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGEND, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ASIDBITS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, PARANGE, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0),
+ REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HPDS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HAFDBS, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, E0PD, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, BBM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, TTL, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, AT, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, ST, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, VARange, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, IESB, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, LSM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, UAO, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, CnP, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, I8MM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SM4, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SHA3, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BF16, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BitPerm, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, AES, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SVEver, 0),
+ REG_FTR_END,
+};
+
+#define TEST_REG(id, table) \
+ { \
+ .reg = id, \
+ .ftr_bits = &((table)[0]), \
+ }
+
+static struct test_feature_reg test_regs[] = {
+ TEST_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0_el1),
+ TEST_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0_el1),
+ TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1),
+ TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1),
+ TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1),
+ TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1),
+ TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1),
+ TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1),
+ TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1),
+ TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1),
+ TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1),
+};
+
+#define GUEST_REG_SYNC(id) GUEST_SYNC_ARGS(0, id, read_sysreg_s(id), 0, 0);
+
+static void guest_code(void)
+{
+ GUEST_REG_SYNC(SYS_ID_AA64DFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_DFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
+ GUEST_REG_SYNC(SYS_CTR_EL0);
+
+ GUEST_DONE();
+}
+
+/* Return a safe value for a given ftr_bits and ftr value */
+uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
+{
+ uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+
+ if (ftr_bits->sign == FTR_UNSIGNED) {
+ switch (ftr_bits->type) {
+ case FTR_EXACT:
+ ftr = ftr_bits->safe_val;
+ break;
+ case FTR_LOWER_SAFE:
+ if (ftr > ftr_bits->safe_val)
+ ftr--;
+ break;
+ case FTR_HIGHER_SAFE:
+ if (ftr < ftr_max)
+ ftr++;
+ break;
+ case FTR_HIGHER_OR_ZERO_SAFE:
+ if (ftr == ftr_max)
+ ftr = 0;
+ else if (ftr != 0)
+ ftr++;
+ break;
+ default:
+ break;
+ }
+ } else if (ftr != ftr_max) {
+ switch (ftr_bits->type) {
+ case FTR_EXACT:
+ ftr = ftr_bits->safe_val;
+ break;
+ case FTR_LOWER_SAFE:
+ if (ftr > ftr_bits->safe_val)
+ ftr--;
+ break;
+ case FTR_HIGHER_SAFE:
+ if (ftr < ftr_max - 1)
+ ftr++;
+ break;
+ case FTR_HIGHER_OR_ZERO_SAFE:
+ if (ftr != 0 && ftr != ftr_max - 1)
+ ftr++;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return ftr;
+}
+
+/* Return an invalid value for a given ftr_bits and ftr value */
+uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
+{
+ uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+
+ if (ftr_bits->sign == FTR_UNSIGNED) {
+ switch (ftr_bits->type) {
+ case FTR_EXACT:
+ ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
+ break;
+ case FTR_LOWER_SAFE:
+ ftr++;
+ break;
+ case FTR_HIGHER_SAFE:
+ ftr--;
+ break;
+ case FTR_HIGHER_OR_ZERO_SAFE:
+ if (ftr == 0)
+ ftr = ftr_max;
+ else
+ ftr--;
+ break;
+ default:
+ break;
+ }
+ } else if (ftr != ftr_max) {
+ switch (ftr_bits->type) {
+ case FTR_EXACT:
+ ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
+ break;
+ case FTR_LOWER_SAFE:
+ ftr++;
+ break;
+ case FTR_HIGHER_SAFE:
+ ftr--;
+ break;
+ case FTR_HIGHER_OR_ZERO_SAFE:
+ if (ftr == 0)
+ ftr = ftr_max - 1;
+ else
+ ftr--;
+ break;
+ default:
+ break;
+ }
+ } else {
+ ftr = 0;
+ }
+
+ return ftr;
+}
+
+static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
+ const struct reg_ftr_bits *ftr_bits)
+{
+ uint8_t shift = ftr_bits->shift;
+ uint64_t mask = ftr_bits->mask;
+ uint64_t val, new_val, ftr;
+
+ val = vcpu_get_reg(vcpu, reg);
+ ftr = (val & mask) >> shift;
+
+ ftr = get_safe_value(ftr_bits, ftr);
+
+ ftr <<= shift;
+ val &= ~mask;
+ val |= ftr;
+
+ vcpu_set_reg(vcpu, reg, val);
+ new_val = vcpu_get_reg(vcpu, reg);
+ TEST_ASSERT_EQ(new_val, val);
+
+ return new_val;
+}
+
+static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg,
+ const struct reg_ftr_bits *ftr_bits)
+{
+ uint8_t shift = ftr_bits->shift;
+ uint64_t mask = ftr_bits->mask;
+ uint64_t val, old_val, ftr;
+ int r;
+
+ val = vcpu_get_reg(vcpu, reg);
+ ftr = (val & mask) >> shift;
+
+ ftr = get_invalid_value(ftr_bits, ftr);
+
+ old_val = val;
+ ftr <<= shift;
+ val &= ~mask;
+ val |= ftr;
+
+ r = __vcpu_set_reg(vcpu, reg, val);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
+
+ val = vcpu_get_reg(vcpu, reg);
+ TEST_ASSERT_EQ(val, old_val);
+}
+
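+/*
+ * Values written from userspace for each register, indexed with
+ * encoding_to_range_idx(). They are checked against what the guest observes
+ * and against what survives a vCPU reset.
+ */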
+static uint64_t test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+
+#define encoding_to_range_idx(encoding) \
+ KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(encoding), sys_reg_Op1(encoding), \
+ sys_reg_CRn(encoding), sys_reg_CRm(encoding), \
+ sys_reg_Op2(encoding))
+
+static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only)
+{
+ uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+ struct reg_mask_range range = {
+ .addr = (__u64)masks,
+ };
+ int ret;
+
+ /* KVM should return error when reserved field is not zero */
+ range.reserved[0] = 1;
+ ret = __vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+ TEST_ASSERT(ret, "KVM doesn't check invalid parameters.");
+
+ /* Get writable masks for feature ID registers */
+ memset(range.reserved, 0, sizeof(range.reserved));
+ vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+
+ for (int i = 0; i < ARRAY_SIZE(test_regs); i++) {
+ const struct reg_ftr_bits *ftr_bits = test_regs[i].ftr_bits;
+ uint32_t reg_id = test_regs[i].reg;
+ uint64_t reg = KVM_ARM64_SYS_REG(reg_id);
+ int idx;
+
+ /* Get the index to masks array for the idreg */
+ idx = encoding_to_range_idx(reg_id);
+
+ for (int j = 0; ftr_bits[j].type != FTR_END; j++) {
+			/* Skip aarch32 regs on an aarch64-only system, since they are RAZ/WI. */
+ if (aarch64_only && sys_reg_CRm(reg_id) < 4) {
+ ksft_test_result_skip("%s on AARCH64 only system\n",
+ ftr_bits[j].name);
+ continue;
+ }
+
+ /* Make sure the feature field is writable */
+ TEST_ASSERT_EQ(masks[idx] & ftr_bits[j].mask, ftr_bits[j].mask);
+
+ test_reg_set_fail(vcpu, reg, &ftr_bits[j]);
+
+ test_reg_vals[idx] = test_reg_set_success(vcpu, reg,
+ &ftr_bits[j]);
+
+ ksft_test_result_pass("%s\n", ftr_bits[j].name);
+ }
+ }
+}
+
+#define MPAM_IDREG_TEST 6
+static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu)
+{
+ uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+ struct reg_mask_range range = {
+ .addr = (__u64)masks,
+ };
+ uint64_t val;
+ int idx, err;
+
+ /*
+	 * If ID_AA64PFR0.MPAM is _not_ officially modifiable and is zero,
+	 * check that, if it can be set to 1 (i.e. it is supported by the
+	 * hardware), it can't be set to any other value.
+ */
+
+ /* Get writable masks for feature ID registers */
+ memset(range.reserved, 0, sizeof(range.reserved));
+ vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+
+ /* Writeable? Nothing to test! */
+ idx = encoding_to_range_idx(SYS_ID_AA64PFR0_EL1);
+ if ((masks[idx] & ID_AA64PFR0_EL1_MPAM_MASK) == ID_AA64PFR0_EL1_MPAM_MASK) {
+ ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is officially writable, nothing to test\n");
+ return;
+ }
+
+ /* Get the id register value */
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+
+ /* Try to set MPAM=0. This should always be possible. */
+ val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
+ val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 0);
+ err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
+ if (err)
+ ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM=0 was not accepted\n");
+ else
+ ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=0 worked\n");
+
+ /* Try to set MPAM=1 */
+ val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
+ val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 1);
+ err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
+ if (err)
+ ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is not writable, nothing to test\n");
+ else
+ ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=1 was writable\n");
+
+ /* Try to set MPAM=2 */
+ val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
+ val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 2);
+ err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
+ if (err)
+ ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM not arbitrarily modifiable\n");
+ else
+ ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM value should not be ignored\n");
+
+ /* And again for ID_AA64PFR1_EL1.MPAM_frac */
+ idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1);
+ if ((masks[idx] & ID_AA64PFR1_EL1_MPAM_frac_MASK) == ID_AA64PFR1_EL1_MPAM_frac_MASK) {
+ ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is officially writable, nothing to test\n");
+ return;
+ }
+
+ /* Get the id register value */
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+
+ /* Try to set MPAM_frac=0. This should always be possible. */
+ val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
+ val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 0);
+ err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
+ if (err)
+		ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac=0 was not accepted\n");
+ else
+		ksft_test_result_pass("ID_AA64PFR1_EL1.MPAM_frac=0 worked\n");
+
+ /* Try to set MPAM_frac=1 */
+ val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
+ val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 1);
+ err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
+ if (err)
+ ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is not writable, nothing to test\n");
+ else
+		ksft_test_result_pass("ID_AA64PFR1_EL1.MPAM_frac=1 was writable\n");
+
+ /* Try to set MPAM_frac=2 */
+ val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
+ val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 2);
+ err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
+ if (err)
+ ksft_test_result_pass("ID_AA64PFR1_EL1.MPAM_frac not arbitrarily modifiable\n");
+ else
+ ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n");
+}
+
+static void test_guest_reg_read(struct kvm_vcpu *vcpu)
+{
+ bool done = false;
+ struct ucall uc;
+
+ while (!done) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_SYNC:
+ /* Make sure the written values are seen by guest */
+ TEST_ASSERT_EQ(test_reg_vals[encoding_to_range_idx(uc.args[2])],
+ uc.args[3]);
+ break;
+ case UCALL_DONE:
+ done = true;
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ }
+}
+
+/* Politely lifted from arch/arm64/include/asm/cache.h */
+/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
+#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1))
+#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level))
+#define CLIDR_CTYPE(clidr, level) \
+ (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
+
+static void test_clidr(struct kvm_vcpu *vcpu)
+{
+ uint64_t clidr;
+ int level;
+
+ clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1));
+
+ /* find the first empty level in the cache hierarchy */
+ for (level = 1; level < 7; level++) {
+ if (!CLIDR_CTYPE(clidr, level))
+ break;
+ }
+
+ /*
+ * If you have a mind-boggling 7 levels of cache, congratulations, you
+ * get to fix this.
+ */
+ TEST_ASSERT(level <= 7, "can't find an empty level in cache hierarchy");
+
+ /* stick in a unified cache level */
+ clidr |= BIT(2) << CLIDR_CTYPE_SHIFT(level);
+
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), clidr);
+ test_reg_vals[encoding_to_range_idx(SYS_CLIDR_EL1)] = clidr;
+}
+
+static void test_ctr(struct kvm_vcpu *vcpu)
+{
+ u64 ctr;
+
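+	/*
+	 * Derive a different but still acceptable CTR_EL0 value: clear DIC
+	 * and, if IminLine is non-zero, reduce it by one.
+	 */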
+ ctr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0));
+ ctr &= ~CTR_EL0_DIC_MASK;
+ if (ctr & CTR_EL0_IminLine_MASK)
+ ctr--;
+
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), ctr);
+ test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr;
+}
+
+static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ test_clidr(vcpu);
+ test_ctr(vcpu);
+
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1));
+ val++;
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val);
+
+ test_reg_vals[encoding_to_range_idx(SYS_MPIDR_EL1)] = val;
+ ksft_test_result_pass("%s\n", __func__);
+}
+
+static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encoding)
+{
+ size_t idx = encoding_to_range_idx(encoding);
+ uint64_t observed;
+
+ observed = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding));
+ TEST_ASSERT_EQ(test_reg_vals[idx], observed);
+}
+
+static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Calls KVM_ARM_VCPU_INIT behind the scenes, which will do an
+ * architectural reset of the vCPU.
+ */
+ aarch64_vcpu_setup(vcpu, NULL);
+
+ for (int i = 0; i < ARRAY_SIZE(test_regs); i++)
+ test_assert_id_reg_unchanged(vcpu, test_regs[i].reg);
+
+ test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1);
+ test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1);
+ test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0);
+
+ ksft_test_result_pass("%s\n", __func__);
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ bool aarch64_only;
+ uint64_t val, el0;
+ int test_cnt;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ /* Check for AARCH64 only system */
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+ el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
+ aarch64_only = (el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
+
+ ksft_print_header();
+
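+	/*
+	 * Each feature table ends with a REG_FTR_END sentinel, hence the
+	 * subtraction of ARRAY_SIZE(test_regs). The extra 2 accounts for
+	 * test_vcpu_ftr_id_regs() and test_reset_preserves_id_regs(), and
+	 * MPAM_IDREG_TEST for the results of test_user_set_mpam_reg().
+	 */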
+ test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) +
+ ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) +
+ ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
+ ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) +
+ ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) +
+ ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 2 +
+ MPAM_IDREG_TEST;
+
+ ksft_set_plan(test_cnt);
+
+ test_vm_ftr_id_regs(vcpu, aarch64_only);
+ test_vcpu_ftr_id_regs(vcpu);
+ test_user_set_mpam_reg(vcpu);
+
+ test_guest_reg_read(vcpu);
+
+ test_reset_preserves_id_regs(vcpu);
+
+ kvm_vm_free(vm);
+
+ ksft_finished();
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * smccc_filter - Tests for the SMCCC filter UAPI.
+ *
+ * Copyright (c) 2023 Google LLC
+ *
+ * This test includes:
+ * - Tests that the UAPI constraints are upheld by KVM. For example, userspace
+ * is prevented from filtering the architecture range of SMCCC calls.
+ * - Test that the filter actions (DENIED, FWD_TO_USER) work as intended.
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/psci.h>
+#include <stdint.h>
+
+#include "processor.h"
+#include "test_util.h"
+
+enum smccc_conduit {
+ HVC_INSN,
+ SMC_INSN,
+};
+
+#define for_each_conduit(conduit) \
+ for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++)
+
+static void guest_main(uint32_t func_id, enum smccc_conduit conduit)
+{
+ struct arm_smccc_res res;
+
+ if (conduit == SMC_INSN)
+ smccc_smc(func_id, 0, 0, 0, 0, 0, 0, 0, &res);
+ else
+ smccc_hvc(func_id, 0, 0, 0, 0, 0, 0, 0, &res);
+
+ GUEST_SYNC(res.a0);
+}
+
+static int __set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
+ enum kvm_smccc_filter_action action)
+{
+ struct kvm_smccc_filter filter = {
+ .base = start,
+ .nr_functions = nr_functions,
+ .action = action,
+ };
+
+ return __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL,
+ KVM_ARM_VM_SMCCC_FILTER, &filter);
+}
+
+static void set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
+ enum kvm_smccc_filter_action action)
+{
+ int ret = __set_smccc_filter(vm, start, nr_functions, action);
+
+ TEST_ASSERT(!ret, "failed to configure SMCCC filter: %d", ret);
+}
+
+static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu)
+{
+ struct kvm_vcpu_init init;
+ struct kvm_vm *vm;
+
+ vm = vm_create(1);
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+
+ /*
+ * Enable in-kernel emulation of PSCI to ensure that calls are denied
+ * due to the SMCCC filter, not because of KVM.
+ */
+ init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
+
+ *vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main);
+ return vm;
+}
+
+static void test_pad_must_be_zero(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ struct kvm_smccc_filter filter = {
+ .base = PSCI_0_2_FN_PSCI_VERSION,
+ .nr_functions = 1,
+ .action = KVM_SMCCC_FILTER_DENY,
+ .pad = { -1 },
+ };
+ int r;
+
+ r = __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL,
+ KVM_ARM_VM_SMCCC_FILTER, &filter);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Setting filter with nonzero padding should return EINVAL");
+}
+
+/* Ensure that userspace cannot filter the Arm Architecture SMCCC range */
+static void test_filter_reserved_range(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ uint32_t smc64_fn;
+ int r;
+
+ r = __set_smccc_filter(vm, ARM_SMCCC_ARCH_WORKAROUND_1,
+ 1, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EEXIST,
+ "Attempt to filter reserved range should return EEXIST");
+
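+	/* Function 0 of the SMC64 Arm Architecture range is also reserved. */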
+ smc64_fn = ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_64,
+ 0, 0);
+
+ r = __set_smccc_filter(vm, smc64_fn, 1, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EEXIST,
+ "Attempt to filter reserved range should return EEXIST");
+
+ kvm_vm_free(vm);
+}
+
+static void test_invalid_nr_functions(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ int r;
+
+ r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 0, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Attempt to filter 0 functions should return EINVAL");
+
+ kvm_vm_free(vm);
+}
+
+static void test_overflow_nr_functions(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ int r;
+
+ r = __set_smccc_filter(vm, ~0, ~0, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Attempt to overflow filter range should return EINVAL");
+
+ kvm_vm_free(vm);
+}
+
+static void test_reserved_action(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ int r;
+
+ r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, -1);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Attempt to use reserved filter action should return EINVAL");
+
+ kvm_vm_free(vm);
+}
+
+/* Test that overlapping configurations of the SMCCC filter are rejected */
+static void test_filter_overlap(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ int r;
+
+ set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY);
+
+ r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EEXIST,
+ "Attempt to filter already configured range should return EEXIST");
+
+ kvm_vm_free(vm);
+}
+
+static void expect_call_denied(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) != UCALL_SYNC)
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+
+ TEST_ASSERT(uc.args[1] == SMCCC_RET_NOT_SUPPORTED,
+ "Unexpected SMCCC return code: %lu", uc.args[1]);
+}
+
+/* Denied SMCCC calls have a return code of SMCCC_RET_NOT_SUPPORTED */
+static void test_filter_denied(void)
+{
+ enum smccc_conduit conduit;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ for_each_conduit(conduit) {
+ vm = setup_vm(&vcpu);
+
+ set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_DENY);
+ vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+ vcpu_run(vcpu);
+ expect_call_denied(vcpu);
+
+ kvm_vm_free(vm);
+ }
+}
+
+static void expect_call_fwd_to_user(struct kvm_vcpu *vcpu, uint32_t func_id,
+ enum smccc_conduit conduit)
+{
+ struct kvm_run *run = vcpu->run;
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERCALL,
+ "Unexpected exit reason: %u", run->exit_reason);
+ TEST_ASSERT(run->hypercall.nr == func_id,
+ "Unexpected SMCCC function: %llu", run->hypercall.nr);
+
+ if (conduit == SMC_INSN)
+ TEST_ASSERT(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC,
+ "KVM_HYPERCALL_EXIT_SMC is not set");
+ else
+ TEST_ASSERT(!(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC),
+ "KVM_HYPERCALL_EXIT_SMC is set");
+}
+
+/* SMCCC calls forwarded to userspace cause KVM_EXIT_HYPERCALL exits */
+static void test_filter_fwd_to_user(void)
+{
+ enum smccc_conduit conduit;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ for_each_conduit(conduit) {
+ vm = setup_vm(&vcpu);
+
+ set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_FWD_TO_USER);
+ vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+ vcpu_run(vcpu);
+ expect_call_fwd_to_user(vcpu, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+ kvm_vm_free(vm);
+ }
+}
+
+static bool kvm_supports_smccc_filter(void)
+{
+ struct kvm_vm *vm = vm_create_barebones();
+ int r;
+
+ r = __kvm_has_device_attr(vm->fd, KVM_ARM_VM_SMCCC_CTRL, KVM_ARM_VM_SMCCC_FILTER);
+
+ kvm_vm_free(vm);
+ return !r;
+}
+
+int main(void)
+{
+ TEST_REQUIRE(kvm_supports_smccc_filter());
+
+ test_pad_must_be_zero();
+ test_invalid_nr_functions();
+ test_overflow_nr_functions();
+ test_reserved_action();
+ test_filter_reserved_range();
+ test_filter_overlap();
+ test_filter_denied();
+ test_filter_fwd_to_user();
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vcpu_width_config - Test KVM_ARM_VCPU_INIT() with KVM_ARM_VCPU_EL1_32BIT.
+ *
+ * Copyright (c) 2022 Google LLC.
+ *
+ * This is a test that ensures that non-mixed-width vCPUs (all 64bit vCPUs
+ * or all 32bit vCPUs) can be configured and mixed-width vCPUs cannot be
+ * configured.
+ */
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+/*
+ * Add a vCPU, run KVM_ARM_VCPU_INIT with @init0, and then
+ * add another vCPU, and run KVM_ARM_VCPU_INIT with @init1.
+ */
+static int add_init_2vcpus(struct kvm_vcpu_init *init0,
+ struct kvm_vcpu_init *init1)
+{
+ struct kvm_vcpu *vcpu0, *vcpu1;
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create_barebones();
+
+ vcpu0 = __vm_vcpu_add(vm, 0);
+ ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0);
+ if (ret)
+ goto free_exit;
+
+ vcpu1 = __vm_vcpu_add(vm, 1);
+ ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1);
+
+free_exit:
+ kvm_vm_free(vm);
+ return ret;
+}
+
+/*
+ * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init0,
+ * and run KVM_ARM_VCPU_INIT for another vCPU with @init1.
+ */
+static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init0,
+ struct kvm_vcpu_init *init1)
+{
+ struct kvm_vcpu *vcpu0, *vcpu1;
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create_barebones();
+
+ vcpu0 = __vm_vcpu_add(vm, 0);
+ vcpu1 = __vm_vcpu_add(vm, 1);
+
+ ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0);
+ if (ret)
+ goto free_exit;
+
+ ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1);
+
+free_exit:
+ kvm_vm_free(vm);
+ return ret;
+}
+
+/*
+ * Tests that two 64bit vCPUs can be configured, two 32bit vCPUs can be
+ * configured, and two mixed-width vCPUs cannot be configured.
+ * In each of those three cases, the vCPUs are configured in two different
+ * orders. One order runs KVM_CREATE_VCPU for both vCPUs and then runs
+ * KVM_ARM_VCPU_INIT for them. The other runs KVM_CREATE_VCPU and
+ * KVM_ARM_VCPU_INIT for one vCPU, and then runs those commands for the
+ * other vCPU.
+ */
+int main(void)
+{
+ struct kvm_vcpu_init init0, init1;
+ struct kvm_vm *vm;
+ int ret;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_EL1_32BIT));
+
+ /* Get the preferred target type and copy that to init1 for later use */
+ vm = vm_create_barebones();
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init0);
+ kvm_vm_free(vm);
+ init1 = init0;
+
+ /* Test with 64bit vCPUs */
+ ret = add_init_2vcpus(&init0, &init0);
+ TEST_ASSERT(ret == 0,
+ "Configuring 64bit EL1 vCPUs failed unexpectedly");
+ ret = add_2vcpus_init_2vcpus(&init0, &init0);
+ TEST_ASSERT(ret == 0,
+ "Configuring 64bit EL1 vCPUs failed unexpectedly");
+
+ /* Test with 32bit vCPUs */
+ init0.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
+ ret = add_init_2vcpus(&init0, &init0);
+ TEST_ASSERT(ret == 0,
+ "Configuring 32bit EL1 vCPUs failed unexpectedly");
+ ret = add_2vcpus_init_2vcpus(&init0, &init0);
+ TEST_ASSERT(ret == 0,
+ "Configuring 32bit EL1 vCPUs failed unexpectedly");
+
+ /* Test with mixed-width vCPUs */
+ init0.features[0] = 0;
+ init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
+ ret = add_init_2vcpus(&init0, &init1);
+ TEST_ASSERT(ret != 0,
+ "Configuring mixed-width vCPUs worked unexpectedly");
+ ret = add_2vcpus_init_2vcpus(&init0, &init1);
+ TEST_ASSERT(ret != 0,
+ "Configuring mixed-width vCPUs worked unexpectedly");
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vgic init sequence tests
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <linux/kernel.h>
+#include <sys/syscall.h>
+#include <asm/kvm.h>
+#include <asm/kvm_para.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vgic.h"
+
+#define NR_VCPUS 4
+
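+/* vcpu id goes in the attr's upper 32 bits, the register offset in the lower 32 */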
+#define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset)
+
+#define GICR_TYPER 0x8
+
+#define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2)
+#define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3)
+
+struct vm_gic {
+ struct kvm_vm *vm;
+ int gic_fd;
+ uint32_t gic_dev_type;
+};
+
+static uint64_t max_phys_size;
+
+/*
+ * Helpers to access a redistributor register and verify the ioctl() failed or
+ * succeeded as expected, and provided the correct value on success.
+ */
+static void v3_redist_reg_get_errno(int gicv3_fd, int vcpu, int offset,
+ int want, const char *msg)
+{
+ uint32_t ignored_val;
+ int ret = __kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+ REG_OFFSET(vcpu, offset), &ignored_val);
+
+ TEST_ASSERT(ret && errno == want, "%s; want errno = %d", msg, want);
+}
+
+static void v3_redist_reg_get(int gicv3_fd, int vcpu, int offset, uint32_t want,
+ const char *msg)
+{
+ uint32_t val;
+
+ kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+ REG_OFFSET(vcpu, offset), &val);
+ TEST_ASSERT(val == want, "%s; want '0x%x', got '0x%x'", msg, want, val);
+}
+
+/* dummy guest code */
+static void guest_code(void)
+{
+ GUEST_SYNC(0);
+ GUEST_SYNC(1);
+ GUEST_SYNC(2);
+ GUEST_DONE();
+}
+
+/* We don't want to assert on run execution, hence this helper */
+static int run_vcpu(struct kvm_vcpu *vcpu)
+{
+ return __vcpu_run(vcpu) ? -errno : 0;
+}
+
+static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type,
+ uint32_t nr_vcpus,
+ struct kvm_vcpu *vcpus[])
+{
+ struct vm_gic v;
+
+ v.gic_dev_type = gic_dev_type;
+ v.vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+ v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
+
+ return v;
+}
+
+static struct vm_gic vm_gic_create_barebones(uint32_t gic_dev_type)
+{
+ struct vm_gic v;
+
+ v.gic_dev_type = gic_dev_type;
+ v.vm = vm_create_barebones();
+ v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
+
+ return v;
+}
+
+static void vm_gic_destroy(struct vm_gic *v)
+{
+ close(v->gic_fd);
+ kvm_vm_free(v->vm);
+}
+
+struct vgic_region_attr {
+ uint64_t attr;
+ uint64_t size;
+ uint64_t alignment;
+};
+
+struct vgic_region_attr gic_v3_dist_region = {
+ .attr = KVM_VGIC_V3_ADDR_TYPE_DIST,
+ .size = 0x10000,
+ .alignment = 0x10000,
+};
+
+struct vgic_region_attr gic_v3_redist_region = {
+ .attr = KVM_VGIC_V3_ADDR_TYPE_REDIST,
+ .size = NR_VCPUS * 0x20000,
+ .alignment = 0x10000,
+};
+
+struct vgic_region_attr gic_v2_dist_region = {
+ .attr = KVM_VGIC_V2_ADDR_TYPE_DIST,
+ .size = 0x1000,
+ .alignment = 0x1000,
+};
+
+struct vgic_region_attr gic_v2_cpu_region = {
+ .attr = KVM_VGIC_V2_ADDR_TYPE_CPU,
+ .size = 0x2000,
+ .alignment = 0x1000,
+};
+
+/*
+ * Helper routine that performs general KVM device tests. Eventually the
+ * ARM_VGIC (GICv2 or GICv3) device ends up configured with an overlapping
+ * DIST/REDIST (or DIST/CPUIF for GICv2). The assumption is that 4 vcpus are
+ * going to be used, hence the overlap. In the GICv3 case, a RDIST region is
+ * set at @0x0 and a DIST region at @0x70000. The GICv2 case sets a CPUIF
+ * @0x0 and a DIST region @0x1000.
+ */
+static void subtest_dist_rdist(struct vm_gic *v)
+{
+ int ret;
+ uint64_t addr;
+	struct vgic_region_attr rdist; /* CPU interface in GICv2 */
+ struct vgic_region_attr dist;
+
+ rdist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_redist_region
+ : gic_v2_cpu_region;
+ dist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_dist_region
+ : gic_v2_dist_region;
+
+ /* Check existing group/attributes */
+ kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, dist.attr);
+
+ kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, rdist.attr);
+
+ /* check non existing attribute */
+ ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, -1);
+ TEST_ASSERT(ret && errno == ENXIO, "attribute not supported");
+
+ /* misaligned DIST and REDIST address settings */
+ addr = dist.alignment / 0x10;
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ dist.attr, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "GIC dist base not aligned");
+
+ addr = rdist.alignment / 0x10;
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "GIC redist/cpu base not aligned");
+
+ /* out of range address */
+ addr = max_phys_size;
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ dist.attr, &addr);
+ TEST_ASSERT(ret && errno == E2BIG, "dist address beyond IPA limit");
+
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr);
+ TEST_ASSERT(ret && errno == E2BIG, "redist address beyond IPA limit");
+
+ /* Space for half a rdist (a rdist is: 2 * rdist.alignment). */
+ addr = max_phys_size - dist.alignment;
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr);
+ TEST_ASSERT(ret && errno == E2BIG,
+ "half of the redist is beyond IPA limit");
+
+	/* Set REDIST base address @0x0 */
+ addr = 0x00000;
+ kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr);
+
+ /* Attempt to create a second legacy redistributor region */
+ addr = 0xE0000;
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr);
+ TEST_ASSERT(ret && errno == EEXIST, "GIC redist base set again");
+
+ ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST);
+ if (!ret) {
+ /* Attempt to mix legacy and new redistributor regions */
+ addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 0, 0);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "attempt to mix GICv3 REDIST and REDIST_REGION");
+ }
+
+ /*
+ * Set overlapping DIST / REDIST, cannot be detected here. Will be detected
+ * on first vcpu run instead.
+ */
+ addr = rdist.size - rdist.alignment;
+ kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ dist.attr, &addr);
+}
+
+/* Test the new REDIST region API */
+static void subtest_v3_redist_regions(struct vm_gic *v)
+{
+ uint64_t addr, expected_addr;
+ int ret;
+
+ ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST);
+ TEST_ASSERT(!ret, "Multiple redist regions advertised");
+
+ addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 2, 0);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with flags != 0");
+
+ addr = REDIST_REGION_ATTR_ADDR(0, 0x100000, 0, 0);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+	TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with count == 0");
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "attempt to register the first rdist region with index != 0");
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x201000, 0, 1);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "rdist region with misaligned address");
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
+ kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "register an rdist region with already used index");
+
+ addr = REDIST_REGION_ATTR_ADDR(1, 0x210000, 0, 2);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "register an rdist region overlapping with another one");
+
+ addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 2);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "register redist region with index not +1");
+
+ addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1);
+ kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ addr = REDIST_REGION_ATTR_ADDR(1, max_phys_size, 0, 2);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == E2BIG,
+ "register redist region with base address beyond IPA range");
+
+ /* The last redist is above the pa range. */
+ addr = REDIST_REGION_ATTR_ADDR(2, max_phys_size - 0x30000, 0, 2);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == E2BIG,
+ "register redist region with top address beyond IPA range");
+
+ addr = 0x260000;
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "Mix KVM_VGIC_V3_ADDR_TYPE_REDIST and REDIST_REGION");
+
+ /*
+ * Now there are 2 redist regions:
+ * region 0 @ 0x200000 2 redists
+ * region 1 @ 0x240000 1 redist
+ * Attempt to read their characteristics
+ */
+
+ addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 0);
+ expected_addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
+ ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #0");
+
+ addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 1);
+ expected_addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1);
+ ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #1");
+
+ addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 2);
+ ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == ENOENT, "read characteristics of non existing region");
+
+ addr = 0x260000;
+ kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_DIST, &addr);
+
+ addr = REDIST_REGION_ATTR_ADDR(1, 0x260000, 0, 2);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "register redist region colliding with dist");
+}
+
+/*
+ * VGIC KVM device is created and initialized before the secondary CPUs
+ * get created
+ */
+static void test_vgic_then_vcpus(uint32_t gic_dev_type)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct vm_gic v;
+ int ret, i;
+
+ v = vm_gic_create_with_vcpus(gic_dev_type, 1, vcpus);
+
+ subtest_dist_rdist(&v);
+
+ /* Add the rest of the VCPUs */
+ for (i = 1; i < NR_VCPUS; ++i)
+ vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
+
+ ret = run_vcpu(vcpus[3]);
+ TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run");
+
+ vm_gic_destroy(&v);
+}
+
+/* All the VCPUs are created before the VGIC KVM device gets initialized */
+static void test_vcpus_then_vgic(uint32_t gic_dev_type)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct vm_gic v;
+ int ret;
+
+ v = vm_gic_create_with_vcpus(gic_dev_type, NR_VCPUS, vcpus);
+
+ subtest_dist_rdist(&v);
+
+ ret = run_vcpu(vcpus[3]);
+ TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run");
+
+ vm_gic_destroy(&v);
+}
+
+#define KVM_VGIC_V2_ATTR(offset, cpu) \
+ (FIELD_PREP(KVM_DEV_ARM_VGIC_OFFSET_MASK, offset) | \
+ FIELD_PREP(KVM_DEV_ARM_VGIC_CPUID_MASK, cpu))
+
+#define GIC_CPU_CTRL 0x00
+
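+/*
+ * Userspace accesses to the GICv2 CPU interface registers must fail cleanly
+ * when no vCPUs have been created.
+ */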
+static void test_v2_uaccess_cpuif_no_vcpus(void)
+{
+ struct vm_gic v;
+ u64 val = 0;
+ int ret;
+
+ v = vm_gic_create_barebones(KVM_DEV_TYPE_ARM_VGIC_V2);
+ subtest_dist_rdist(&v);
+
+ ret = __kvm_has_device_attr(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
+ KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0));
+ TEST_ASSERT(ret && errno == EINVAL,
+ "accessed non-existent CPU interface, want errno: %i",
+ EINVAL);
+ ret = __kvm_device_attr_get(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
+ KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "accessed non-existent CPU interface, want errno: %i",
+ EINVAL);
+ ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
+ KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "accessed non-existent CPU interface, want errno: %i",
+ EINVAL);
+
+ vm_gic_destroy(&v);
+}
+
+static void test_v3_new_redist_regions(void)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ void *dummy = NULL;
+ struct vm_gic v;
+ uint64_t addr;
+ int ret;
+
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+ subtest_v3_redist_regions(&v);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ ret = run_vcpu(vcpus[3]);
+	TEST_ASSERT(ret == -ENXIO, "running without a sufficient number of rdists");
+ vm_gic_destroy(&v);
+
+ /* step2 */
+
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+ subtest_v3_redist_regions(&v);
+
+ addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ ret = run_vcpu(vcpus[3]);
+	TEST_ASSERT(ret == -EBUSY, "running without explicit vgic init");
+
+ vm_gic_destroy(&v);
+
+ /* step 3 */
+
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+ subtest_v3_redist_regions(&v);
+
+ ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, dummy);
+ TEST_ASSERT(ret && errno == EFAULT,
+		    "register a third region with a faulting (NULL) attr address");
+
+ addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ ret = run_vcpu(vcpus[3]);
+ TEST_ASSERT(!ret, "vcpu run");
+
+ vm_gic_destroy(&v);
+}
+
+static void test_v3_typer_accesses(void)
+{
+ struct vm_gic v;
+ uint64_t addr;
+ int ret, i;
+
+ v.vm = vm_create(NR_VCPUS);
+ (void)vm_vcpu_add(v.vm, 0, guest_code);
+
+ v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+
+ (void)vm_vcpu_add(v.vm, 3, guest_code);
+
+ v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EINVAL,
+				"attempting to read GICR_TYPER of a not-yet-created vcpu");
+
+ (void)vm_vcpu_add(v.vm, 1, guest_code);
+
+ v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EBUSY,
+ "read GICR_TYPER before GIC initialized");
+
+ (void)vm_vcpu_add(v.vm, 2, guest_code);
+
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ for (i = 0; i < NR_VCPUS ; i++) {
+ v3_redist_reg_get(v.gic_fd, i, GICR_TYPER, i * 0x100,
+ "read GICR_TYPER before rdist region setting");
+ }
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+	/* The first 2 rdists should be placed there (vcpus 0 and 3) */
+ v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x0, "read typer of rdist #0");
+ v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #1");
+
+ addr = REDIST_REGION_ATTR_ADDR(10, 0x100000, 0, 1);
+ ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "collision with previous rdist region");
+
+ v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100,
+ "no redist region attached to vcpu #1 yet, last cannot be returned");
+ v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200,
+ "no redist region attached to vcpu #2, last cannot be returned");
+
+ addr = REDIST_REGION_ATTR_ADDR(10, 0x20000, 0, 1);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1");
+ v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210,
+			  "read typer of rdist #2, last properly returned");
+
+ vm_gic_destroy(&v);
+}
+
+static struct vm_gic vm_gic_v3_create_with_vcpuids(int nr_vcpus,
+ uint32_t vcpuids[])
+{
+ struct vm_gic v;
+ int i;
+
+ v.vm = vm_create(nr_vcpus);
+ for (i = 0; i < nr_vcpus; i++)
+ vm_vcpu_add(v.vm, vcpuids[i], guest_code);
+
+ v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+
+ return v;
+}
+
+/*
+ * Test GICR_TYPER last bit with new redist regions
+ * rdist regions #1 and #2 are contiguous
+ * rdist region #0 @0x100000 2 rdist capacity
+ * rdists: 0, 3 (Last)
+ * rdist region #1 @0x240000 2 rdist capacity
+ * rdists: 5, 4 (Last)
+ * rdist region #2 @0x200000 2 rdist capacity
+ * rdists: 1, 2
+ */
+static void test_v3_last_bit_redist_regions(void)
+{
+ uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
+ struct vm_gic v;
+ uint64_t addr;
+
+ v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids);
+
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x100000, 0, 0);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x240000, 0, 1);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 2);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0");
+ v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1");
+ v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200, "read typer of rdist #2");
+ v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #3");
+ v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #5");
+ v3_redist_reg_get(v.gic_fd, 4, GICR_TYPER, 0x410, "read typer of rdist #4");
+
+ vm_gic_destroy(&v);
+}
+
+/* Test last bit with legacy region */
+static void test_v3_last_bit_single_rdist(void)
+{
+ uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
+ struct vm_gic v;
+ uint64_t addr;
+
+ v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids);
+
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ addr = 0x10000;
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
+
+ v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0");
+ v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x300, "read typer of rdist #1");
+ v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #2");
+ v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #3");
+	v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210, "read typer of rdist #4");
+
+ vm_gic_destroy(&v);
+}
+
+/* Uses the legacy REDIST region API. */
+static void test_v3_redist_ipa_range_check_at_vcpu_run(void)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct vm_gic v;
+ int ret, i;
+ uint64_t addr;
+
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, 1, vcpus);
+
+ /* Set space for 3 redists, we have 1 vcpu, so this succeeds. */
+ addr = max_phys_size - (3 * 2 * 0x10000);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
+
+ addr = 0x00000;
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_DIST, &addr);
+
+ /* Add the rest of the VCPUs */
+ for (i = 1; i < NR_VCPUS; ++i)
+ vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
+
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ /* Attempt to run a vcpu without enough redist space. */
+ ret = run_vcpu(vcpus[2]);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "redist base+size above PA range detected on 1st vcpu run");
+
+ vm_gic_destroy(&v);
+}
+
+static void test_v3_its_region(void)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct vm_gic v;
+ uint64_t addr;
+ int its_fd, ret;
+
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+ its_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_ITS);
+
+ addr = 0x401000;
+ ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "ITS region with misaligned address");
+
+ addr = max_phys_size;
+ ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr);
+ TEST_ASSERT(ret && errno == E2BIG,
+ "register ITS region with base address beyond IPA range");
+
+ addr = max_phys_size - 0x10000;
+ ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr);
+ TEST_ASSERT(ret && errno == E2BIG,
+ "Half of ITS region is beyond IPA range");
+
+ /* This one succeeds setting the ITS base */
+ addr = 0x400000;
+ kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr);
+
+ addr = 0x300000;
+ ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr);
+ TEST_ASSERT(ret && errno == EEXIST, "ITS base set again");
+
+ close(its_fd);
+ vm_gic_destroy(&v);
+}
+
+/*
+ * Returns 0 if it's possible to create GIC device of a given type (V2 or V3).
+ */
+int test_kvm_device(uint32_t gic_dev_type)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct vm_gic v;
+ uint32_t other;
+ int ret;
+
+ v.vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus);
+
+	/* try to create a nonexistent KVM device */
+ ret = __kvm_test_create_device(v.vm, 0);
+ TEST_ASSERT(ret && errno == ENODEV, "unsupported device");
+
+ /* trial mode */
+ ret = __kvm_test_create_device(v.vm, gic_dev_type);
+ if (ret)
+ return ret;
+ v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
+
+ ret = __kvm_create_device(v.vm, gic_dev_type);
+ TEST_ASSERT(ret < 0 && errno == EEXIST, "create GIC device twice");
+
+ /* try to create the other gic_dev_type */
+ other = VGIC_DEV_IS_V2(gic_dev_type) ? KVM_DEV_TYPE_ARM_VGIC_V3
+ : KVM_DEV_TYPE_ARM_VGIC_V2;
+
+ if (!__kvm_test_create_device(v.vm, other)) {
+ ret = __kvm_create_device(v.vm, other);
+ TEST_ASSERT(ret < 0 && (errno == EINVAL || errno == EEXIST),
+ "create GIC device while other version exists");
+ }
+
+ vm_gic_destroy(&v);
+
+ return 0;
+}
+
+void run_tests(uint32_t gic_dev_type)
+{
+ test_vcpus_then_vgic(gic_dev_type);
+ test_vgic_then_vcpus(gic_dev_type);
+
+ if (VGIC_DEV_IS_V2(gic_dev_type))
+ test_v2_uaccess_cpuif_no_vcpus();
+
+ if (VGIC_DEV_IS_V3(gic_dev_type)) {
+ test_v3_new_redist_regions();
+ test_v3_typer_accesses();
+ test_v3_last_bit_redist_regions();
+ test_v3_last_bit_single_rdist();
+ test_v3_redist_ipa_range_check_at_vcpu_run();
+ test_v3_its_region();
+ }
+}
+
+int main(int ac, char **av)
+{
+ int ret;
+ int pa_bits;
+ int cnt_impl = 0;
+
+ pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
+ max_phys_size = 1ULL << pa_bits;
+
+ ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V3);
+ if (!ret) {
+ pr_info("Running GIC_v3 tests.\n");
+ run_tests(KVM_DEV_TYPE_ARM_VGIC_V3);
+ cnt_impl++;
+ }
+
+ ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V2);
+ if (!ret) {
+ pr_info("Running GIC_v2 tests.\n");
+ run_tests(KVM_DEV_TYPE_ARM_VGIC_V2);
+ cnt_impl++;
+ }
+
+ if (!cnt_impl) {
+		print_skip("No GICv2 or GICv3 support");
+ exit(KSFT_SKIP);
+ }
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vgic_irq.c - Test userspace injection of IRQs
+ *
+ * This test validates the injection of IRQs from userspace using various
+ * methods (e.g., KVM_IRQ_LINE) and modes (e.g., EOI). The guest "asks" the
+ * host to inject a specific intid via a GUEST_SYNC call, and then checks that
+ * it received it.
+ */
+#include <asm/kvm.h>
+#include <asm/kvm_para.h>
+#include <sys/eventfd.h>
+#include <linux/sizes.h>
+
+#include "processor.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "gic.h"
+#include "gic_v3.h"
+#include "vgic.h"
+
+/*
+ * Stores the user specified args; it's passed to the guest and to every test
+ * function.
+ */
+struct test_args {
+ uint32_t nr_irqs; /* number of KVM supported IRQs. */
+ bool eoi_split; /* 1 is eoir+dir, 0 is eoir only */
+ bool level_sensitive; /* 1 is level, 0 is edge */
+ int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */
+ bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */
+};
+
+/*
+ * KVM implements 32 priority levels:
+ * 0x00 (highest priority) - 0xF8 (lowest priority), in steps of 8
+ *
+ * Note that these macros will still be correct in the case that KVM implements
+ * more priority levels. Also note that 32 is the minimum for GICv3 and GICv2.
+ */
+#define KVM_NUM_PRIOS 32
+#define KVM_PRIO_SHIFT 3 /* steps of 8 = 1 << 3 */
+#define KVM_PRIO_STEPS (1 << KVM_PRIO_SHIFT) /* 8 */
+#define LOWEST_PRIO (KVM_NUM_PRIOS - 1)
+#define CPU_PRIO_MASK (LOWEST_PRIO << KVM_PRIO_SHIFT) /* 0xf8 */
+#define IRQ_DEFAULT_PRIO (LOWEST_PRIO - 1)
+#define IRQ_DEFAULT_PRIO_REG (IRQ_DEFAULT_PRIO << KVM_PRIO_SHIFT) /* 0xf0 */
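+
+/*
+ * Worked example: LOWEST_PRIO is 31, so CPU_PRIO_MASK is 31 << 3 = 0xf8 and
+ * IRQ_DEFAULT_PRIO_REG is 30 << 3 = 0xf0, i.e. the default IRQ priority sits
+ * one step above the priority mask and is therefore not masked.
+ */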
+
+/*
+ * The kvm_inject_* utilities are used by the guest to ask the host to inject
+ * interrupts (e.g., using the KVM_IRQ_LINE ioctl).
+ */
+
+typedef enum {
+ KVM_INJECT_EDGE_IRQ_LINE = 1,
+ KVM_SET_IRQ_LINE,
+ KVM_SET_IRQ_LINE_HIGH,
+ KVM_SET_LEVEL_INFO_HIGH,
+ KVM_INJECT_IRQFD,
+ KVM_WRITE_ISPENDR,
+ KVM_WRITE_ISACTIVER,
+} kvm_inject_cmd;
+
+struct kvm_inject_args {
+ kvm_inject_cmd cmd;
+ uint32_t first_intid;
+ uint32_t num;
+ int level;
+ bool expect_failure;
+};
+
+/* Used on the guest side to perform the hypercall. */
+static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
+ uint32_t num, int level, bool expect_failure);
+
+/* Used on the host side to get the hypercall info. */
+static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
+ struct kvm_inject_args *args);
+
+#define _KVM_INJECT_MULTI(cmd, intid, num, expect_failure) \
+ kvm_inject_call(cmd, intid, num, -1 /* not used */, expect_failure)
+
+#define KVM_INJECT_MULTI(cmd, intid, num) \
+ _KVM_INJECT_MULTI(cmd, intid, num, false)
+
+#define _KVM_INJECT(cmd, intid, expect_failure) \
+ _KVM_INJECT_MULTI(cmd, intid, 1, expect_failure)
+
+#define KVM_INJECT(cmd, intid) \
+ _KVM_INJECT_MULTI(cmd, intid, 1, false)
+
+#define KVM_ACTIVATE(cmd, intid) \
+ kvm_inject_call(cmd, intid, 1, 1, false);
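+
+/*
+ * Guest-side usage sketch: KVM_INJECT(KVM_INJECT_EDGE_IRQ_LINE, MIN_SPI)
+ * asks the host (via GUEST_SYNC in kvm_inject_call()) to pulse a single
+ * edge-triggered SPI, and KVM_ACTIVATE(KVM_WRITE_ISACTIVER, intid) asks it
+ * to mark an interrupt active via the host-side kvm_irq_write_isactiver().
+ */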
+
+struct kvm_inject_desc {
+ kvm_inject_cmd cmd;
+	/* can inject SGIs, PPIs, and/or SPIs. */
+ bool sgi, ppi, spi;
+};
+
+static struct kvm_inject_desc inject_edge_fns[] = {
+ /* sgi ppi spi */
+ { KVM_INJECT_EDGE_IRQ_LINE, false, false, true },
+ { KVM_INJECT_IRQFD, false, false, true },
+ { KVM_WRITE_ISPENDR, true, false, true },
+ { 0, },
+};
+
+static struct kvm_inject_desc inject_level_fns[] = {
+ /* sgi ppi spi */
+ { KVM_SET_IRQ_LINE_HIGH, false, true, true },
+ { KVM_SET_LEVEL_INFO_HIGH, false, true, true },
+ { KVM_INJECT_IRQFD, false, false, true },
+ { KVM_WRITE_ISPENDR, false, true, true },
+ { 0, },
+};
+
+static struct kvm_inject_desc set_active_fns[] = {
+ /* sgi ppi spi */
+ { KVM_WRITE_ISACTIVER, true, true, true },
+ { 0, },
+};
+
+#define for_each_inject_fn(t, f) \
+ for ((f) = (t); (f)->cmd; (f)++)
+
+#define for_each_supported_inject_fn(args, t, f) \
+ for_each_inject_fn(t, f) \
+ if ((args)->kvm_supports_irqfd || (f)->cmd != KVM_INJECT_IRQFD)
+
+#define for_each_supported_activate_fn(args, t, f) \
+ for_each_supported_inject_fn((args), (t), (f))
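+/*
+ * Note: set_active_fns[] has no KVM_INJECT_IRQFD entry, so the irqfd filter
+ * never kicks in here; the alias is kept for symmetry with the inject
+ * iterators.
+ */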
+
+/* Shared between the guest main thread and the IRQ handlers. */
+volatile uint64_t irq_handled;
+volatile uint32_t irqnr_received[MAX_SPI + 1];
+
+static void reset_stats(void)
+{
+ int i;
+
+ irq_handled = 0;
+ for (i = 0; i <= MAX_SPI; i++)
+ irqnr_received[i] = 0;
+}
+
+static uint64_t gic_read_ap1r0(void)
+{
+ uint64_t reg = read_sysreg_s(SYS_ICC_AP1R0_EL1);
+
+ dsb(sy);
+ return reg;
+}
+
+static void gic_write_ap1r0(uint64_t val)
+{
+ write_sysreg_s(val, SYS_ICC_AP1R0_EL1);
+ isb();
+}
+
+static void guest_set_irq_line(uint32_t intid, uint32_t level);
+
+static void guest_irq_generic_handler(bool eoi_split, bool level_sensitive)
+{
+ uint32_t intid = gic_get_and_ack_irq();
+
+ if (intid == IAR_SPURIOUS)
+ return;
+
+ GUEST_ASSERT(gic_irq_get_active(intid));
+
+ if (!level_sensitive)
+ GUEST_ASSERT(!gic_irq_get_pending(intid));
+
+ if (level_sensitive)
+ guest_set_irq_line(intid, 0);
+
+ GUEST_ASSERT(intid < MAX_SPI);
+ irqnr_received[intid] += 1;
+ irq_handled += 1;
+
+ gic_set_eoi(intid);
+ GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+ if (eoi_split)
+ gic_set_dir(intid);
+
+ GUEST_ASSERT(!gic_irq_get_active(intid));
+ GUEST_ASSERT(!gic_irq_get_pending(intid));
+}
+
+static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
+ uint32_t num, int level, bool expect_failure)
+{
+ struct kvm_inject_args args = {
+ .cmd = cmd,
+ .first_intid = first_intid,
+ .num = num,
+ .level = level,
+ .expect_failure = expect_failure,
+ };
+ GUEST_SYNC(&args);
+}
+
+#define GUEST_ASSERT_IAR_EMPTY() \
+do { \
+ uint32_t _intid; \
+ _intid = gic_get_and_ack_irq(); \
+ GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \
+} while (0)
+
+#define CAT_HELPER(a, b) a ## b
+#define CAT(a, b) CAT_HELPER(a, b)
+#define PREFIX guest_irq_handler_
+#define GUEST_IRQ_HANDLER_NAME(split, lev) CAT(PREFIX, CAT(split, lev))
+#define GENERATE_GUEST_IRQ_HANDLER(split, lev) \
+static void CAT(PREFIX, CAT(split, lev))(struct ex_regs *regs) \
+{ \
+ guest_irq_generic_handler(split, lev); \
+}
+
+GENERATE_GUEST_IRQ_HANDLER(0, 0);
+GENERATE_GUEST_IRQ_HANDLER(0, 1);
+GENERATE_GUEST_IRQ_HANDLER(1, 0);
+GENERATE_GUEST_IRQ_HANDLER(1, 1);
+
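+/* The table is indexed as guest_irq_handlers[eoi_split][level_sensitive]. */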
+static void (*guest_irq_handlers[2][2])(struct ex_regs *) = {
+ {GUEST_IRQ_HANDLER_NAME(0, 0), GUEST_IRQ_HANDLER_NAME(0, 1),},
+ {GUEST_IRQ_HANDLER_NAME(1, 0), GUEST_IRQ_HANDLER_NAME(1, 1),},
+};
+
+static void reset_priorities(struct test_args *args)
+{
+ int i;
+
+ for (i = 0; i < args->nr_irqs; i++)
+ gic_set_priority(i, IRQ_DEFAULT_PRIO_REG);
+}
+
+static void guest_set_irq_line(uint32_t intid, uint32_t level)
+{
+ kvm_inject_call(KVM_SET_IRQ_LINE, intid, 1, level, false);
+}
+
+static void test_inject_fail(struct test_args *args,
+ uint32_t intid, kvm_inject_cmd cmd)
+{
+ reset_stats();
+
+ _KVM_INJECT(cmd, intid, true);
+ /* no IRQ to handle on entry */
+
+ GUEST_ASSERT_EQ(irq_handled, 0);
+ GUEST_ASSERT_IAR_EMPTY();
+}
+
+static void guest_inject(struct test_args *args,
+ uint32_t first_intid, uint32_t num,
+ kvm_inject_cmd cmd)
+{
+ uint32_t i;
+
+ reset_stats();
+
+ /* Cycle over all priorities to make things more interesting. */
+ for (i = first_intid; i < num + first_intid; i++)
+ gic_set_priority(i, (i % (KVM_NUM_PRIOS - 1)) << 3);
+
+ asm volatile("msr daifset, #2" : : : "memory");
+ KVM_INJECT_MULTI(cmd, first_intid, num);
+
+ while (irq_handled < num) {
+ wfi();
+ local_irq_enable();
+ isb(); /* handle IRQ */
+ local_irq_disable();
+ }
+ local_irq_enable();
+
+ GUEST_ASSERT_EQ(irq_handled, num);
+ for (i = first_intid; i < num + first_intid; i++)
+ GUEST_ASSERT_EQ(irqnr_received[i], 1);
+ GUEST_ASSERT_IAR_EMPTY();
+
+ reset_priorities(args);
+}
+
+/*
+ * Restore the active state of multiple concurrent IRQs (given by
+ * concurrent_irqs). This does what a live-migration would do on the
+ * destination side assuming there are some active IRQs that were not
+ * deactivated yet.
+ */
+static void guest_restore_active(struct test_args *args,
+ uint32_t first_intid, uint32_t num,
+ kvm_inject_cmd cmd)
+{
+ uint32_t prio, intid, ap1r;
+ int i;
+
+ /*
+	 * Set the priorities of the num IRQs (first_intid..first_intid + num - 1)
+	 * in descending order, so intid+1 can preempt intid.
+ */
+ for (i = 0, prio = (num - 1) * 8; i < num; i++, prio -= 8) {
+ GUEST_ASSERT(prio >= 0);
+ intid = i + first_intid;
+ gic_set_priority(intid, prio);
+ }
+
+ /*
+ * In a real migration, KVM would restore all GIC state before running
+ * guest code.
+ */
+ for (i = 0; i < num; i++) {
+ intid = i + first_intid;
+ KVM_ACTIVATE(cmd, intid);
+ ap1r = gic_read_ap1r0();
+ ap1r |= 1U << i;
+ gic_write_ap1r0(ap1r);
+ }
+
+ /* This is where the "migration" would occur. */
+
+ /* finish handling the IRQs starting with the highest priority one. */
+ for (i = 0; i < num; i++) {
+ intid = num - i - 1 + first_intid;
+ gic_set_eoi(intid);
+ if (args->eoi_split)
+ gic_set_dir(intid);
+ }
+
+ for (i = 0; i < num; i++)
+ GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+ GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+ GUEST_ASSERT_IAR_EMPTY();
+}
+
+/*
+ * Polls the IAR until it's not a spurious interrupt.
+ *
+ * This function should only be used in test_inject_preemption (with IRQs
+ * masked).
+ */
+static uint32_t wait_for_and_activate_irq(void)
+{
+ uint32_t intid;
+
+ do {
+ asm volatile("wfi" : : : "memory");
+ intid = gic_get_and_ack_irq();
+ } while (intid == IAR_SPURIOUS);
+
+ return intid;
+}
+
+/*
+ * Inject multiple concurrent IRQs (num IRQs starting at first_intid) and
+ * handle them without handling the actual exceptions. This is done by masking
+ * interrupts for the whole test.
+ */
+static void test_inject_preemption(struct test_args *args,
+ uint32_t first_intid, int num,
+ kvm_inject_cmd cmd)
+{
+ uint32_t intid, prio, step = KVM_PRIO_STEPS;
+ int i;
+
+	/*
+	 * Set the priorities of the num IRQs in descending order, so that
+	 * intid+1 can preempt intid.
+	 */
+ for (i = 0, prio = (num - 1) * step; i < num; i++, prio -= step) {
+ GUEST_ASSERT(prio >= 0);
+ intid = i + first_intid;
+ gic_set_priority(intid, prio);
+ }
+
+ local_irq_disable();
+
+ for (i = 0; i < num; i++) {
+ uint32_t tmp;
+ intid = i + first_intid;
+ KVM_INJECT(cmd, intid);
+ /* Each successive IRQ will preempt the previous one. */
+ tmp = wait_for_and_activate_irq();
+ GUEST_ASSERT_EQ(tmp, intid);
+ if (args->level_sensitive)
+ guest_set_irq_line(intid, 0);
+ }
+
+ /* finish handling the IRQs starting with the highest priority one. */
+ for (i = 0; i < num; i++) {
+ intid = num - i - 1 + first_intid;
+ gic_set_eoi(intid);
+ if (args->eoi_split)
+ gic_set_dir(intid);
+ }
+
+ local_irq_enable();
+
+ for (i = 0; i < num; i++)
+ GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+ GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+ GUEST_ASSERT_IAR_EMPTY();
+
+ reset_priorities(args);
+}
+
+static void test_injection(struct test_args *args, struct kvm_inject_desc *f)
+{
+ uint32_t nr_irqs = args->nr_irqs;
+
+ if (f->sgi) {
+ guest_inject(args, MIN_SGI, 1, f->cmd);
+ guest_inject(args, 0, 16, f->cmd);
+ }
+
+ if (f->ppi)
+ guest_inject(args, MIN_PPI, 1, f->cmd);
+
+ if (f->spi) {
+ guest_inject(args, MIN_SPI, 1, f->cmd);
+ guest_inject(args, nr_irqs - 1, 1, f->cmd);
+ guest_inject(args, MIN_SPI, nr_irqs - MIN_SPI, f->cmd);
+ }
+}
+
+static void test_injection_failure(struct test_args *args,
+ struct kvm_inject_desc *f)
+{
+ uint32_t bad_intid[] = { args->nr_irqs, 1020, 1024, 1120, 5120, ~0U, };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(bad_intid); i++)
+ test_inject_fail(args, bad_intid[i], f->cmd);
+}
+
+static void test_preemption(struct test_args *args, struct kvm_inject_desc *f)
+{
+ /*
+	 * Test up to 4 levels of preemption. The reason is that KVM doesn't
+	 * currently allow more concurrently active IRQs than the number of
+	 * implemented List Registers (LRs). The number of LRs is
+	 * IMPLEMENTATION DEFINED; however, most implementations seem to have 4.
+ */
+ if (f->sgi)
+ test_inject_preemption(args, MIN_SGI, 4, f->cmd);
+
+ if (f->ppi)
+ test_inject_preemption(args, MIN_PPI, 4, f->cmd);
+
+ if (f->spi)
+ test_inject_preemption(args, MIN_SPI, 4, f->cmd);
+}
+
+static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f)
+{
+ /* Test up to 4 active IRQs. Same reason as in test_preemption. */
+ if (f->sgi)
+ guest_restore_active(args, MIN_SGI, 4, f->cmd);
+
+ if (f->ppi)
+ guest_restore_active(args, MIN_PPI, 4, f->cmd);
+
+ if (f->spi)
+ guest_restore_active(args, MIN_SPI, 4, f->cmd);
+}
+
+static void guest_code(struct test_args *args)
+{
+ uint32_t i, nr_irqs = args->nr_irqs;
+ bool level_sensitive = args->level_sensitive;
+ struct kvm_inject_desc *f, *inject_fns;
+
+ gic_init(GIC_V3, 1);
+
+ for (i = 0; i < nr_irqs; i++)
+ gic_irq_enable(i);
+
+ for (i = MIN_SPI; i < nr_irqs; i++)
+ gic_irq_set_config(i, !level_sensitive);
+
+ gic_set_eoi_split(args->eoi_split);
+
+ reset_priorities(args);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+
+ inject_fns = level_sensitive ? inject_level_fns
+ : inject_edge_fns;
+
+ local_irq_enable();
+
+ /* Start the tests. */
+ for_each_supported_inject_fn(args, inject_fns, f) {
+ test_injection(args, f);
+ test_preemption(args, f);
+ test_injection_failure(args, f);
+ }
+
+ /*
+	 * Restore the active state of IRQs. This mimics what would happen
+	 * when live migrating a VM with IRQs in the middle of being handled.
+ */
+ for_each_supported_activate_fn(args, set_active_fns, f)
+ test_restore_active(args, f);
+
+ GUEST_DONE();
+}
+
+static void kvm_irq_line_check(struct kvm_vm *vm, uint32_t intid, int level,
+ struct test_args *test_args, bool expect_failure)
+{
+ int ret;
+
+ if (!expect_failure) {
+ kvm_arm_irq_line(vm, intid, level);
+ } else {
+		/* The interface doesn't allow larger intids. */
+ if (intid > KVM_ARM_IRQ_NUM_MASK)
+ return;
+
+ ret = _kvm_arm_irq_line(vm, intid, level);
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Bad intid %i did not cause KVM_IRQ_LINE "
+ "error: rc: %i errno: %i", intid, ret, errno);
+ }
+}
+
+void kvm_irq_set_level_info_check(int gic_fd, uint32_t intid, int level,
+ bool expect_failure)
+{
+ if (!expect_failure) {
+ kvm_irq_set_level_info(gic_fd, intid, level);
+ } else {
+ int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
+ /*
+ * The kernel silently fails for invalid SPIs and SGIs (which
+		 * are not level-sensitive). It only checks that the intid does
+		 * not exceed 1U << 10 (the max reserved SPI). Also, callers
+		 * are supposed to mask the intid with 0x3ff (1023).
+ */
+ if (intid > VGIC_MAX_RESERVED)
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Bad intid %i did not cause VGIC_GRP_LEVEL_INFO "
+ "error: rc: %i errno: %i", intid, ret, errno);
+ else
+ TEST_ASSERT(!ret, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO "
+ "for intid %i failed, rc: %i errno: %i",
+ intid, ret, errno);
+ }
+}
+
+static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm,
+ uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
+ bool expect_failure)
+{
+ struct kvm_irq_routing *routing;
+ int ret;
+ uint64_t i;
+
+ assert(num <= kvm_max_routes && kvm_max_routes <= KVM_MAX_IRQ_ROUTES);
+
+ routing = kvm_gsi_routing_create();
+ for (i = intid; i < (uint64_t)intid + num; i++)
+ kvm_gsi_routing_irqchip_add(routing, i - MIN_SPI, i - MIN_SPI);
+
+ if (!expect_failure) {
+ kvm_gsi_routing_write(vm, routing);
+ } else {
+ ret = _kvm_gsi_routing_write(vm, routing);
+ /* The kernel only checks e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS */
+ if (((uint64_t)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS)
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Bad intid %u did not cause KVM_SET_GSI_ROUTING "
+ "error: rc: %i errno: %i", intid, ret, errno);
+ else
+ TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING "
+ "for intid %i failed, rc: %i errno: %i",
+ intid, ret, errno);
+ }
+}
+
+static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid,
+ struct kvm_vcpu *vcpu,
+ bool expect_failure)
+{
+ /*
+ * Ignore this when expecting failure as invalid intids will lead to
+	 * Ignore this when expecting failure, as invalid intids would lead to
+	 * either trying to inject SGIs when the test is configured to be
+	 * level_sensitive (or the reverse), or to injecting large intids that
+	 * would write beyond the ISPENDR register space (and we
+ */
+ if (!expect_failure)
+ kvm_irq_write_ispendr(gic_fd, intid, vcpu);
+}
+
+static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
+ uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
+ bool expect_failure)
+{
+ int fd[MAX_SPI];
+ uint64_t val;
+ int ret, f;
+ uint64_t i;
+
+ /*
+ * There is no way to try injecting an SGI or PPI as the interface
+ * starts counting from the first SPI (above the private ones), so just
+ * exit.
+ */
+ if (INTID_IS_SGI(intid) || INTID_IS_PPI(intid))
+ return;
+
+ kvm_set_gsi_routing_irqchip_check(vm, intid, num,
+ kvm_max_routes, expect_failure);
+
+ /*
+	 * If expect_failure, then just inject anyway. These
+ * will silently fail. And in any case, the guest will check
+ * that no actual interrupt was injected for those cases.
+ */
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+ fd[f] = eventfd(0, 0);
+ TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f]));
+ }
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+ struct kvm_irqfd irqfd = {
+ .fd = fd[f],
+ .gsi = i - MIN_SPI,
+ };
+ assert(i <= (uint64_t)UINT_MAX);
+ vm_ioctl(vm, KVM_IRQFD, &irqfd);
+ }
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+ val = 1;
+ ret = write(fd[f], &val, sizeof(uint64_t));
+ TEST_ASSERT(ret == sizeof(uint64_t),
+ __KVM_SYSCALL_ERROR("write()", ret));
+ }
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
+ close(fd[f]);
+}
+
+/* handles the valid case: intid=0xffffffff num=1 */
+#define for_each_intid(first, num, tmp, i) \
+ for ((tmp) = (i) = (first); \
+ (tmp) < (uint64_t)(first) + (uint64_t)(num); \
+ (tmp)++, (i)++)
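+/*
+ * The 64-bit loop bound (tmp) avoids wrapping when first + num overflows
+ * 32 bits, which is exactly the intid=0xffffffff num=1 case noted above.
+ */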
+
+static void run_guest_cmd(struct kvm_vcpu *vcpu, int gic_fd,
+ struct kvm_inject_args *inject_args,
+ struct test_args *test_args)
+{
+ kvm_inject_cmd cmd = inject_args->cmd;
+ uint32_t intid = inject_args->first_intid;
+ uint32_t num = inject_args->num;
+ int level = inject_args->level;
+ bool expect_failure = inject_args->expect_failure;
+ struct kvm_vm *vm = vcpu->vm;
+ uint64_t tmp;
+ uint32_t i;
+
+ /* handles the valid case: intid=0xffffffff num=1 */
+ assert(intid < UINT_MAX - num || num == 1);
+
+ switch (cmd) {
+ case KVM_INJECT_EDGE_IRQ_LINE:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, 1, test_args,
+ expect_failure);
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, 0, test_args,
+ expect_failure);
+ break;
+ case KVM_SET_IRQ_LINE:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, level, test_args,
+ expect_failure);
+ break;
+ case KVM_SET_IRQ_LINE_HIGH:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, 1, test_args,
+ expect_failure);
+ break;
+ case KVM_SET_LEVEL_INFO_HIGH:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_set_level_info_check(gic_fd, i, 1,
+ expect_failure);
+ break;
+ case KVM_INJECT_IRQFD:
+ kvm_routing_and_irqfd_check(vm, intid, num,
+ test_args->kvm_max_routes,
+ expect_failure);
+ break;
+ case KVM_WRITE_ISPENDR:
+ for (i = intid; i < intid + num; i++)
+ kvm_irq_write_ispendr_check(gic_fd, i, vcpu,
+ expect_failure);
+ break;
+ case KVM_WRITE_ISACTIVER:
+ for (i = intid; i < intid + num; i++)
+ kvm_irq_write_isactiver(gic_fd, i, vcpu);
+ break;
+ default:
+ break;
+ }
+}
+
+static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
+ struct kvm_inject_args *args)
+{
+ struct kvm_inject_args *kvm_args_hva;
+ vm_vaddr_t kvm_args_gva;
+
+ kvm_args_gva = uc->args[1];
+ kvm_args_hva = (struct kvm_inject_args *)addr_gva2hva(vm, kvm_args_gva);
+ memcpy(args, kvm_args_hva, sizeof(struct kvm_inject_args));
+}
+
+static void print_args(struct test_args *args)
+{
+ printf("nr-irqs=%d level-sensitive=%d eoi-split=%d\n",
+ args->nr_irqs, args->level_sensitive,
+ args->eoi_split);
+}
+
+static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
+{
+ struct ucall uc;
+ int gic_fd;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_inject_args inject_args;
+ vm_vaddr_t args_gva;
+
+ struct test_args args = {
+ .nr_irqs = nr_irqs,
+ .level_sensitive = level_sensitive,
+ .eoi_split = eoi_split,
+ .kvm_max_routes = kvm_check_cap(KVM_CAP_IRQ_ROUTING),
+ .kvm_supports_irqfd = kvm_check_cap(KVM_CAP_IRQFD),
+ };
+
+ print_args(&args);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ /* Setup the guest args page (so it gets the args). */
+ args_gva = vm_vaddr_alloc_page(vm);
+ memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
+ vcpu_args_set(vcpu, 1, args_gva);
+
+ gic_fd = vgic_v3_setup(vm, 1, nr_irqs);
+ __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping");
+
+ vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT,
+ guest_irq_handlers[args.eoi_split][args.level_sensitive]);
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ kvm_inject_get_call(vm, &uc, &inject_args);
+ run_guest_cmd(vcpu, gic_fd, &inject_args, &args);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ close(gic_fd);
+ kvm_vm_free(vm);
+}
+
+static void help(const char *name)
+{
+ printf(
+ "\n"
+ "usage: %s [-n num_irqs] [-e eoi_split] [-l level_sensitive]\n", name);
+ printf(" -n: specify number of IRQs to setup the vgic with. "
+ "It has to be a multiple of 32 and between 64 and 1024.\n");
+ printf(" -e: if 1 then EOI is split into a write to DIR on top "
+ "of writing EOI.\n");
+ printf(" -l: specify whether the IRQs are level-sensitive (1) or not (0).");
+ puts("");
+ exit(1);
+}
+
+int main(int argc, char **argv)
+{
+ uint32_t nr_irqs = 64;
+ bool default_args = true;
+ bool level_sensitive = false;
+ int opt;
+ bool eoi_split = false;
+
+ while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) {
+ switch (opt) {
+ case 'n':
+ nr_irqs = atoi_non_negative("Number of IRQs", optarg);
+ if (nr_irqs > 1024 || nr_irqs % 32)
+ help(argv[0]);
+ break;
+ case 'e':
+ eoi_split = (bool)atoi_paranoid(optarg);
+ default_args = false;
+ break;
+ case 'l':
+ level_sensitive = (bool)atoi_paranoid(optarg);
+ default_args = false;
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ /*
+	 * If the user only specified nr_irqs (or nothing at all), then run
+	 * all combinations of eoi_split and level_sensitive.
+ */
+ if (default_args) {
+ test_vgic(nr_irqs, false /* level */, false /* eoi_split */);
+ test_vgic(nr_irqs, false /* level */, true /* eoi_split */);
+ test_vgic(nr_irqs, true /* level */, false /* eoi_split */);
+ test_vgic(nr_irqs, true /* level */, true /* eoi_split */);
+ } else {
+ test_vgic(nr_irqs, level_sensitive, eoi_split);
+ }
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vgic_lpi_stress - Stress test for KVM's ITS emulation
+ *
+ * Copyright (c) 2024 Google LLC
+ */
+
+#include <linux/sizes.h>
+#include <pthread.h>
+#include <stdatomic.h>
+#include <sys/sysinfo.h>
+
+#include "kvm_util.h"
+#include "gic.h"
+#include "gic_v3.h"
+#include "gic_v3_its.h"
+#include "processor.h"
+#include "ucall.h"
+#include "vgic.h"
+
+#define TEST_MEMSLOT_INDEX 1
+
+#define GIC_LPI_OFFSET 8192
+
+static size_t nr_iterations = 1000;
+static vm_paddr_t gpa_base;
+
+static struct kvm_vm *vm;
+static struct kvm_vcpu **vcpus;
+static int gic_fd, its_fd;
+
+static struct test_data {
+ bool request_vcpus_stop;
+ u32 nr_cpus;
+ u32 nr_devices;
+ u32 nr_event_ids;
+
+ vm_paddr_t device_table;
+ vm_paddr_t collection_table;
+ vm_paddr_t cmdq_base;
+ void *cmdq_base_va;
+ vm_paddr_t itt_tables;
+
+ vm_paddr_t lpi_prop_table;
+ vm_paddr_t lpi_pend_tables;
+} test_data = {
+ .nr_cpus = 1,
+ .nr_devices = 1,
+ .nr_event_ids = 16,
+};
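+
+/*
+ * With the defaults above, the guest maps 1 device x 16 event IDs = 16 LPIs,
+ * numbered consecutively from GIC_LPI_OFFSET (8192).
+ */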
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ u32 intid = gic_get_and_ack_irq();
+
+ if (intid == IAR_SPURIOUS)
+ return;
+
+ GUEST_ASSERT(intid >= GIC_LPI_OFFSET);
+ gic_set_eoi(intid);
+}
+
+static void guest_setup_its_mappings(void)
+{
+ u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET;
+ u32 nr_events = test_data.nr_event_ids;
+ u32 nr_devices = test_data.nr_devices;
+ u32 nr_cpus = test_data.nr_cpus;
+
+ for (coll_id = 0; coll_id < nr_cpus; coll_id++)
+ its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true);
+
+ /* Round-robin the LPIs to all of the vCPUs in the VM */
+ coll_id = 0;
+ for (device_id = 0; device_id < nr_devices; device_id++) {
+ vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K);
+
+ its_send_mapd_cmd(test_data.cmdq_base_va, device_id,
+ itt_base, SZ_64K, true);
+
+ for (event_id = 0; event_id < nr_events; event_id++) {
+ its_send_mapti_cmd(test_data.cmdq_base_va, device_id,
+ event_id, coll_id, intid++);
+
+ coll_id = (coll_id + 1) % test_data.nr_cpus;
+ }
+ }
+}
+
+static void guest_invalidate_all_rdists(void)
+{
+ int i;
+
+ for (i = 0; i < test_data.nr_cpus; i++)
+ its_send_invall_cmd(test_data.cmdq_base_va, i);
+}
+
+static void guest_setup_gic(void)
+{
+ static atomic_int nr_cpus_ready = 0;
+ u32 cpuid = guest_get_vcpuid();
+
+ gic_init(GIC_V3, test_data.nr_cpus);
+ gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K,
+ test_data.lpi_pend_tables + (cpuid * SZ_64K));
+
+ atomic_fetch_add(&nr_cpus_ready, 1);
+
+ if (cpuid > 0)
+ return;
+
+ while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus)
+ cpu_relax();
+
+ its_init(test_data.collection_table, SZ_64K,
+ test_data.device_table, SZ_64K,
+ test_data.cmdq_base, SZ_64K);
+
+ guest_setup_its_mappings();
+ guest_invalidate_all_rdists();
+}
+
+static void guest_code(size_t nr_lpis)
+{
+ guest_setup_gic();
+
+ GUEST_SYNC(0);
+
+ /*
+ * Don't use WFI here to avoid blocking the vCPU thread indefinitely and
+ * never getting the stop signal.
+ */
+ while (!READ_ONCE(test_data.request_vcpus_stop))
+ cpu_relax();
+
+ GUEST_DONE();
+}
+
+static void setup_memslot(void)
+{
+ size_t pages;
+ size_t sz;
+
+ /*
+ * For the ITS:
+ * - A single level device table
+ * - A single level collection table
+ * - The command queue
+ * - An ITT for each device
+ */
+ sz = (3 + test_data.nr_devices) * SZ_64K;
+
+ /*
+ * For the redistributors:
+ * - A shared LPI configuration table
+ * - An LPI pending table for each vCPU
+ */
+ sz += (1 + test_data.nr_cpus) * SZ_64K;
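+	/*
+	 * With the defaults (1 device, 1 vCPU) this works out to 4 * SZ_64K
+	 * for the ITS plus 2 * SZ_64K for the redistributors, i.e. 384K total.
+	 */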
+
+ pages = sz / vm->page_size;
+ gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz;
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base,
+ TEST_MEMSLOT_INDEX, pages, 0);
+}
+
+#define LPI_PROP_DEFAULT_PRIO 0xa0
+
+static void configure_lpis(void)
+{
+ size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids;
+ u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table);
+ size_t i;
+
+ for (i = 0; i < nr_lpis; i++) {
+ tbl[i] = LPI_PROP_DEFAULT_PRIO |
+ LPI_PROP_GROUP1 |
+ LPI_PROP_ENABLED;
+ }
+}
+
+static void setup_test_data(void)
+{
+ size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K);
+ u32 nr_devices = test_data.nr_devices;
+ u32 nr_cpus = test_data.nr_cpus;
+ vm_paddr_t cmdq_base;
+
+ test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k,
+ gpa_base,
+ TEST_MEMSLOT_INDEX);
+
+ test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k,
+ gpa_base,
+ TEST_MEMSLOT_INDEX);
+
+ cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base,
+ TEST_MEMSLOT_INDEX);
+ virt_map(vm, cmdq_base, cmdq_base, pages_per_64k);
+ test_data.cmdq_base = cmdq_base;
+ test_data.cmdq_base_va = (void *)cmdq_base;
+
+ test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices,
+ gpa_base, TEST_MEMSLOT_INDEX);
+
+ test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k,
+ gpa_base, TEST_MEMSLOT_INDEX);
+ configure_lpis();
+
+ test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus,
+ gpa_base, TEST_MEMSLOT_INDEX);
+
+ sync_global_to_guest(vm, test_data);
+}
+
+static void setup_gic(void)
+{
+ gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64);
+ __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3");
+
+ its_fd = vgic_its_setup(vm);
+}
+
+static void signal_lpi(u32 device_id, u32 event_id)
+{
+ vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER;
+
+ struct kvm_msi msi = {
+ .address_lo = db_addr,
+ .address_hi = db_addr >> 32,
+ .data = event_id,
+ .devid = device_id,
+ .flags = KVM_MSI_VALID_DEVID,
+ };
+
+ /*
+ * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM,
+ * which for arm64 implies having a valid translation in the ITS.
+ */
+ TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1,
+ "KVM_SIGNAL_MSI ioctl failed");
+}
+
+static pthread_barrier_t test_setup_barrier;
+
+static void *lpi_worker_thread(void *data)
+{
+ u32 device_id = (size_t)data;
+ u32 event_id;
+ size_t i;
+
+ pthread_barrier_wait(&test_setup_barrier);
+
+ for (i = 0; i < nr_iterations; i++)
+ for (event_id = 0; event_id < test_data.nr_event_ids; event_id++)
+ signal_lpi(device_id, event_id);
+
+ return NULL;
+}
+
+static void *vcpu_worker_thread(void *data)
+{
+ struct kvm_vcpu *vcpu = data;
+ struct ucall uc;
+
+ while (true) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ pthread_barrier_wait(&test_setup_barrier);
+ continue;
+ case UCALL_DONE:
+ return NULL;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unknown ucall: %lu", uc.cmd);
+ }
+ }
+
+ return NULL;
+}
+
+static void report_stats(struct timespec delta)
+{
+ double nr_lpis;
+ double time;
+
+ nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations;
+
+ time = delta.tv_sec;
+ time += ((double)delta.tv_nsec) / NSEC_PER_SEC;
+
+ pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time);
+}
+
+static void run_test(void)
+{
+ u32 nr_devices = test_data.nr_devices;
+ u32 nr_vcpus = test_data.nr_cpus;
+ pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t));
+ pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t));
+ struct timespec start, delta;
+ size_t i;
+
+ TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays");
+
+ pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1);
+
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]);
+
+ for (i = 0; i < nr_devices; i++)
+ pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i);
+
+ pthread_barrier_wait(&test_setup_barrier);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+
+ for (i = 0; i < nr_devices; i++)
+ pthread_join(lpi_threads[i], NULL);
+
+ delta = timespec_elapsed(start);
+ write_guest_global(vm, test_data.request_vcpus_stop, true);
+
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_join(vcpu_threads[i], NULL);
+
+ report_stats(delta);
+}
+
+static void setup_vm(void)
+{
+ int i;
+
+	vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu *));
+ TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
+
+ vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);
+
+ vm_init_descriptor_tables(vm);
+ for (i = 0; i < test_data.nr_cpus; i++)
+ vcpu_init_descriptor_tables(vcpus[i]);
+
+ vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
+
+ setup_memslot();
+
+ setup_gic();
+
+ setup_test_data();
+}
+
+static void destroy_vm(void)
+{
+ close(its_fd);
+ close(gic_fd);
+ kvm_vm_free(vm);
+ free(vcpus);
+}
+
+static void pr_usage(const char *name)
+{
+	pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] [-h]\n", name);
+ pr_info(" -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus);
+ pr_info(" -d:\tnumber of devices (default: %u)\n", test_data.nr_devices);
+ pr_info(" -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids);
+ pr_info(" -i:\tnumber of iterations (default: %lu)\n", nr_iterations);
+}
+
+int main(int argc, char **argv)
+{
+ u32 nr_threads;
+ int c;
+
+ while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
+ switch (c) {
+ case 'v':
+ test_data.nr_cpus = atoi(optarg);
+ break;
+ case 'd':
+ test_data.nr_devices = atoi(optarg);
+ break;
+ case 'e':
+ test_data.nr_event_ids = atoi(optarg);
+ break;
+ case 'i':
+ nr_iterations = strtoul(optarg, NULL, 0);
+ break;
+ case 'h':
+ default:
+ pr_usage(argv[0]);
+ return 1;
+ }
+ }
+
+ nr_threads = test_data.nr_cpus + test_data.nr_devices;
+ if (nr_threads > get_nprocs())
+ pr_info("WARNING: running %u threads on %d CPUs; performance is degraded.\n",
+ nr_threads, get_nprocs());
+
+ setup_vm();
+
+ run_test();
+
+ destroy_vm();
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vpmu_counter_access - Test vPMU event counter access
+ *
+ * Copyright (c) 2023 Google LLC.
+ *
+ * This test checks whether the guest sees the same number of PMU event
+ * counters (PMCR_EL0.N) that userspace sets, whether the guest can access
+ * those counters, and whether the guest is prevented from accessing any
+ * other counters.
+ * It also checks that userspace accesses to the PMU registers honor the
+ * PMCR.N value that's set for the guest.
+ * This test runs only when KVM_CAP_ARM_PMU_V3 is supported on the host.
+ */
+#include <kvm_util.h>
+#include <processor.h>
+#include <test_util.h>
+#include <vgic.h>
+#include <perf/arm_pmuv3.h>
+#include <linux/bitfield.h>
+
+/* The max number of the PMU event counters (excluding the cycle counter) */
+#define ARMV8_PMU_MAX_GENERAL_COUNTERS (ARMV8_PMU_MAX_COUNTERS - 1)
+
+/* The cycle counter bit position that's common among the PMU registers */
+#define ARMV8_PMU_CYCLE_IDX 31
+
+struct vpmu_vm {
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ int gic_fd;
+};
+
+static struct vpmu_vm vpmu_vm;
+
+struct pmreg_sets {
+ uint64_t set_reg_id;
+ uint64_t clr_reg_id;
+};
+
+#define PMREG_SET(set, clr) {.set_reg_id = set, .clr_reg_id = clr}
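+/*
+ * For example, PMREG_SET(SYS_PMCNTENSET_EL0, SYS_PMCNTENCLR_EL0) pairs a SET
+ * system register with its CLR counterpart, as used in
+ * validity_check_reg_sets[] below.
+ */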
+
+static uint64_t get_pmcr_n(uint64_t pmcr)
+{
+ return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr);
+}
+
+static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n)
+{
+ u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N);
+}
+
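+/*
+ * Build a mask covering event counters 0..n-1 plus the cycle counter
+ * (bit 31), e.g. n = 3 yields 0x80000007.
+ */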
+static uint64_t get_counters_mask(uint64_t n)
+{
+ uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX);
+
+ if (n)
+ mask |= GENMASK(n - 1, 0);
+ return mask;
+}
+
+/* Read PMEVCNTR<n>_EL0 through PMXEVCNTR_EL0 */
+static inline unsigned long read_sel_evcntr(int sel)
+{
+ write_sysreg(sel, pmselr_el0);
+ isb();
+ return read_sysreg(pmxevcntr_el0);
+}
+
+/* Write PMEVCNTR<n>_EL0 through PMXEVCNTR_EL0 */
+static inline void write_sel_evcntr(int sel, unsigned long val)
+{
+ write_sysreg(sel, pmselr_el0);
+ isb();
+ write_sysreg(val, pmxevcntr_el0);
+ isb();
+}
+
+/* Read PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */
+static inline unsigned long read_sel_evtyper(int sel)
+{
+ write_sysreg(sel, pmselr_el0);
+ isb();
+ return read_sysreg(pmxevtyper_el0);
+}
+
+/* Write PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */
+static inline void write_sel_evtyper(int sel, unsigned long val)
+{
+ write_sysreg(sel, pmselr_el0);
+ isb();
+ write_sysreg(val, pmxevtyper_el0);
+ isb();
+}
+
+static void pmu_disable_reset(void)
+{
+ uint64_t pmcr = read_sysreg(pmcr_el0);
+
+ /* Reset all counters, disabling them */
+ pmcr &= ~ARMV8_PMU_PMCR_E;
+ write_sysreg(pmcr | ARMV8_PMU_PMCR_P, pmcr_el0);
+ isb();
+}
+
+#define RETURN_READ_PMEVCNTRN(n) \
+ return read_sysreg(pmevcntr##n##_el0)
+static unsigned long read_pmevcntrn(int n)
+{
+ PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN);
+ return 0;
+}
+
+#define WRITE_PMEVCNTRN(n) \
+ write_sysreg(val, pmevcntr##n##_el0)
+static void write_pmevcntrn(int n, unsigned long val)
+{
+ PMEVN_SWITCH(n, WRITE_PMEVCNTRN);
+ isb();
+}
+
+#define READ_PMEVTYPERN(n) \
+ return read_sysreg(pmevtyper##n##_el0)
+static unsigned long read_pmevtypern(int n)
+{
+ PMEVN_SWITCH(n, READ_PMEVTYPERN);
+ return 0;
+}
+
+#define WRITE_PMEVTYPERN(n) \
+ write_sysreg(val, pmevtyper##n##_el0)
+static void write_pmevtypern(int n, unsigned long val)
+{
+ PMEVN_SWITCH(n, WRITE_PMEVTYPERN);
+ isb();
+}
+
+/*
+ * The pmc_accessor structure has pointers to PMEV{CNTR,TYPER}<n>_EL0
+ * accessors that test cases will use. Each of the accessors either
+ * directly reads/writes PMEV{CNTR,TYPER}<n>_EL0
+ * (i.e. {read,write}_pmev{cnt,type}rn()), or reads/writes them through
+ * PMXEV{CNTR,TYPER}_EL0 (i.e. {read,write}_sel_ev{cnt,type}r()).
+ *
+ * This is used to test that combinations of those accessors provide
+ * consistent behavior.
+ */
+struct pmc_accessor {
+	/* A function to be used to read PMEVCNTR<n>_EL0 */
+ unsigned long (*read_cntr)(int idx);
+	/* A function to be used to write PMEVCNTR<n>_EL0 */
+ void (*write_cntr)(int idx, unsigned long val);
+ /* A function to be used to read PMEVTYPER<n>_EL0 */
+ unsigned long (*read_typer)(int idx);
+ /* A function to be used to write PMEVTYPER<n>_EL0 */
+ void (*write_typer)(int idx, unsigned long val);
+};
+
+struct pmc_accessor pmc_accessors[] = {
+ /* test with all direct accesses */
+ { read_pmevcntrn, write_pmevcntrn, read_pmevtypern, write_pmevtypern },
+ /* test with all indirect accesses */
+ { read_sel_evcntr, write_sel_evcntr, read_sel_evtyper, write_sel_evtyper },
+ /* read with direct accesses, and write with indirect accesses */
+ { read_pmevcntrn, write_sel_evcntr, read_pmevtypern, write_sel_evtyper },
+ /* read with indirect accesses, and write with direct accesses */
+ { read_sel_evcntr, write_pmevcntrn, read_sel_evtyper, write_pmevtypern },
+};
+
+/*
+ * Convert a pointer of pmc_accessor to an index in pmc_accessors[],
+ * assuming that the pointer is one of the entries in pmc_accessors[].
+ */
+#define PMC_ACC_TO_IDX(acc) (acc - &pmc_accessors[0])
+
+#define GUEST_ASSERT_BITMAP_REG(regname, mask, set_expected) \
+{ \
+ uint64_t _tval = read_sysreg(regname); \
+ \
+ if (set_expected) \
+ __GUEST_ASSERT((_tval & mask), \
+ "tval: 0x%lx; mask: 0x%lx; set_expected: %u", \
+ _tval, mask, set_expected); \
+ else \
+ __GUEST_ASSERT(!(_tval & mask), \
+ "tval: 0x%lx; mask: 0x%lx; set_expected: %u", \
+ _tval, mask, set_expected); \
+}
+
+/*
+ * Check if @mask bits in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers
+ * are set or cleared as specified in @set_expected.
+ */
+static void check_bitmap_pmu_regs(uint64_t mask, bool set_expected)
+{
+ GUEST_ASSERT_BITMAP_REG(pmcntenset_el0, mask, set_expected);
+ GUEST_ASSERT_BITMAP_REG(pmcntenclr_el0, mask, set_expected);
+ GUEST_ASSERT_BITMAP_REG(pmintenset_el1, mask, set_expected);
+ GUEST_ASSERT_BITMAP_REG(pmintenclr_el1, mask, set_expected);
+ GUEST_ASSERT_BITMAP_REG(pmovsset_el0, mask, set_expected);
+ GUEST_ASSERT_BITMAP_REG(pmovsclr_el0, mask, set_expected);
+}
+
+/*
+ * Check if the bit in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers corresponding
+ * to the specified counter (@pmc_idx) can be read/written as expected.
+ * When @set_op is true, it tries to set the bit for the counter in
+ * those registers by writing the SET registers (the bit won't be set
+ * if the counter is not implemented though).
+ * Otherwise, it tries to clear the bits in the registers by writing
+ * the CLR registers.
+ * Then, it checks if the values indicated in the registers are as expected.
+ */
+static void test_bitmap_pmu_regs(int pmc_idx, bool set_op)
+{
+ uint64_t pmcr_n, test_bit = BIT(pmc_idx);
+ bool set_expected = false;
+
+ if (set_op) {
+ write_sysreg(test_bit, pmcntenset_el0);
+ write_sysreg(test_bit, pmintenset_el1);
+ write_sysreg(test_bit, pmovsset_el0);
+
+ /* The bit will be set only if the counter is implemented */
+ pmcr_n = get_pmcr_n(read_sysreg(pmcr_el0));
+ set_expected = (pmc_idx < pmcr_n) ? true : false;
+ } else {
+ write_sysreg(test_bit, pmcntenclr_el0);
+ write_sysreg(test_bit, pmintenclr_el1);
+ write_sysreg(test_bit, pmovsclr_el0);
+ }
+ check_bitmap_pmu_regs(test_bit, set_expected);
+}
+
+/*
+ * Tests for reading/writing registers for the (implemented) event counter
+ * specified by @pmc_idx.
+ */
+static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
+{
+ uint64_t write_data, read_data;
+
+ /* Disable all PMCs and reset all PMCs to zero. */
+ pmu_disable_reset();
+
+ /*
+ * Tests for reading/writing {PMCNTEN,PMINTEN,PMOVS}{SET,CLR}_EL1.
+ */
+
+ /* Make sure that the bit in those registers are set to 0 */
+ test_bitmap_pmu_regs(pmc_idx, false);
+ /* Test if setting the bit in those registers works */
+ test_bitmap_pmu_regs(pmc_idx, true);
+ /* Test if clearing the bit in those registers works */
+ test_bitmap_pmu_regs(pmc_idx, false);
+
+ /*
+ * Tests for reading/writing the event type register.
+ */
+
+ /*
+ * Set the event type register to an arbitrary value just for testing
+ * of reading/writing the register.
+	 * The Arm ARM says that for events 0x0000 to 0x003F, the
+	 * PMEVTYPER<n>_EL0.evtCount field reads back the value that was
+	 * written to it, even when the specified event is not supported.
+ */
+ write_data = (ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMUV3_PERFCTR_INST_RETIRED);
+ acc->write_typer(pmc_idx, write_data);
+ read_data = acc->read_typer(pmc_idx);
+ __GUEST_ASSERT(read_data == write_data,
+ "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
+ pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
+
+ /*
+ * Tests for reading/writing the event count register.
+ */
+
+ read_data = acc->read_cntr(pmc_idx);
+
+ /* The count value must be 0, as it is disabled and reset */
+ __GUEST_ASSERT(read_data == 0,
+ "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx",
+ pmc_idx, PMC_ACC_TO_IDX(acc), read_data);
+
+ write_data = read_data + pmc_idx + 0x12345;
+ acc->write_cntr(pmc_idx, write_data);
+ read_data = acc->read_cntr(pmc_idx);
+ __GUEST_ASSERT(read_data == write_data,
+ "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
+ pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
+}
+
+#define INVALID_EC (-1ul)
+uint64_t expected_ec = INVALID_EC;
+
+static void guest_sync_handler(struct ex_regs *regs)
+{
+ uint64_t esr, ec;
+
+ esr = read_sysreg(esr_el1);
+ ec = ESR_ELx_EC(esr);
+
+ __GUEST_ASSERT(expected_ec == ec,
+ "PC: 0x%lx; ESR: 0x%lx; EC: 0x%lx; EC expected: 0x%lx",
+ regs->pc, esr, ec, expected_ec);
+
+ /* skip the trapping instruction */
+ regs->pc += 4;
+
+ /* Use INVALID_EC to indicate an exception occurred */
+ expected_ec = INVALID_EC;
+}
+
+/*
+ * Run the given operation that should trigger an exception with the
+ * given exception class. The exception handler (guest_sync_handler)
+ * will reset expected_ec to INVALID_EC and skip the instruction that
+ * trapped.
+ */
+#define TEST_EXCEPTION(ec, ops) \
+({ \
+ GUEST_ASSERT(ec != INVALID_EC); \
+ WRITE_ONCE(expected_ec, ec); \
+ dsb(ish); \
+ ops; \
+ GUEST_ASSERT(expected_ec == INVALID_EC); \
+})
+
+/*
+ * Tests for reading/writing registers for the unimplemented event counter
+ * specified by @pmc_idx (>= PMCR_EL0.N).
+ */
+static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
+{
+ /*
+ * Reading/writing the event count/type registers should cause
+ * an UNDEFINED exception.
+ */
+ TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_cntr(pmc_idx));
+ TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0));
+ TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_typer(pmc_idx));
+ TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_typer(pmc_idx, 0));
+ /*
+ * The bit corresponding to the (unimplemented) counter in
+ * {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers should be RAZ.
+ */
+ test_bitmap_pmu_regs(pmc_idx, 1);
+ test_bitmap_pmu_regs(pmc_idx, 0);
+}
+
+/*
+ * The guest is configured with PMUv3 with @expected_pmcr_n number of
+ * event counters.
+ * Check if @expected_pmcr_n is consistent with PMCR_EL0.N, and
+ * if reading/writing PMU registers for implemented or unimplemented
+ * counters works as expected.
+ */
+static void guest_code(uint64_t expected_pmcr_n)
+{
+ uint64_t pmcr, pmcr_n, unimp_mask;
+ int i, pmc;
+
+ __GUEST_ASSERT(expected_pmcr_n <= ARMV8_PMU_MAX_GENERAL_COUNTERS,
+ "Expected PMCR.N: 0x%lx; ARMv8 general counters: 0x%x",
+ expected_pmcr_n, ARMV8_PMU_MAX_GENERAL_COUNTERS);
+
+ pmcr = read_sysreg(pmcr_el0);
+ pmcr_n = get_pmcr_n(pmcr);
+
+ /* Make sure that PMCR_EL0.N indicates the value userspace set */
+ __GUEST_ASSERT(pmcr_n == expected_pmcr_n,
+ "Expected PMCR.N: 0x%lx, PMCR.N: 0x%lx",
+ expected_pmcr_n, pmcr_n);
+
+ /*
+ * Make sure that (RAZ) bits corresponding to unimplemented event
+ * counters in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers are reset
+ * to zero.
+ * (NOTE: bits for implemented event counters are reset to UNKNOWN)
+ */
+ unimp_mask = GENMASK_ULL(ARMV8_PMU_MAX_GENERAL_COUNTERS - 1, pmcr_n);
+ check_bitmap_pmu_regs(unimp_mask, false);
+
+ /*
+ * Tests for reading/writing PMU registers for implemented counters.
+ * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions.
+ */
+ for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) {
+ for (pmc = 0; pmc < pmcr_n; pmc++)
+ test_access_pmc_regs(&pmc_accessors[i], pmc);
+ }
+
+ /*
+ * Tests for reading/writing PMU registers for unimplemented counters.
+ * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions.
+ */
+ for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) {
+ for (pmc = pmcr_n; pmc < ARMV8_PMU_MAX_GENERAL_COUNTERS; pmc++)
+ test_access_invalid_pmc_regs(&pmc_accessors[i], pmc);
+ }
+
+ GUEST_DONE();
+}
+
+/* Create a VM that has one vCPU with PMUv3 configured. */
+static void create_vpmu_vm(void *guest_code)
+{
+ struct kvm_vcpu_init init;
+ uint8_t pmuver, ec;
+ uint64_t dfr0, irq = 23;
+ struct kvm_device_attr irq_attr = {
+ .group = KVM_ARM_VCPU_PMU_V3_CTRL,
+ .attr = KVM_ARM_VCPU_PMU_V3_IRQ,
+ .addr = (uint64_t)&irq,
+ };
+ struct kvm_device_attr init_attr = {
+ .group = KVM_ARM_VCPU_PMU_V3_CTRL,
+ .attr = KVM_ARM_VCPU_PMU_V3_INIT,
+ };
+
+ /* The test creates the vpmu_vm multiple times. Ensure a clean state */
+ memset(&vpmu_vm, 0, sizeof(vpmu_vm));
+
+ vpmu_vm.vm = vm_create(1);
+ vm_init_descriptor_tables(vpmu_vm.vm);
+ for (ec = 0; ec < ESR_ELx_EC_MAX + 1; ec++) {
+ vm_install_sync_handler(vpmu_vm.vm, VECTOR_SYNC_CURRENT, ec,
+ guest_sync_handler);
+ }
+
+ /* Create vCPU with PMUv3 */
+ vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
+ init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
+ vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code);
+ vcpu_init_descriptor_tables(vpmu_vm.vcpu);
+ vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64);
+ __TEST_REQUIRE(vpmu_vm.gic_fd >= 0,
+ "Failed to create vgic-v3, skipping");
+
+ /* Make sure that PMUv3 support is indicated in the ID register */
+ dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
+ pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0);
+ TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF &&
+ pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP,
+ "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver);
+
+ /* Initialize vPMU */
+ vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr);
+ vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr);
+}
+
+static void destroy_vpmu_vm(void)
+{
+ close(vpmu_vm.gic_fd);
+ kvm_vm_free(vpmu_vm.vm);
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n)
+{
+ struct ucall uc;
+
+ vcpu_args_set(vcpu, 1, pmcr_n);
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ break;
+ }
+}
+
+static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail)
+{
+ struct kvm_vcpu *vcpu;
+ uint64_t pmcr, pmcr_orig;
+
+ create_vpmu_vm(guest_code);
+ vcpu = vpmu_vm.vcpu;
+
+ pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
+ pmcr = pmcr_orig;
+
+ /*
+	 * Set PMCR.N to @pmcr_n. If @pmcr_n is larger than the host value,
+	 * KVM should leave the field unmodified while the write itself
+	 * still succeeds.
+ */
+ set_pmcr_n(&pmcr, pmcr_n);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr);
+ pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
+
+ if (expect_fail)
+ TEST_ASSERT(pmcr_orig == pmcr,
+ "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx",
+ pmcr, pmcr_n);
+ else
+ TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr),
+ "Failed to update PMCR.N to %lu (received: %lu)",
+ pmcr_n, get_pmcr_n(pmcr));
+}
+
+/*
+ * Create a guest with one vCPU, set the PMCR_EL0.N for the vCPU to @pmcr_n,
+ * and run the test.
+ */
+static void run_access_test(uint64_t pmcr_n)
+{
+ uint64_t sp;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vcpu_init init;
+
+ pr_debug("Test with pmcr_n %lu\n", pmcr_n);
+
+ test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
+ vcpu = vpmu_vm.vcpu;
+
+	/* Save the initial sp so it can be restored later to run the guest again */
+ sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1));
+
+ run_vcpu(vcpu, pmcr_n);
+
+ /*
+ * Reset and re-initialize the vCPU, and run the guest code again to
+ * check if PMCR_EL0.N is preserved.
+ */
+ vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
+ init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
+ aarch64_vcpu_setup(vcpu, &init);
+ vcpu_init_descriptor_tables(vcpu);
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp);
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+
+ run_vcpu(vcpu, pmcr_n);
+
+ destroy_vpmu_vm();
+}
+
+static struct pmreg_sets validity_check_reg_sets[] = {
+ PMREG_SET(SYS_PMCNTENSET_EL0, SYS_PMCNTENCLR_EL0),
+ PMREG_SET(SYS_PMINTENSET_EL1, SYS_PMINTENCLR_EL1),
+ PMREG_SET(SYS_PMOVSSET_EL0, SYS_PMOVSCLR_EL0),
+};
+
+/*
+ * Create a VM, and check if KVM handles the userspace accesses of
+ * the PMU register sets in @validity_check_reg_sets[] correctly.
+ */
+static void run_pmregs_validity_test(uint64_t pmcr_n)
+{
+ int i;
+ struct kvm_vcpu *vcpu;
+ uint64_t set_reg_id, clr_reg_id, reg_val;
+ uint64_t valid_counters_mask, max_counters_mask;
+
+ test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
+ vcpu = vpmu_vm.vcpu;
+
+ valid_counters_mask = get_counters_mask(pmcr_n);
+ max_counters_mask = get_counters_mask(ARMV8_PMU_MAX_COUNTERS);
+
+ for (i = 0; i < ARRAY_SIZE(validity_check_reg_sets); i++) {
+ set_reg_id = validity_check_reg_sets[i].set_reg_id;
+ clr_reg_id = validity_check_reg_sets[i].clr_reg_id;
+
+ /*
+ * Test if the 'set' and 'clr' variants of the registers
+ * are initialized based on the number of valid counters.
+ */
+ reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id));
+ TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+ "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
+ KVM_ARM64_SYS_REG(set_reg_id), reg_val);
+
+ reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id));
+ TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+ "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
+ KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
+
+ /*
+		 * Using the 'set' variant, force-set the register to the
+		 * mask covering all possible counters and test if KVM
+		 * discards the bits for unimplemented counters as it should.
+ */
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), max_counters_mask);
+
+ reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id));
+ TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+ "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
+ KVM_ARM64_SYS_REG(set_reg_id), reg_val);
+
+ reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id));
+ TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+ "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
+ KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
+ }
+
+ destroy_vpmu_vm();
+}
+
+/*
+ * Create a guest with one vCPU, and attempt to set the PMCR_EL0.N for
+ * the vCPU to @pmcr_n, which is larger than the host value.
+ * The attempt should fail as @pmcr_n is too big to set for the vCPU.
+ */
+static void run_error_test(uint64_t pmcr_n)
+{
+	pr_debug("Error test with pmcr_n %lu (larger than the host value)\n", pmcr_n);
+
+ test_create_vpmu_vm_with_pmcr_n(pmcr_n, true);
+ destroy_vpmu_vm();
+}
+
+/*
+ * Return the default number of implemented PMU event counters excluding
+ * the cycle counter (i.e. PMCR_EL0.N value) for the guest.
+ */
+static uint64_t get_pmcr_n_limit(void)
+{
+ uint64_t pmcr;
+
+ create_vpmu_vm(guest_code);
+ pmcr = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
+ destroy_vpmu_vm();
+ return get_pmcr_n(pmcr);
+}
+
+int main(void)
+{
+ uint64_t i, pmcr_n;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3));
+
+ pmcr_n = get_pmcr_n_limit();
+ for (i = 0; i <= pmcr_n; i++) {
+ run_access_test(i);
+ run_pmregs_validity_test(i);
+ }
+
+ for (i = pmcr_n + 1; i < ARMV8_PMU_MAX_COUNTERS; i++)
+ run_error_test(i);
+
+ return 0;
+}
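
For readers of this excerpt: the test calls get_pmcr_n() and set_pmcr_n(), which are defined earlier in the same file and do not appear in this hunk. A minimal sketch of what such helpers could look like, assuming the architectural PMCR_EL0.N layout (bits [15:11]) and the kernel's <linux/bitfield.h> helpers; the PMCR_EL0_N_MASK name is made up for illustration and this block is not part of the patch:

/* Illustrative only: PMCR_EL0.N occupies bits [15:11]. */
#include <linux/bitfield.h>
#include <linux/bits.h>

#define PMCR_EL0_N_MASK		GENMASK(15, 11)

static uint64_t get_pmcr_n(uint64_t pmcr)
{
	return FIELD_GET(PMCR_EL0_N_MASK, pmcr);
}

static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n)
{
	*pmcr &= ~PMCR_EL0_N_MASK;
	*pmcr |= FIELD_PREP(PMCR_EL0_N_MASK, pmcr_n);
}

The in-tree helpers may use different macro names, but the field extraction/insertion is what matters for following the PMCR.N assertions above.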
#include "ucall_common.h"
#ifdef __aarch64__
-#include "aarch64/vgic.h"
+#include "arm64/vgic.h"
static int gic_fd;
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * ARM Generic Timer specific interface
- */
-
-#ifndef SELFTEST_KVM_ARCH_TIMER_H
-#define SELFTEST_KVM_ARCH_TIMER_H
-
-#include "processor.h"
-
-enum arch_timer {
- VIRTUAL,
- PHYSICAL,
-};
-
-#define CTL_ENABLE (1 << 0)
-#define CTL_IMASK (1 << 1)
-#define CTL_ISTATUS (1 << 2)
-
-#define msec_to_cycles(msec) \
- (timer_get_cntfrq() * (uint64_t)(msec) / 1000)
-
-#define usec_to_cycles(usec) \
- (timer_get_cntfrq() * (uint64_t)(usec) / 1000000)
-
-#define cycles_to_usec(cycles) \
- ((uint64_t)(cycles) * 1000000 / timer_get_cntfrq())
-
-static inline uint32_t timer_get_cntfrq(void)
-{
- return read_sysreg(cntfrq_el0);
-}
-
-static inline uint64_t timer_get_cntct(enum arch_timer timer)
-{
- isb();
-
- switch (timer) {
- case VIRTUAL:
- return read_sysreg(cntvct_el0);
- case PHYSICAL:
- return read_sysreg(cntpct_el0);
- default:
- GUEST_FAIL("Unexpected timer type = %u", timer);
- }
-
- /* We should not reach here */
- return 0;
-}
-
-static inline void timer_set_cval(enum arch_timer timer, uint64_t cval)
-{
- switch (timer) {
- case VIRTUAL:
- write_sysreg(cval, cntv_cval_el0);
- break;
- case PHYSICAL:
- write_sysreg(cval, cntp_cval_el0);
- break;
- default:
- GUEST_FAIL("Unexpected timer type = %u", timer);
- }
-
- isb();
-}
-
-static inline uint64_t timer_get_cval(enum arch_timer timer)
-{
- switch (timer) {
- case VIRTUAL:
- return read_sysreg(cntv_cval_el0);
- case PHYSICAL:
- return read_sysreg(cntp_cval_el0);
- default:
- GUEST_FAIL("Unexpected timer type = %u", timer);
- }
-
- /* We should not reach here */
- return 0;
-}
-
-static inline void timer_set_tval(enum arch_timer timer, int32_t tval)
-{
- switch (timer) {
- case VIRTUAL:
- write_sysreg(tval, cntv_tval_el0);
- break;
- case PHYSICAL:
- write_sysreg(tval, cntp_tval_el0);
- break;
- default:
- GUEST_FAIL("Unexpected timer type = %u", timer);
- }
-
- isb();
-}
-
-static inline int32_t timer_get_tval(enum arch_timer timer)
-{
- isb();
- switch (timer) {
- case VIRTUAL:
- return read_sysreg(cntv_tval_el0);
- case PHYSICAL:
- return read_sysreg(cntp_tval_el0);
- default:
- GUEST_FAIL("Could not get timer %d\n", timer);
- }
-
- /* We should not reach here */
- return 0;
-}
-
-static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl)
-{
- switch (timer) {
- case VIRTUAL:
- write_sysreg(ctl, cntv_ctl_el0);
- break;
- case PHYSICAL:
- write_sysreg(ctl, cntp_ctl_el0);
- break;
- default:
- GUEST_FAIL("Unexpected timer type = %u", timer);
- }
-
- isb();
-}
-
-static inline uint32_t timer_get_ctl(enum arch_timer timer)
-{
- switch (timer) {
- case VIRTUAL:
- return read_sysreg(cntv_ctl_el0);
- case PHYSICAL:
- return read_sysreg(cntp_ctl_el0);
- default:
- GUEST_FAIL("Unexpected timer type = %u", timer);
- }
-
- /* We should not reach here */
- return 0;
-}
-
-static inline void timer_set_next_cval_ms(enum arch_timer timer, uint32_t msec)
-{
- uint64_t now_ct = timer_get_cntct(timer);
- uint64_t next_ct = now_ct + msec_to_cycles(msec);
-
- timer_set_cval(timer, next_ct);
-}
-
-static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec)
-{
- timer_set_tval(timer, msec_to_cycles(msec));
-}
-
-#endif /* SELFTEST_KVM_ARCH_TIMER_H */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * ARM simple delay routines
- */
-
-#ifndef SELFTEST_KVM_ARM_DELAY_H
-#define SELFTEST_KVM_ARM_DELAY_H
-
-#include "arch_timer.h"
-
-static inline void __delay(uint64_t cycles)
-{
- enum arch_timer timer = VIRTUAL;
- uint64_t start = timer_get_cntct(timer);
-
- while ((timer_get_cntct(timer) - start) < cycles)
- cpu_relax();
-}
-
-static inline void udelay(unsigned long usec)
-{
- __delay(usec_to_cycles(usec));
-}
-
-#endif /* SELFTEST_KVM_ARM_DELAY_H */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * ARM Generic Interrupt Controller (GIC) specific defines
- */
-
-#ifndef SELFTEST_KVM_GIC_H
-#define SELFTEST_KVM_GIC_H
-
-#include <asm/kvm.h>
-
-enum gic_type {
- GIC_V3,
- GIC_TYPE_MAX,
-};
-
-/*
- * Note that the redistributor frames are at the end, as the range scales
- * with the number of vCPUs in the VM.
- */
-#define GITS_BASE_GPA 0x8000000ULL
-#define GICD_BASE_GPA (GITS_BASE_GPA + KVM_VGIC_V3_ITS_SIZE)
-#define GICR_BASE_GPA (GICD_BASE_GPA + KVM_VGIC_V3_DIST_SIZE)
-
-/* The GIC is identity-mapped into the guest at the time of setup. */
-#define GITS_BASE_GVA ((volatile void *)GITS_BASE_GPA)
-#define GICD_BASE_GVA ((volatile void *)GICD_BASE_GPA)
-#define GICR_BASE_GVA ((volatile void *)GICR_BASE_GPA)
-
-#define MIN_SGI 0
-#define MIN_PPI 16
-#define MIN_SPI 32
-#define MAX_SPI 1019
-#define IAR_SPURIOUS 1023
-
-#define INTID_IS_SGI(intid) (0 <= (intid) && (intid) < MIN_PPI)
-#define INTID_IS_PPI(intid) (MIN_PPI <= (intid) && (intid) < MIN_SPI)
-#define INTID_IS_SPI(intid) (MIN_SPI <= (intid) && (intid) <= MAX_SPI)
-
-void gic_init(enum gic_type type, unsigned int nr_cpus);
-void gic_irq_enable(unsigned int intid);
-void gic_irq_disable(unsigned int intid);
-unsigned int gic_get_and_ack_irq(void);
-void gic_set_eoi(unsigned int intid);
-void gic_set_dir(unsigned int intid);
-
-/*
- * Sets the EOI mode. When split is false, EOI just drops the priority. When
- * split is true, EOI drops the priority and deactivates the interrupt.
- */
-void gic_set_eoi_split(bool split);
-void gic_set_priority_mask(uint64_t mask);
-void gic_set_priority(uint32_t intid, uint32_t prio);
-void gic_irq_set_active(unsigned int intid);
-void gic_irq_clear_active(unsigned int intid);
-bool gic_irq_get_active(unsigned int intid);
-void gic_irq_set_pending(unsigned int intid);
-void gic_irq_clear_pending(unsigned int intid);
-bool gic_irq_get_pending(unsigned int intid);
-void gic_irq_set_config(unsigned int intid, bool is_edge);
-
-void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
- vm_paddr_t pend_table);
-
-#endif /* SELFTEST_KVM_GIC_H */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-#ifndef __SELFTESTS_GIC_V3_H
-#define __SELFTESTS_GIC_V3_H
-
-/*
- * Distributor registers. We assume we're running non-secure, with ARE
- * being set. Secure-only and non-ARE registers are not described.
- */
-#define GICD_CTLR 0x0000
-#define GICD_TYPER 0x0004
-#define GICD_IIDR 0x0008
-#define GICD_TYPER2 0x000C
-#define GICD_STATUSR 0x0010
-#define GICD_SETSPI_NSR 0x0040
-#define GICD_CLRSPI_NSR 0x0048
-#define GICD_SETSPI_SR 0x0050
-#define GICD_CLRSPI_SR 0x0058
-#define GICD_IGROUPR 0x0080
-#define GICD_ISENABLER 0x0100
-#define GICD_ICENABLER 0x0180
-#define GICD_ISPENDR 0x0200
-#define GICD_ICPENDR 0x0280
-#define GICD_ISACTIVER 0x0300
-#define GICD_ICACTIVER 0x0380
-#define GICD_IPRIORITYR 0x0400
-#define GICD_ICFGR 0x0C00
-#define GICD_IGRPMODR 0x0D00
-#define GICD_NSACR 0x0E00
-#define GICD_IGROUPRnE 0x1000
-#define GICD_ISENABLERnE 0x1200
-#define GICD_ICENABLERnE 0x1400
-#define GICD_ISPENDRnE 0x1600
-#define GICD_ICPENDRnE 0x1800
-#define GICD_ISACTIVERnE 0x1A00
-#define GICD_ICACTIVERnE 0x1C00
-#define GICD_IPRIORITYRnE 0x2000
-#define GICD_ICFGRnE 0x3000
-#define GICD_IROUTER 0x6000
-#define GICD_IROUTERnE 0x8000
-#define GICD_IDREGS 0xFFD0
-#define GICD_PIDR2 0xFFE8
-
-#define ESPI_BASE_INTID 4096
-
-/*
- * Those registers are actually from GICv2, but the spec demands that they
- * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3).
- */
-#define GICD_ITARGETSR 0x0800
-#define GICD_SGIR 0x0F00
-#define GICD_CPENDSGIR 0x0F10
-#define GICD_SPENDSGIR 0x0F20
-
-#define GICD_CTLR_RWP (1U << 31)
-#define GICD_CTLR_nASSGIreq (1U << 8)
-#define GICD_CTLR_DS (1U << 6)
-#define GICD_CTLR_ARE_NS (1U << 4)
-#define GICD_CTLR_ENABLE_G1A (1U << 1)
-#define GICD_CTLR_ENABLE_G1 (1U << 0)
-
-#define GICD_IIDR_IMPLEMENTER_SHIFT 0
-#define GICD_IIDR_IMPLEMENTER_MASK (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
-#define GICD_IIDR_REVISION_SHIFT 12
-#define GICD_IIDR_REVISION_MASK (0xf << GICD_IIDR_REVISION_SHIFT)
-#define GICD_IIDR_VARIANT_SHIFT 16
-#define GICD_IIDR_VARIANT_MASK (0xf << GICD_IIDR_VARIANT_SHIFT)
-#define GICD_IIDR_PRODUCT_ID_SHIFT 24
-#define GICD_IIDR_PRODUCT_ID_MASK (0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
-
-
-/*
- * In systems with a single security state (what we emulate in KVM)
- * the meaning of the interrupt group enable bits is slightly different
- */
-#define GICD_CTLR_ENABLE_SS_G1 (1U << 1)
-#define GICD_CTLR_ENABLE_SS_G0 (1U << 0)
-
-#define GICD_TYPER_RSS (1U << 26)
-#define GICD_TYPER_LPIS (1U << 17)
-#define GICD_TYPER_MBIS (1U << 16)
-#define GICD_TYPER_ESPI (1U << 8)
-
-#define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1)
-#define GICD_TYPER_NUM_LPIS(typer) ((((typer) >> 11) & 0x1f) + 1)
-#define GICD_TYPER_SPIS(typer) ((((typer) & 0x1f) + 1) * 32)
-#define GICD_TYPER_ESPIS(typer) \
- (((typer) & GICD_TYPER_ESPI) ? GICD_TYPER_SPIS((typer) >> 27) : 0)
-
-#define GICD_TYPER2_nASSGIcap (1U << 8)
-#define GICD_TYPER2_VIL (1U << 7)
-#define GICD_TYPER2_VID GENMASK(4, 0)
-
-#define GICD_IROUTER_SPI_MODE_ONE (0U << 31)
-#define GICD_IROUTER_SPI_MODE_ANY (1U << 31)
-
-#define GIC_PIDR2_ARCH_MASK 0xf0
-#define GIC_PIDR2_ARCH_GICv3 0x30
-#define GIC_PIDR2_ARCH_GICv4 0x40
-
-#define GIC_V3_DIST_SIZE 0x10000
-
-#define GIC_PAGE_SIZE_4K 0ULL
-#define GIC_PAGE_SIZE_16K 1ULL
-#define GIC_PAGE_SIZE_64K 2ULL
-#define GIC_PAGE_SIZE_MASK 3ULL
-
-/*
- * Re-Distributor registers, offsets from RD_base
- */
-#define GICR_CTLR GICD_CTLR
-#define GICR_IIDR 0x0004
-#define GICR_TYPER 0x0008
-#define GICR_STATUSR GICD_STATUSR
-#define GICR_WAKER 0x0014
-#define GICR_SETLPIR 0x0040
-#define GICR_CLRLPIR 0x0048
-#define GICR_PROPBASER 0x0070
-#define GICR_PENDBASER 0x0078
-#define GICR_INVLPIR 0x00A0
-#define GICR_INVALLR 0x00B0
-#define GICR_SYNCR 0x00C0
-#define GICR_IDREGS GICD_IDREGS
-#define GICR_PIDR2 GICD_PIDR2
-
-#define GICR_CTLR_ENABLE_LPIS (1UL << 0)
-#define GICR_CTLR_CES (1UL << 1)
-#define GICR_CTLR_IR (1UL << 2)
-#define GICR_CTLR_RWP (1UL << 3)
-
-#define GICR_TYPER_CPU_NUMBER(r) (((r) >> 8) & 0xffff)
-
-#define EPPI_BASE_INTID 1056
-
-#define GICR_TYPER_NR_PPIS(r) \
- ({ \
- unsigned int __ppinum = ((r) >> 27) & 0x1f; \
- unsigned int __nr_ppis = 16; \
- if (__ppinum == 1 || __ppinum == 2) \
- __nr_ppis += __ppinum * 32; \
- \
- __nr_ppis; \
- })
-
-#define GICR_WAKER_ProcessorSleep (1U << 1)
-#define GICR_WAKER_ChildrenAsleep (1U << 2)
-
-#define GIC_BASER_CACHE_nCnB 0ULL
-#define GIC_BASER_CACHE_SameAsInner 0ULL
-#define GIC_BASER_CACHE_nC 1ULL
-#define GIC_BASER_CACHE_RaWt 2ULL
-#define GIC_BASER_CACHE_RaWb 3ULL
-#define GIC_BASER_CACHE_WaWt 4ULL
-#define GIC_BASER_CACHE_WaWb 5ULL
-#define GIC_BASER_CACHE_RaWaWt 6ULL
-#define GIC_BASER_CACHE_RaWaWb 7ULL
-#define GIC_BASER_CACHE_MASK 7ULL
-#define GIC_BASER_NonShareable 0ULL
-#define GIC_BASER_InnerShareable 1ULL
-#define GIC_BASER_OuterShareable 2ULL
-#define GIC_BASER_SHAREABILITY_MASK 3ULL
-
-#define GIC_BASER_CACHEABILITY(reg, inner_outer, type) \
- (GIC_BASER_CACHE_##type << reg##_##inner_outer##_CACHEABILITY_SHIFT)
-
-#define GIC_BASER_SHAREABILITY(reg, type) \
- (GIC_BASER_##type << reg##_SHAREABILITY_SHIFT)
-
-/* encode a size field of width @w containing @n - 1 units */
-#define GIC_ENCODE_SZ(n, w) (((unsigned long)(n) - 1) & GENMASK_ULL(((w) - 1), 0))
-
-#define GICR_PROPBASER_SHAREABILITY_SHIFT (10)
-#define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT (7)
-#define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT (56)
-#define GICR_PROPBASER_SHAREABILITY_MASK \
- GIC_BASER_SHAREABILITY(GICR_PROPBASER, SHAREABILITY_MASK)
-#define GICR_PROPBASER_INNER_CACHEABILITY_MASK \
- GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, MASK)
-#define GICR_PROPBASER_OUTER_CACHEABILITY_MASK \
- GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, MASK)
-#define GICR_PROPBASER_CACHEABILITY_MASK GICR_PROPBASER_INNER_CACHEABILITY_MASK
-
-#define GICR_PROPBASER_InnerShareable \
- GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable)
-
-#define GICR_PROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nCnB)
-#define GICR_PROPBASER_nC GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nC)
-#define GICR_PROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt)
-#define GICR_PROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb)
-#define GICR_PROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWt)
-#define GICR_PROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWb)
-#define GICR_PROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWt)
-#define GICR_PROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb)
-
-#define GICR_PROPBASER_IDBITS_MASK (0x1f)
-#define GICR_PROPBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 12))
-#define GICR_PENDBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 16))
-
-#define GICR_PENDBASER_SHAREABILITY_SHIFT (10)
-#define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT (7)
-#define GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT (56)
-#define GICR_PENDBASER_SHAREABILITY_MASK \
- GIC_BASER_SHAREABILITY(GICR_PENDBASER, SHAREABILITY_MASK)
-#define GICR_PENDBASER_INNER_CACHEABILITY_MASK \
- GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, MASK)
-#define GICR_PENDBASER_OUTER_CACHEABILITY_MASK \
- GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, MASK)
-#define GICR_PENDBASER_CACHEABILITY_MASK GICR_PENDBASER_INNER_CACHEABILITY_MASK
-
-#define GICR_PENDBASER_InnerShareable \
- GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable)
-
-#define GICR_PENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nCnB)
-#define GICR_PENDBASER_nC GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nC)
-#define GICR_PENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt)
-#define GICR_PENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb)
-#define GICR_PENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWt)
-#define GICR_PENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWb)
-#define GICR_PENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWt)
-#define GICR_PENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWb)
-
-#define GICR_PENDBASER_PTZ BIT_ULL(62)
-
-/*
- * Re-Distributor registers, offsets from SGI_base
- */
-#define GICR_IGROUPR0 GICD_IGROUPR
-#define GICR_ISENABLER0 GICD_ISENABLER
-#define GICR_ICENABLER0 GICD_ICENABLER
-#define GICR_ISPENDR0 GICD_ISPENDR
-#define GICR_ICPENDR0 GICD_ICPENDR
-#define GICR_ISACTIVER0 GICD_ISACTIVER
-#define GICR_ICACTIVER0 GICD_ICACTIVER
-#define GICR_IPRIORITYR0 GICD_IPRIORITYR
-#define GICR_ICFGR0 GICD_ICFGR
-#define GICR_IGRPMODR0 GICD_IGRPMODR
-#define GICR_NSACR GICD_NSACR
-
-#define GICR_TYPER_PLPIS (1U << 0)
-#define GICR_TYPER_VLPIS (1U << 1)
-#define GICR_TYPER_DIRTY (1U << 2)
-#define GICR_TYPER_DirectLPIS (1U << 3)
-#define GICR_TYPER_LAST (1U << 4)
-#define GICR_TYPER_RVPEID (1U << 7)
-#define GICR_TYPER_COMMON_LPI_AFF GENMASK_ULL(25, 24)
-#define GICR_TYPER_AFFINITY GENMASK_ULL(63, 32)
-
-#define GICR_INVLPIR_INTID GENMASK_ULL(31, 0)
-#define GICR_INVLPIR_VPEID GENMASK_ULL(47, 32)
-#define GICR_INVLPIR_V GENMASK_ULL(63, 63)
-
-#define GICR_INVALLR_VPEID GICR_INVLPIR_VPEID
-#define GICR_INVALLR_V GICR_INVLPIR_V
-
-#define GIC_V3_REDIST_SIZE 0x20000
-
-#define LPI_PROP_GROUP1 (1 << 1)
-#define LPI_PROP_ENABLED (1 << 0)
-
-/*
- * Re-Distributor registers, offsets from VLPI_base
- */
-#define GICR_VPROPBASER 0x0070
-
-#define GICR_VPROPBASER_IDBITS_MASK 0x1f
-
-#define GICR_VPROPBASER_SHAREABILITY_SHIFT (10)
-#define GICR_VPROPBASER_INNER_CACHEABILITY_SHIFT (7)
-#define GICR_VPROPBASER_OUTER_CACHEABILITY_SHIFT (56)
-
-#define GICR_VPROPBASER_SHAREABILITY_MASK \
- GIC_BASER_SHAREABILITY(GICR_VPROPBASER, SHAREABILITY_MASK)
-#define GICR_VPROPBASER_INNER_CACHEABILITY_MASK \
- GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, MASK)
-#define GICR_VPROPBASER_OUTER_CACHEABILITY_MASK \
- GIC_BASER_CACHEABILITY(GICR_VPROPBASER, OUTER, MASK)
-#define GICR_VPROPBASER_CACHEABILITY_MASK \
- GICR_VPROPBASER_INNER_CACHEABILITY_MASK
-
-#define GICR_VPROPBASER_InnerShareable \
- GIC_BASER_SHAREABILITY(GICR_VPROPBASER, InnerShareable)
-
-#define GICR_VPROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nCnB)
-#define GICR_VPROPBASER_nC GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nC)
-#define GICR_VPROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt)
-#define GICR_VPROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWb)
-#define GICR_VPROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWt)
-#define GICR_VPROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWb)
-#define GICR_VPROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWt)
-#define GICR_VPROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWb)
-
-/*
- * GICv4.1 VPROPBASER reinvention. A subtle mix between the old
- * VPROPBASER and ITS_BASER. Just not quite any of the two.
- */
-#define GICR_VPROPBASER_4_1_VALID (1ULL << 63)
-#define GICR_VPROPBASER_4_1_ENTRY_SIZE GENMASK_ULL(61, 59)
-#define GICR_VPROPBASER_4_1_INDIRECT (1ULL << 55)
-#define GICR_VPROPBASER_4_1_PAGE_SIZE GENMASK_ULL(54, 53)
-#define GICR_VPROPBASER_4_1_Z (1ULL << 52)
-#define GICR_VPROPBASER_4_1_ADDR GENMASK_ULL(51, 12)
-#define GICR_VPROPBASER_4_1_SIZE GENMASK_ULL(6, 0)
-
-#define GICR_VPENDBASER 0x0078
-
-#define GICR_VPENDBASER_SHAREABILITY_SHIFT (10)
-#define GICR_VPENDBASER_INNER_CACHEABILITY_SHIFT (7)
-#define GICR_VPENDBASER_OUTER_CACHEABILITY_SHIFT (56)
-#define GICR_VPENDBASER_SHAREABILITY_MASK \
- GIC_BASER_SHAREABILITY(GICR_VPENDBASER, SHAREABILITY_MASK)
-#define GICR_VPENDBASER_INNER_CACHEABILITY_MASK \
- GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, MASK)
-#define GICR_VPENDBASER_OUTER_CACHEABILITY_MASK \
- GIC_BASER_CACHEABILITY(GICR_VPENDBASER, OUTER, MASK)
-#define GICR_VPENDBASER_CACHEABILITY_MASK \
- GICR_VPENDBASER_INNER_CACHEABILITY_MASK
-
-#define GICR_VPENDBASER_NonShareable \
- GIC_BASER_SHAREABILITY(GICR_VPENDBASER, NonShareable)
-
-#define GICR_VPENDBASER_InnerShareable \
- GIC_BASER_SHAREABILITY(GICR_VPENDBASER, InnerShareable)
-
-#define GICR_VPENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB)
-#define GICR_VPENDBASER_nC GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC)
-#define GICR_VPENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt)
-#define GICR_VPENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWb)
-#define GICR_VPENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWt)
-#define GICR_VPENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWb)
-#define GICR_VPENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWt)
-#define GICR_VPENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWb)
-
-#define GICR_VPENDBASER_Dirty (1ULL << 60)
-#define GICR_VPENDBASER_PendingLast (1ULL << 61)
-#define GICR_VPENDBASER_IDAI (1ULL << 62)
-#define GICR_VPENDBASER_Valid (1ULL << 63)
-
-/*
- * GICv4.1 VPENDBASER, used for VPE residency. On top of these fields,
- * also use the above Valid, PendingLast and Dirty.
- */
-#define GICR_VPENDBASER_4_1_DB (1ULL << 62)
-#define GICR_VPENDBASER_4_1_VGRP0EN (1ULL << 59)
-#define GICR_VPENDBASER_4_1_VGRP1EN (1ULL << 58)
-#define GICR_VPENDBASER_4_1_VPEID GENMASK_ULL(15, 0)
-
-#define GICR_VSGIR 0x0080
-
-#define GICR_VSGIR_VPEID GENMASK(15, 0)
-
-#define GICR_VSGIPENDR 0x0088
-
-#define GICR_VSGIPENDR_BUSY (1U << 31)
-#define GICR_VSGIPENDR_PENDING GENMASK(15, 0)
-
-/*
- * ITS registers, offsets from ITS_base
- */
-#define GITS_CTLR 0x0000
-#define GITS_IIDR 0x0004
-#define GITS_TYPER 0x0008
-#define GITS_MPIDR 0x0018
-#define GITS_CBASER 0x0080
-#define GITS_CWRITER 0x0088
-#define GITS_CREADR 0x0090
-#define GITS_BASER 0x0100
-#define GITS_IDREGS_BASE 0xffd0
-#define GITS_PIDR0 0xffe0
-#define GITS_PIDR1 0xffe4
-#define GITS_PIDR2 GICR_PIDR2
-#define GITS_PIDR4 0xffd0
-#define GITS_CIDR0 0xfff0
-#define GITS_CIDR1 0xfff4
-#define GITS_CIDR2 0xfff8
-#define GITS_CIDR3 0xfffc
-
-#define GITS_TRANSLATER 0x10040
-
-#define GITS_SGIR 0x20020
-
-#define GITS_SGIR_VPEID GENMASK_ULL(47, 32)
-#define GITS_SGIR_VINTID GENMASK_ULL(3, 0)
-
-#define GITS_CTLR_ENABLE (1U << 0)
-#define GITS_CTLR_ImDe (1U << 1)
-#define GITS_CTLR_ITS_NUMBER_SHIFT 4
-#define GITS_CTLR_ITS_NUMBER (0xFU << GITS_CTLR_ITS_NUMBER_SHIFT)
-#define GITS_CTLR_QUIESCENT (1U << 31)
-
-#define GITS_TYPER_PLPIS (1UL << 0)
-#define GITS_TYPER_VLPIS (1UL << 1)
-#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4
-#define GITS_TYPER_ITT_ENTRY_SIZE GENMASK_ULL(7, 4)
-#define GITS_TYPER_IDBITS_SHIFT 8
-#define GITS_TYPER_DEVBITS_SHIFT 13
-#define GITS_TYPER_DEVBITS GENMASK_ULL(17, 13)
-#define GITS_TYPER_PTA (1UL << 19)
-#define GITS_TYPER_HCC_SHIFT 24
-#define GITS_TYPER_HCC(r) (((r) >> GITS_TYPER_HCC_SHIFT) & 0xff)
-#define GITS_TYPER_VMOVP (1ULL << 37)
-#define GITS_TYPER_VMAPP (1ULL << 40)
-#define GITS_TYPER_SVPET GENMASK_ULL(42, 41)
-
-#define GITS_IIDR_REV_SHIFT 12
-#define GITS_IIDR_REV_MASK (0xf << GITS_IIDR_REV_SHIFT)
-#define GITS_IIDR_REV(r) (((r) >> GITS_IIDR_REV_SHIFT) & 0xf)
-#define GITS_IIDR_PRODUCTID_SHIFT 24
-
-#define GITS_CBASER_VALID (1ULL << 63)
-#define GITS_CBASER_SHAREABILITY_SHIFT (10)
-#define GITS_CBASER_INNER_CACHEABILITY_SHIFT (59)
-#define GITS_CBASER_OUTER_CACHEABILITY_SHIFT (53)
-#define GITS_CBASER_SHAREABILITY_MASK \
- GIC_BASER_SHAREABILITY(GITS_CBASER, SHAREABILITY_MASK)
-#define GITS_CBASER_INNER_CACHEABILITY_MASK \
- GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, MASK)
-#define GITS_CBASER_OUTER_CACHEABILITY_MASK \
- GIC_BASER_CACHEABILITY(GITS_CBASER, OUTER, MASK)
-#define GITS_CBASER_CACHEABILITY_MASK GITS_CBASER_INNER_CACHEABILITY_MASK
-
-#define GITS_CBASER_InnerShareable \
- GIC_BASER_SHAREABILITY(GITS_CBASER, InnerShareable)
-
-#define GITS_CBASER_nCnB GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nCnB)
-#define GITS_CBASER_nC GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nC)
-#define GITS_CBASER_RaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt)
-#define GITS_CBASER_RaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWb)
-#define GITS_CBASER_WaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWt)
-#define GITS_CBASER_WaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWb)
-#define GITS_CBASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt)
-#define GITS_CBASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb)
-
-#define GITS_CBASER_ADDRESS(cbaser) ((cbaser) & GENMASK_ULL(51, 12))
-
-#define GITS_BASER_NR_REGS 8
-
-#define GITS_BASER_VALID (1ULL << 63)
-#define GITS_BASER_INDIRECT (1ULL << 62)
-
-#define GITS_BASER_INNER_CACHEABILITY_SHIFT (59)
-#define GITS_BASER_OUTER_CACHEABILITY_SHIFT (53)
-#define GITS_BASER_INNER_CACHEABILITY_MASK \
- GIC_BASER_CACHEABILITY(GITS_BASER, INNER, MASK)
-#define GITS_BASER_CACHEABILITY_MASK GITS_BASER_INNER_CACHEABILITY_MASK
-#define GITS_BASER_OUTER_CACHEABILITY_MASK \
- GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, MASK)
-#define GITS_BASER_SHAREABILITY_MASK \
- GIC_BASER_SHAREABILITY(GITS_BASER, SHAREABILITY_MASK)
-
-#define GITS_BASER_nCnB GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nCnB)
-#define GITS_BASER_nC GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nC)
-#define GITS_BASER_RaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt)
-#define GITS_BASER_RaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb)
-#define GITS_BASER_WaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWt)
-#define GITS_BASER_WaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWb)
-#define GITS_BASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWt)
-#define GITS_BASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWb)
-
-#define GITS_BASER_TYPE_SHIFT (56)
-#define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7)
-#define GITS_BASER_ENTRY_SIZE_SHIFT (48)
-#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1)
-#define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48)
-#define GITS_BASER_PHYS_52_to_48(phys) \
- (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12)
-#define GITS_BASER_ADDR_48_to_52(baser) \
- (((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48)
-
-#define GITS_BASER_SHAREABILITY_SHIFT (10)
-#define GITS_BASER_InnerShareable \
- GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)
-#define GITS_BASER_PAGE_SIZE_SHIFT (8)
-#define __GITS_BASER_PSZ(sz) (GIC_PAGE_SIZE_ ## sz << GITS_BASER_PAGE_SIZE_SHIFT)
-#define GITS_BASER_PAGE_SIZE_4K __GITS_BASER_PSZ(4K)
-#define GITS_BASER_PAGE_SIZE_16K __GITS_BASER_PSZ(16K)
-#define GITS_BASER_PAGE_SIZE_64K __GITS_BASER_PSZ(64K)
-#define GITS_BASER_PAGE_SIZE_MASK __GITS_BASER_PSZ(MASK)
-#define GITS_BASER_PAGES_MAX 256
-#define GITS_BASER_PAGES_SHIFT (0)
-#define GITS_BASER_NR_PAGES(r) (((r) & 0xff) + 1)
-
-#define GITS_BASER_TYPE_NONE 0
-#define GITS_BASER_TYPE_DEVICE 1
-#define GITS_BASER_TYPE_VCPU 2
-#define GITS_BASER_TYPE_RESERVED3 3
-#define GITS_BASER_TYPE_COLLECTION 4
-#define GITS_BASER_TYPE_RESERVED5 5
-#define GITS_BASER_TYPE_RESERVED6 6
-#define GITS_BASER_TYPE_RESERVED7 7
-
-#define GITS_LVL1_ENTRY_SIZE (8UL)
-
-/*
- * ITS commands
- */
-#define GITS_CMD_MAPD 0x08
-#define GITS_CMD_MAPC 0x09
-#define GITS_CMD_MAPTI 0x0a
-#define GITS_CMD_MAPI 0x0b
-#define GITS_CMD_MOVI 0x01
-#define GITS_CMD_DISCARD 0x0f
-#define GITS_CMD_INV 0x0c
-#define GITS_CMD_MOVALL 0x0e
-#define GITS_CMD_INVALL 0x0d
-#define GITS_CMD_INT 0x03
-#define GITS_CMD_CLEAR 0x04
-#define GITS_CMD_SYNC 0x05
-
-/*
- * GICv4 ITS specific commands
- */
-#define GITS_CMD_GICv4(x) ((x) | 0x20)
-#define GITS_CMD_VINVALL GITS_CMD_GICv4(GITS_CMD_INVALL)
-#define GITS_CMD_VMAPP GITS_CMD_GICv4(GITS_CMD_MAPC)
-#define GITS_CMD_VMAPTI GITS_CMD_GICv4(GITS_CMD_MAPTI)
-#define GITS_CMD_VMOVI GITS_CMD_GICv4(GITS_CMD_MOVI)
-#define GITS_CMD_VSYNC GITS_CMD_GICv4(GITS_CMD_SYNC)
-/* VMOVP, VSGI and INVDB are the odd ones, as they dont have a physical counterpart */
-#define GITS_CMD_VMOVP GITS_CMD_GICv4(2)
-#define GITS_CMD_VSGI GITS_CMD_GICv4(3)
-#define GITS_CMD_INVDB GITS_CMD_GICv4(0xe)
-
-/*
- * ITS error numbers
- */
-#define E_ITS_MOVI_UNMAPPED_INTERRUPT 0x010107
-#define E_ITS_MOVI_UNMAPPED_COLLECTION 0x010109
-#define E_ITS_INT_UNMAPPED_INTERRUPT 0x010307
-#define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507
-#define E_ITS_MAPD_DEVICE_OOR 0x010801
-#define E_ITS_MAPD_ITTSIZE_OOR 0x010802
-#define E_ITS_MAPC_PROCNUM_OOR 0x010902
-#define E_ITS_MAPC_COLLECTION_OOR 0x010903
-#define E_ITS_MAPTI_UNMAPPED_DEVICE 0x010a04
-#define E_ITS_MAPTI_ID_OOR 0x010a05
-#define E_ITS_MAPTI_PHYSICALID_OOR 0x010a06
-#define E_ITS_INV_UNMAPPED_INTERRUPT 0x010c07
-#define E_ITS_INVALL_UNMAPPED_COLLECTION 0x010d09
-#define E_ITS_MOVALL_PROCNUM_OOR 0x010e01
-#define E_ITS_DISCARD_UNMAPPED_INTERRUPT 0x010f07
-
-/*
- * CPU interface registers
- */
-#define ICC_CTLR_EL1_EOImode_SHIFT (1)
-#define ICC_CTLR_EL1_EOImode_drop_dir (0U << ICC_CTLR_EL1_EOImode_SHIFT)
-#define ICC_CTLR_EL1_EOImode_drop (1U << ICC_CTLR_EL1_EOImode_SHIFT)
-#define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT)
-#define ICC_CTLR_EL1_CBPR_SHIFT 0
-#define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT)
-#define ICC_CTLR_EL1_PMHE_SHIFT 6
-#define ICC_CTLR_EL1_PMHE_MASK (1 << ICC_CTLR_EL1_PMHE_SHIFT)
-#define ICC_CTLR_EL1_PRI_BITS_SHIFT 8
-#define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT)
-#define ICC_CTLR_EL1_ID_BITS_SHIFT 11
-#define ICC_CTLR_EL1_ID_BITS_MASK (0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT)
-#define ICC_CTLR_EL1_SEIS_SHIFT 14
-#define ICC_CTLR_EL1_SEIS_MASK (0x1 << ICC_CTLR_EL1_SEIS_SHIFT)
-#define ICC_CTLR_EL1_A3V_SHIFT 15
-#define ICC_CTLR_EL1_A3V_MASK (0x1 << ICC_CTLR_EL1_A3V_SHIFT)
-#define ICC_CTLR_EL1_RSS (0x1 << 18)
-#define ICC_CTLR_EL1_ExtRange (0x1 << 19)
-#define ICC_PMR_EL1_SHIFT 0
-#define ICC_PMR_EL1_MASK (0xff << ICC_PMR_EL1_SHIFT)
-#define ICC_BPR0_EL1_SHIFT 0
-#define ICC_BPR0_EL1_MASK (0x7 << ICC_BPR0_EL1_SHIFT)
-#define ICC_BPR1_EL1_SHIFT 0
-#define ICC_BPR1_EL1_MASK (0x7 << ICC_BPR1_EL1_SHIFT)
-#define ICC_IGRPEN0_EL1_SHIFT 0
-#define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT)
-#define ICC_IGRPEN1_EL1_SHIFT 0
-#define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT)
-#define ICC_SRE_EL1_DIB (1U << 2)
-#define ICC_SRE_EL1_DFB (1U << 1)
-#define ICC_SRE_EL1_SRE (1U << 0)
-
-/* These are for GICv2 emulation only */
-#define GICH_LR_VIRTUALID (0x3ffUL << 0)
-#define GICH_LR_PHYSID_CPUID_SHIFT (10)
-#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT)
-
-#define ICC_IAR1_EL1_SPURIOUS 0x3ff
-
-#define ICC_SRE_EL2_SRE (1 << 0)
-#define ICC_SRE_EL2_ENABLE (1 << 3)
-
-#define ICC_SGI1R_TARGET_LIST_SHIFT 0
-#define ICC_SGI1R_TARGET_LIST_MASK (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT)
-#define ICC_SGI1R_AFFINITY_1_SHIFT 16
-#define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT)
-#define ICC_SGI1R_SGI_ID_SHIFT 24
-#define ICC_SGI1R_SGI_ID_MASK (0xfULL << ICC_SGI1R_SGI_ID_SHIFT)
-#define ICC_SGI1R_AFFINITY_2_SHIFT 32
-#define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT)
-#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40
-#define ICC_SGI1R_RS_SHIFT 44
-#define ICC_SGI1R_RS_MASK (0xfULL << ICC_SGI1R_RS_SHIFT)
-#define ICC_SGI1R_AFFINITY_3_SHIFT 48
-#define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT)
-
-#endif
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef __SELFTESTS_GIC_V3_ITS_H__
-#define __SELFTESTS_GIC_V3_ITS_H__
-
-#include <linux/sizes.h>
-
-void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz,
- vm_paddr_t device_tbl, size_t device_tbl_sz,
- vm_paddr_t cmdq, size_t cmdq_size);
-
-void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base,
- size_t itt_size, bool valid);
-void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid);
-void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
- u32 collection_id, u32 intid);
-void its_send_invall_cmd(void *cmdq_base, u32 collection_id);
-
-#endif // __SELFTESTS_GIC_V3_ITS_H__
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_UTIL_ARCH_H
-#define SELFTEST_KVM_UTIL_ARCH_H
-
-struct kvm_vm_arch {};
-
-#endif // SELFTEST_KVM_UTIL_ARCH_H
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * AArch64 processor specific defines
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-#ifndef SELFTEST_KVM_PROCESSOR_H
-#define SELFTEST_KVM_PROCESSOR_H
-
-#include "kvm_util.h"
-#include "ucall_common.h"
-
-#include <linux/stringify.h>
-#include <linux/types.h>
-#include <asm/brk-imm.h>
-#include <asm/esr.h>
-#include <asm/sysreg.h>
-
-
-#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
- KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
-
-/*
- * KVM_ARM64_SYS_REG(sys_reg_id): Helper macro to convert
- * SYS_* register definitions in asm/sysreg.h to use in KVM
- * calls such as vcpu_get_reg() and vcpu_set_reg().
- */
-#define KVM_ARM64_SYS_REG(sys_reg_id) \
- ARM64_SYS_REG(sys_reg_Op0(sys_reg_id), \
- sys_reg_Op1(sys_reg_id), \
- sys_reg_CRn(sys_reg_id), \
- sys_reg_CRm(sys_reg_id), \
- sys_reg_Op2(sys_reg_id))
-
-/*
- * Default MAIR
- * index attribute
- * DEVICE_nGnRnE 0 0000:0000
- * DEVICE_nGnRE 1 0000:0100
- * DEVICE_GRE 2 0000:1100
- * NORMAL_NC 3 0100:0100
- * NORMAL 4 1111:1111
- * NORMAL_WT 5 1011:1011
- */
-
-/* Linux doesn't use these memory types, so let's define them. */
-#define MAIR_ATTR_DEVICE_GRE UL(0x0c)
-#define MAIR_ATTR_NORMAL_WT UL(0xbb)
-
-#define MT_DEVICE_nGnRnE 0
-#define MT_DEVICE_nGnRE 1
-#define MT_DEVICE_GRE 2
-#define MT_NORMAL_NC 3
-#define MT_NORMAL 4
-#define MT_NORMAL_WT 5
-
-#define DEFAULT_MAIR_EL1 \
- (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \
- MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) | \
- MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) | \
- MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) | \
- MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \
- MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
-
-void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init);
-struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
- struct kvm_vcpu_init *init, void *guest_code);
-
-struct ex_regs {
- u64 regs[31];
- u64 sp;
- u64 pc;
- u64 pstate;
-};
-
-#define VECTOR_NUM 16
-
-enum {
- VECTOR_SYNC_CURRENT_SP0,
- VECTOR_IRQ_CURRENT_SP0,
- VECTOR_FIQ_CURRENT_SP0,
- VECTOR_ERROR_CURRENT_SP0,
-
- VECTOR_SYNC_CURRENT,
- VECTOR_IRQ_CURRENT,
- VECTOR_FIQ_CURRENT,
- VECTOR_ERROR_CURRENT,
-
- VECTOR_SYNC_LOWER_64,
- VECTOR_IRQ_LOWER_64,
- VECTOR_FIQ_LOWER_64,
- VECTOR_ERROR_LOWER_64,
-
- VECTOR_SYNC_LOWER_32,
- VECTOR_IRQ_LOWER_32,
- VECTOR_FIQ_LOWER_32,
- VECTOR_ERROR_LOWER_32,
-};
-
-#define VECTOR_IS_SYNC(v) ((v) == VECTOR_SYNC_CURRENT_SP0 || \
- (v) == VECTOR_SYNC_CURRENT || \
- (v) == VECTOR_SYNC_LOWER_64 || \
- (v) == VECTOR_SYNC_LOWER_32)
-
-/* Access flag */
-#define PTE_AF (1ULL << 10)
-
-/* Access flag update enable/disable */
-#define TCR_EL1_HA (1ULL << 39)
-
-void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
- uint32_t *ipa16k, uint32_t *ipa64k);
-
-void vm_init_descriptor_tables(struct kvm_vm *vm);
-void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu);
-
-typedef void(*handler_fn)(struct ex_regs *);
-void vm_install_exception_handler(struct kvm_vm *vm,
- int vector, handler_fn handler);
-void vm_install_sync_handler(struct kvm_vm *vm,
- int vector, int ec, handler_fn handler);
-
-uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva);
-
-static inline void cpu_relax(void)
-{
- asm volatile("yield" ::: "memory");
-}
-
-#define isb() asm volatile("isb" : : : "memory")
-#define dsb(opt) asm volatile("dsb " #opt : : : "memory")
-#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
-
-#define dma_wmb() dmb(oshst)
-#define __iowmb() dma_wmb()
-
-#define dma_rmb() dmb(oshld)
-
-#define __iormb(v) \
-({ \
- unsigned long tmp; \
- \
- dma_rmb(); \
- \
- /* \
- * Courtesy of arch/arm64/include/asm/io.h: \
- * Create a dummy control dependency from the IO read to any \
- * later instructions. This ensures that a subsequent call \
- * to udelay() will be ordered due to the ISB in __delay(). \
- */ \
- asm volatile("eor %0, %1, %1\n" \
- "cbnz %0, ." \
- : "=r" (tmp) : "r" ((unsigned long)(v)) \
- : "memory"); \
-})
-
-static __always_inline void __raw_writel(u32 val, volatile void *addr)
-{
- asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
-}
-
-static __always_inline u32 __raw_readl(const volatile void *addr)
-{
- u32 val;
- asm volatile("ldr %w0, [%1]" : "=r" (val) : "r" (addr));
- return val;
-}
-
-static __always_inline void __raw_writeq(u64 val, volatile void *addr)
-{
- asm volatile("str %0, [%1]" : : "rZ" (val), "r" (addr));
-}
-
-static __always_inline u64 __raw_readq(const volatile void *addr)
-{
- u64 val;
- asm volatile("ldr %0, [%1]" : "=r" (val) : "r" (addr));
- return val;
-}
-
-#define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
-#define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; })
-#define writeq_relaxed(v,c) ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c)))
-#define readq_relaxed(c) ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; })
-
-#define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c));})
-#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
-#define writeq(v,c) ({ __iowmb(); writeq_relaxed((v),(c));})
-#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; })
-
-
-static inline void local_irq_enable(void)
-{
- asm volatile("msr daifclr, #3" : : : "memory");
-}
-
-static inline void local_irq_disable(void)
-{
- asm volatile("msr daifset, #3" : : : "memory");
-}
-
-/**
- * struct arm_smccc_res - Result from SMC/HVC call
- * @a0-a3 result values from registers 0 to 3
- */
-struct arm_smccc_res {
- unsigned long a0;
- unsigned long a1;
- unsigned long a2;
- unsigned long a3;
-};
-
-/**
- * smccc_hvc - Invoke a SMCCC function using the hvc conduit
- * @function_id: the SMCCC function to be called
- * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7
- * @res: pointer to write the return values from registers x0-x3
- *
- */
-void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
- uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
- uint64_t arg6, struct arm_smccc_res *res);
-
-/**
- * smccc_smc - Invoke a SMCCC function using the smc conduit
- * @function_id: the SMCCC function to be called
- * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7
- * @res: pointer to write the return values from registers x0-x3
- *
- */
-void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
- uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
- uint64_t arg6, struct arm_smccc_res *res);
-
-/* Execute a Wait For Interrupt instruction. */
-void wfi(void);
-
-#endif /* SELFTEST_KVM_PROCESSOR_H */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef SELFTEST_KVM_ARM64_SPINLOCK_H
-#define SELFTEST_KVM_ARM64_SPINLOCK_H
-
-struct spinlock {
- int v;
-};
-
-extern void spin_lock(struct spinlock *lock);
-extern void spin_unlock(struct spinlock *lock);
-
-#endif /* SELFTEST_KVM_ARM64_SPINLOCK_H */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_UCALL_H
-#define SELFTEST_KVM_UCALL_H
-
-#include "kvm_util.h"
-
-#define UCALL_EXIT_REASON KVM_EXIT_MMIO
-
-/*
- * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
- * VM), it must not be accessed from host code.
- */
-extern vm_vaddr_t *ucall_exit_mmio_addr;
-
-static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
- WRITE_ONCE(*ucall_exit_mmio_addr, uc);
-}
-
-#endif
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * ARM Generic Interrupt Controller (GIC) host specific defines
- */
-
-#ifndef SELFTEST_KVM_VGIC_H
-#define SELFTEST_KVM_VGIC_H
-
-#include <linux/kvm.h>
-
-#include "kvm_util.h"
-
-#define REDIST_REGION_ATTR_ADDR(count, base, flags, index) \
- (((uint64_t)(count) << 52) | \
- ((uint64_t)((base) >> 16) << 16) | \
- ((uint64_t)(flags) << 12) | \
- index)
-
-int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs);
-
-#define VGIC_MAX_RESERVED 1023
-
-void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
-int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
-
-void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
-int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
-
-/* The vcpu arg only applies to private interrupts. */
-void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
-void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
-
-#define KVM_IRQCHIP_NUM_PINS (1020 - 32)
-
-int vgic_its_setup(struct kvm_vm *vm);
-
-#endif // SELFTEST_KVM_VGIC_H
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Timer specific interface
+ */
+
+#ifndef SELFTEST_KVM_ARCH_TIMER_H
+#define SELFTEST_KVM_ARCH_TIMER_H
+
+#include "processor.h"
+
+enum arch_timer {
+ VIRTUAL,
+ PHYSICAL,
+};
+
+#define CTL_ENABLE (1 << 0)
+#define CTL_IMASK (1 << 1)
+#define CTL_ISTATUS (1 << 2)
+
+#define msec_to_cycles(msec) \
+ (timer_get_cntfrq() * (uint64_t)(msec) / 1000)
+
+#define usec_to_cycles(usec) \
+ (timer_get_cntfrq() * (uint64_t)(usec) / 1000000)
+
+#define cycles_to_usec(cycles) \
+ ((uint64_t)(cycles) * 1000000 / timer_get_cntfrq())
+
+static inline uint32_t timer_get_cntfrq(void)
+{
+ return read_sysreg(cntfrq_el0);
+}
+
+static inline uint64_t timer_get_cntct(enum arch_timer timer)
+{
+ isb();
+
+ switch (timer) {
+ case VIRTUAL:
+ return read_sysreg(cntvct_el0);
+ case PHYSICAL:
+ return read_sysreg(cntpct_el0);
+ default:
+ GUEST_FAIL("Unexpected timer type = %u", timer);
+ }
+
+ /* We should not reach here */
+ return 0;
+}
+
+static inline void timer_set_cval(enum arch_timer timer, uint64_t cval)
+{
+ switch (timer) {
+ case VIRTUAL:
+ write_sysreg(cval, cntv_cval_el0);
+ break;
+ case PHYSICAL:
+ write_sysreg(cval, cntp_cval_el0);
+ break;
+ default:
+ GUEST_FAIL("Unexpected timer type = %u", timer);
+ }
+
+ isb();
+}
+
+static inline uint64_t timer_get_cval(enum arch_timer timer)
+{
+ switch (timer) {
+ case VIRTUAL:
+ return read_sysreg(cntv_cval_el0);
+ case PHYSICAL:
+ return read_sysreg(cntp_cval_el0);
+ default:
+ GUEST_FAIL("Unexpected timer type = %u", timer);
+ }
+
+ /* We should not reach here */
+ return 0;
+}
+
+static inline void timer_set_tval(enum arch_timer timer, int32_t tval)
+{
+ switch (timer) {
+ case VIRTUAL:
+ write_sysreg(tval, cntv_tval_el0);
+ break;
+ case PHYSICAL:
+ write_sysreg(tval, cntp_tval_el0);
+ break;
+ default:
+ GUEST_FAIL("Unexpected timer type = %u", timer);
+ }
+
+ isb();
+}
+
+static inline int32_t timer_get_tval(enum arch_timer timer)
+{
+ isb();
+ switch (timer) {
+ case VIRTUAL:
+ return read_sysreg(cntv_tval_el0);
+ case PHYSICAL:
+ return read_sysreg(cntp_tval_el0);
+ default:
+ GUEST_FAIL("Could not get timer %d\n", timer);
+ }
+
+ /* We should not reach here */
+ return 0;
+}
+
+static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl)
+{
+ switch (timer) {
+ case VIRTUAL:
+ write_sysreg(ctl, cntv_ctl_el0);
+ break;
+ case PHYSICAL:
+ write_sysreg(ctl, cntp_ctl_el0);
+ break;
+ default:
+ GUEST_FAIL("Unexpected timer type = %u", timer);
+ }
+
+ isb();
+}
+
+static inline uint32_t timer_get_ctl(enum arch_timer timer)
+{
+ switch (timer) {
+ case VIRTUAL:
+ return read_sysreg(cntv_ctl_el0);
+ case PHYSICAL:
+ return read_sysreg(cntp_ctl_el0);
+ default:
+ GUEST_FAIL("Unexpected timer type = %u", timer);
+ }
+
+ /* We should not reach here */
+ return 0;
+}
+
+static inline void timer_set_next_cval_ms(enum arch_timer timer, uint32_t msec)
+{
+ uint64_t now_ct = timer_get_cntct(timer);
+ uint64_t next_ct = now_ct + msec_to_cycles(msec);
+
+ timer_set_cval(timer, next_ct);
+}
+
+static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec)
+{
+ timer_set_tval(timer, msec_to_cycles(msec));
+}
+
+#endif /* SELFTEST_KVM_ARCH_TIMER_H */
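
As a usage note rather than patch content: a guest typically programs a one-shot expiry with these accessors by writing a compare value relative to "now" and then enabling the timer with its interrupt left unmasked. A minimal sketch using only the declarations above; the function name is made up for illustration:

static inline void vtimer_arm_oneshot_ms(uint32_t msec)
{
	/* CVAL = current count + msec worth of cycles on the virtual timer. */
	timer_set_next_cval_ms(VIRTUAL, msec);

	/* Enable the timer; leaving CTL_IMASK clear keeps the IRQ unmasked. */
	timer_set_ctl(VIRTUAL, CTL_ENABLE);
}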
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM simple delay routines
+ */
+
+#ifndef SELFTEST_KVM_ARM_DELAY_H
+#define SELFTEST_KVM_ARM_DELAY_H
+
+#include "arch_timer.h"
+
+static inline void __delay(uint64_t cycles)
+{
+ enum arch_timer timer = VIRTUAL;
+ uint64_t start = timer_get_cntct(timer);
+
+ while ((timer_get_cntct(timer) - start) < cycles)
+ cpu_relax();
+}
+
+static inline void udelay(unsigned long usec)
+{
+ __delay(usec_to_cycles(usec));
+}
+
+#endif /* SELFTEST_KVM_ARM_DELAY_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) specific defines
+ */
+
+#ifndef SELFTEST_KVM_GIC_H
+#define SELFTEST_KVM_GIC_H
+
+#include <asm/kvm.h>
+
+enum gic_type {
+ GIC_V3,
+ GIC_TYPE_MAX,
+};
+
+/*
+ * Note that the redistributor frames are at the end, as the range scales
+ * with the number of vCPUs in the VM.
+ */
+#define GITS_BASE_GPA 0x8000000ULL
+#define GICD_BASE_GPA (GITS_BASE_GPA + KVM_VGIC_V3_ITS_SIZE)
+#define GICR_BASE_GPA (GICD_BASE_GPA + KVM_VGIC_V3_DIST_SIZE)
+
+/* The GIC is identity-mapped into the guest at the time of setup. */
+#define GITS_BASE_GVA ((volatile void *)GITS_BASE_GPA)
+#define GICD_BASE_GVA ((volatile void *)GICD_BASE_GPA)
+#define GICR_BASE_GVA ((volatile void *)GICR_BASE_GPA)
+
+#define MIN_SGI 0
+#define MIN_PPI 16
+#define MIN_SPI 32
+#define MAX_SPI 1019
+#define IAR_SPURIOUS 1023
+
+#define INTID_IS_SGI(intid) (0 <= (intid) && (intid) < MIN_PPI)
+#define INTID_IS_PPI(intid) (MIN_PPI <= (intid) && (intid) < MIN_SPI)
+#define INTID_IS_SPI(intid) (MIN_SPI <= (intid) && (intid) <= MAX_SPI)
+
+void gic_init(enum gic_type type, unsigned int nr_cpus);
+void gic_irq_enable(unsigned int intid);
+void gic_irq_disable(unsigned int intid);
+unsigned int gic_get_and_ack_irq(void);
+void gic_set_eoi(unsigned int intid);
+void gic_set_dir(unsigned int intid);
+
+/*
+ * Sets the EOI mode. When split is false, EOI just drops the priority. When
+ * split is true, EOI drops the priority and deactivates the interrupt.
+ */
+void gic_set_eoi_split(bool split);
+void gic_set_priority_mask(uint64_t mask);
+void gic_set_priority(uint32_t intid, uint32_t prio);
+void gic_irq_set_active(unsigned int intid);
+void gic_irq_clear_active(unsigned int intid);
+bool gic_irq_get_active(unsigned int intid);
+void gic_irq_set_pending(unsigned int intid);
+void gic_irq_clear_pending(unsigned int intid);
+bool gic_irq_get_pending(unsigned int intid);
+void gic_irq_set_config(unsigned int intid, bool is_edge);
+
+void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
+ vm_paddr_t pend_table);
+
+#endif /* SELFTEST_KVM_GIC_H */
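
Again as an illustration rather than patch content: the usual guest-side flow with this API is acknowledge, service, then end-of-interrupt. A sketch of a handler body, assuming a handler registered through the arm64 processor.h exception plumbing (struct ex_regs and the registration helpers appear elsewhere in this series); the handler name is hypothetical:

static void guest_irq_handler(struct ex_regs *regs)
{
	unsigned int intid = gic_get_and_ack_irq();

	/* Nothing pending; nothing to EOI. */
	if (intid == IAR_SPURIOUS)
		return;

	/* ... service the interrupt source identified by intid ... */

	gic_set_eoi(intid);
}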
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+#ifndef __SELFTESTS_GIC_V3_H
+#define __SELFTESTS_GIC_V3_H
+
+/*
+ * Distributor registers. We assume we're running non-secure, with ARE
+ * being set. Secure-only and non-ARE registers are not described.
+ */
+#define GICD_CTLR 0x0000
+#define GICD_TYPER 0x0004
+#define GICD_IIDR 0x0008
+#define GICD_TYPER2 0x000C
+#define GICD_STATUSR 0x0010
+#define GICD_SETSPI_NSR 0x0040
+#define GICD_CLRSPI_NSR 0x0048
+#define GICD_SETSPI_SR 0x0050
+#define GICD_CLRSPI_SR 0x0058
+#define GICD_IGROUPR 0x0080
+#define GICD_ISENABLER 0x0100
+#define GICD_ICENABLER 0x0180
+#define GICD_ISPENDR 0x0200
+#define GICD_ICPENDR 0x0280
+#define GICD_ISACTIVER 0x0300
+#define GICD_ICACTIVER 0x0380
+#define GICD_IPRIORITYR 0x0400
+#define GICD_ICFGR 0x0C00
+#define GICD_IGRPMODR 0x0D00
+#define GICD_NSACR 0x0E00
+#define GICD_IGROUPRnE 0x1000
+#define GICD_ISENABLERnE 0x1200
+#define GICD_ICENABLERnE 0x1400
+#define GICD_ISPENDRnE 0x1600
+#define GICD_ICPENDRnE 0x1800
+#define GICD_ISACTIVERnE 0x1A00
+#define GICD_ICACTIVERnE 0x1C00
+#define GICD_IPRIORITYRnE 0x2000
+#define GICD_ICFGRnE 0x3000
+#define GICD_IROUTER 0x6000
+#define GICD_IROUTERnE 0x8000
+#define GICD_IDREGS 0xFFD0
+#define GICD_PIDR2 0xFFE8
+
+#define ESPI_BASE_INTID 4096
+
+/*
+ * Those registers are actually from GICv2, but the spec demands that they
+ * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3).
+ */
+#define GICD_ITARGETSR 0x0800
+#define GICD_SGIR 0x0F00
+#define GICD_CPENDSGIR 0x0F10
+#define GICD_SPENDSGIR 0x0F20
+
+#define GICD_CTLR_RWP (1U << 31)
+#define GICD_CTLR_nASSGIreq (1U << 8)
+#define GICD_CTLR_DS (1U << 6)
+#define GICD_CTLR_ARE_NS (1U << 4)
+#define GICD_CTLR_ENABLE_G1A (1U << 1)
+#define GICD_CTLR_ENABLE_G1 (1U << 0)
+
+#define GICD_IIDR_IMPLEMENTER_SHIFT 0
+#define GICD_IIDR_IMPLEMENTER_MASK (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
+#define GICD_IIDR_REVISION_SHIFT 12
+#define GICD_IIDR_REVISION_MASK (0xf << GICD_IIDR_REVISION_SHIFT)
+#define GICD_IIDR_VARIANT_SHIFT 16
+#define GICD_IIDR_VARIANT_MASK (0xf << GICD_IIDR_VARIANT_SHIFT)
+#define GICD_IIDR_PRODUCT_ID_SHIFT 24
+#define GICD_IIDR_PRODUCT_ID_MASK (0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
+
+
+/*
+ * In systems with a single security state (what we emulate in KVM)
+ * the meaning of the interrupt group enable bits is slightly different
+ */
+#define GICD_CTLR_ENABLE_SS_G1 (1U << 1)
+#define GICD_CTLR_ENABLE_SS_G0 (1U << 0)
+
+#define GICD_TYPER_RSS (1U << 26)
+#define GICD_TYPER_LPIS (1U << 17)
+#define GICD_TYPER_MBIS (1U << 16)
+#define GICD_TYPER_ESPI (1U << 8)
+
+#define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1)
+#define GICD_TYPER_NUM_LPIS(typer) ((((typer) >> 11) & 0x1f) + 1)
+#define GICD_TYPER_SPIS(typer) ((((typer) & 0x1f) + 1) * 32)
+#define GICD_TYPER_ESPIS(typer) \
+ (((typer) & GICD_TYPER_ESPI) ? GICD_TYPER_SPIS((typer) >> 27) : 0)
+
+#define GICD_TYPER2_nASSGIcap (1U << 8)
+#define GICD_TYPER2_VIL (1U << 7)
+#define GICD_TYPER2_VID GENMASK(4, 0)
+
+#define GICD_IROUTER_SPI_MODE_ONE (0U << 31)
+#define GICD_IROUTER_SPI_MODE_ANY (1U << 31)
+
+#define GIC_PIDR2_ARCH_MASK 0xf0
+#define GIC_PIDR2_ARCH_GICv3 0x30
+#define GIC_PIDR2_ARCH_GICv4 0x40
+
+#define GIC_V3_DIST_SIZE 0x10000
+
+#define GIC_PAGE_SIZE_4K 0ULL
+#define GIC_PAGE_SIZE_16K 1ULL
+#define GIC_PAGE_SIZE_64K 2ULL
+#define GIC_PAGE_SIZE_MASK 3ULL
+
+/*
+ * Re-Distributor registers, offsets from RD_base
+ */
+#define GICR_CTLR GICD_CTLR
+#define GICR_IIDR 0x0004
+#define GICR_TYPER 0x0008
+#define GICR_STATUSR GICD_STATUSR
+#define GICR_WAKER 0x0014
+#define GICR_SETLPIR 0x0040
+#define GICR_CLRLPIR 0x0048
+#define GICR_PROPBASER 0x0070
+#define GICR_PENDBASER 0x0078
+#define GICR_INVLPIR 0x00A0
+#define GICR_INVALLR 0x00B0
+#define GICR_SYNCR 0x00C0
+#define GICR_IDREGS GICD_IDREGS
+#define GICR_PIDR2 GICD_PIDR2
+
+#define GICR_CTLR_ENABLE_LPIS (1UL << 0)
+#define GICR_CTLR_CES (1UL << 1)
+#define GICR_CTLR_IR (1UL << 2)
+#define GICR_CTLR_RWP (1UL << 3)
+
+#define GICR_TYPER_CPU_NUMBER(r) (((r) >> 8) & 0xffff)
+
+#define EPPI_BASE_INTID 1056
+
+#define GICR_TYPER_NR_PPIS(r) \
+ ({ \
+ unsigned int __ppinum = ((r) >> 27) & 0x1f; \
+ unsigned int __nr_ppis = 16; \
+ if (__ppinum == 1 || __ppinum == 2) \
+ __nr_ppis += __ppinum * 32; \
+ \
+ __nr_ppis; \
+ })
+
+#define GICR_WAKER_ProcessorSleep (1U << 1)
+#define GICR_WAKER_ChildrenAsleep (1U << 2)
+
+#define GIC_BASER_CACHE_nCnB 0ULL
+#define GIC_BASER_CACHE_SameAsInner 0ULL
+#define GIC_BASER_CACHE_nC 1ULL
+#define GIC_BASER_CACHE_RaWt 2ULL
+#define GIC_BASER_CACHE_RaWb 3ULL
+#define GIC_BASER_CACHE_WaWt 4ULL
+#define GIC_BASER_CACHE_WaWb 5ULL
+#define GIC_BASER_CACHE_RaWaWt 6ULL
+#define GIC_BASER_CACHE_RaWaWb 7ULL
+#define GIC_BASER_CACHE_MASK 7ULL
+#define GIC_BASER_NonShareable 0ULL
+#define GIC_BASER_InnerShareable 1ULL
+#define GIC_BASER_OuterShareable 2ULL
+#define GIC_BASER_SHAREABILITY_MASK 3ULL
+
+#define GIC_BASER_CACHEABILITY(reg, inner_outer, type) \
+ (GIC_BASER_CACHE_##type << reg##_##inner_outer##_CACHEABILITY_SHIFT)
+
+#define GIC_BASER_SHAREABILITY(reg, type) \
+ (GIC_BASER_##type << reg##_SHAREABILITY_SHIFT)
+
+/* encode a size field of width @w containing @n - 1 units */
+#define GIC_ENCODE_SZ(n, w) (((unsigned long)(n) - 1) & GENMASK_ULL(((w) - 1), 0))
+
+#define GICR_PROPBASER_SHAREABILITY_SHIFT (10)
+#define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT (7)
+#define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT (56)
+#define GICR_PROPBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GICR_PROPBASER, SHAREABILITY_MASK)
+#define GICR_PROPBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, MASK)
+#define GICR_PROPBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, MASK)
+#define GICR_PROPBASER_CACHEABILITY_MASK GICR_PROPBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_PROPBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable)
+
+#define GICR_PROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nCnB)
+#define GICR_PROPBASER_nC GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nC)
+#define GICR_PROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt)
+#define GICR_PROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb)
+#define GICR_PROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWt)
+#define GICR_PROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWb)
+#define GICR_PROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWt)
+#define GICR_PROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb)
+
+#define GICR_PROPBASER_IDBITS_MASK (0x1f)
+#define GICR_PROPBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 12))
+#define GICR_PENDBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 16))
+
+#define GICR_PENDBASER_SHAREABILITY_SHIFT (10)
+#define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT (7)
+#define GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT (56)
+#define GICR_PENDBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GICR_PENDBASER, SHAREABILITY_MASK)
+#define GICR_PENDBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, MASK)
+#define GICR_PENDBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, MASK)
+#define GICR_PENDBASER_CACHEABILITY_MASK GICR_PENDBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_PENDBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable)
+
+#define GICR_PENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nCnB)
+#define GICR_PENDBASER_nC GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nC)
+#define GICR_PENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt)
+#define GICR_PENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb)
+#define GICR_PENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWt)
+#define GICR_PENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWb)
+#define GICR_PENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWt)
+#define GICR_PENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWb)
+
+#define GICR_PENDBASER_PTZ BIT_ULL(62)
+
+/*
+ * Re-Distributor registers, offsets from SGI_base
+ */
+#define GICR_IGROUPR0 GICD_IGROUPR
+#define GICR_ISENABLER0 GICD_ISENABLER
+#define GICR_ICENABLER0 GICD_ICENABLER
+#define GICR_ISPENDR0 GICD_ISPENDR
+#define GICR_ICPENDR0 GICD_ICPENDR
+#define GICR_ISACTIVER0 GICD_ISACTIVER
+#define GICR_ICACTIVER0 GICD_ICACTIVER
+#define GICR_IPRIORITYR0 GICD_IPRIORITYR
+#define GICR_ICFGR0 GICD_ICFGR
+#define GICR_IGRPMODR0 GICD_IGRPMODR
+#define GICR_NSACR GICD_NSACR
+
+#define GICR_TYPER_PLPIS (1U << 0)
+#define GICR_TYPER_VLPIS (1U << 1)
+#define GICR_TYPER_DIRTY (1U << 2)
+#define GICR_TYPER_DirectLPIS (1U << 3)
+#define GICR_TYPER_LAST (1U << 4)
+#define GICR_TYPER_RVPEID (1U << 7)
+#define GICR_TYPER_COMMON_LPI_AFF GENMASK_ULL(25, 24)
+#define GICR_TYPER_AFFINITY GENMASK_ULL(63, 32)
+
+#define GICR_INVLPIR_INTID GENMASK_ULL(31, 0)
+#define GICR_INVLPIR_VPEID GENMASK_ULL(47, 32)
+#define GICR_INVLPIR_V GENMASK_ULL(63, 63)
+
+#define GICR_INVALLR_VPEID GICR_INVLPIR_VPEID
+#define GICR_INVALLR_V GICR_INVLPIR_V
+
+#define GIC_V3_REDIST_SIZE 0x20000
+
+#define LPI_PROP_GROUP1 (1 << 1)
+#define LPI_PROP_ENABLED (1 << 0)
+
+/*
+ * Re-Distributor registers, offsets from VLPI_base
+ */
+#define GICR_VPROPBASER 0x0070
+
+#define GICR_VPROPBASER_IDBITS_MASK 0x1f
+
+#define GICR_VPROPBASER_SHAREABILITY_SHIFT (10)
+#define GICR_VPROPBASER_INNER_CACHEABILITY_SHIFT (7)
+#define GICR_VPROPBASER_OUTER_CACHEABILITY_SHIFT (56)
+
+#define GICR_VPROPBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GICR_VPROPBASER, SHAREABILITY_MASK)
+#define GICR_VPROPBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, MASK)
+#define GICR_VPROPBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_VPROPBASER, OUTER, MASK)
+#define GICR_VPROPBASER_CACHEABILITY_MASK \
+ GICR_VPROPBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_VPROPBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GICR_VPROPBASER, InnerShareable)
+
+#define GICR_VPROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nCnB)
+#define GICR_VPROPBASER_nC GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nC)
+#define GICR_VPROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt)
+#define GICR_VPROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWb)
+#define GICR_VPROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWt)
+#define GICR_VPROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWb)
+#define GICR_VPROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWt)
+#define GICR_VPROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWb)
+
+/*
+ * GICv4.1 VPROPBASER reinvention. A subtle mix between the old
+ * VPROPBASER and ITS_BASER. Just not quite either of the two.
+ */
+#define GICR_VPROPBASER_4_1_VALID (1ULL << 63)
+#define GICR_VPROPBASER_4_1_ENTRY_SIZE GENMASK_ULL(61, 59)
+#define GICR_VPROPBASER_4_1_INDIRECT (1ULL << 55)
+#define GICR_VPROPBASER_4_1_PAGE_SIZE GENMASK_ULL(54, 53)
+#define GICR_VPROPBASER_4_1_Z (1ULL << 52)
+#define GICR_VPROPBASER_4_1_ADDR GENMASK_ULL(51, 12)
+#define GICR_VPROPBASER_4_1_SIZE GENMASK_ULL(6, 0)
+
+#define GICR_VPENDBASER 0x0078
+
+#define GICR_VPENDBASER_SHAREABILITY_SHIFT (10)
+#define GICR_VPENDBASER_INNER_CACHEABILITY_SHIFT (7)
+#define GICR_VPENDBASER_OUTER_CACHEABILITY_SHIFT (56)
+#define GICR_VPENDBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GICR_VPENDBASER, SHAREABILITY_MASK)
+#define GICR_VPENDBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, MASK)
+#define GICR_VPENDBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_VPENDBASER, OUTER, MASK)
+#define GICR_VPENDBASER_CACHEABILITY_MASK \
+ GICR_VPENDBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_VPENDBASER_NonShareable \
+ GIC_BASER_SHAREABILITY(GICR_VPENDBASER, NonShareable)
+
+#define GICR_VPENDBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GICR_VPENDBASER, InnerShareable)
+
+#define GICR_VPENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB)
+#define GICR_VPENDBASER_nC GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC)
+#define GICR_VPENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt)
+#define GICR_VPENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWb)
+#define GICR_VPENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWt)
+#define GICR_VPENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWb)
+#define GICR_VPENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWt)
+#define GICR_VPENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWb)
+
+#define GICR_VPENDBASER_Dirty (1ULL << 60)
+#define GICR_VPENDBASER_PendingLast (1ULL << 61)
+#define GICR_VPENDBASER_IDAI (1ULL << 62)
+#define GICR_VPENDBASER_Valid (1ULL << 63)
+
+/*
+ * GICv4.1 VPENDBASER, used for VPE residency. On top of these fields,
+ * also use the above Valid, PendingLast and Dirty.
+ */
+#define GICR_VPENDBASER_4_1_DB (1ULL << 62)
+#define GICR_VPENDBASER_4_1_VGRP0EN (1ULL << 59)
+#define GICR_VPENDBASER_4_1_VGRP1EN (1ULL << 58)
+#define GICR_VPENDBASER_4_1_VPEID GENMASK_ULL(15, 0)
+
+#define GICR_VSGIR 0x0080
+
+#define GICR_VSGIR_VPEID GENMASK(15, 0)
+
+#define GICR_VSGIPENDR 0x0088
+
+#define GICR_VSGIPENDR_BUSY (1U << 31)
+#define GICR_VSGIPENDR_PENDING GENMASK(15, 0)
+
+/*
+ * ITS registers, offsets from ITS_base
+ */
+#define GITS_CTLR 0x0000
+#define GITS_IIDR 0x0004
+#define GITS_TYPER 0x0008
+#define GITS_MPIDR 0x0018
+#define GITS_CBASER 0x0080
+#define GITS_CWRITER 0x0088
+#define GITS_CREADR 0x0090
+#define GITS_BASER 0x0100
+#define GITS_IDREGS_BASE 0xffd0
+#define GITS_PIDR0 0xffe0
+#define GITS_PIDR1 0xffe4
+#define GITS_PIDR2 GICR_PIDR2
+#define GITS_PIDR4 0xffd0
+#define GITS_CIDR0 0xfff0
+#define GITS_CIDR1 0xfff4
+#define GITS_CIDR2 0xfff8
+#define GITS_CIDR3 0xfffc
+
+#define GITS_TRANSLATER 0x10040
+
+#define GITS_SGIR 0x20020
+
+#define GITS_SGIR_VPEID GENMASK_ULL(47, 32)
+#define GITS_SGIR_VINTID GENMASK_ULL(3, 0)
+
+#define GITS_CTLR_ENABLE (1U << 0)
+#define GITS_CTLR_ImDe (1U << 1)
+#define GITS_CTLR_ITS_NUMBER_SHIFT 4
+#define GITS_CTLR_ITS_NUMBER (0xFU << GITS_CTLR_ITS_NUMBER_SHIFT)
+#define GITS_CTLR_QUIESCENT (1U << 31)
+
+#define GITS_TYPER_PLPIS (1UL << 0)
+#define GITS_TYPER_VLPIS (1UL << 1)
+#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4
+#define GITS_TYPER_ITT_ENTRY_SIZE GENMASK_ULL(7, 4)
+#define GITS_TYPER_IDBITS_SHIFT 8
+#define GITS_TYPER_DEVBITS_SHIFT 13
+#define GITS_TYPER_DEVBITS GENMASK_ULL(17, 13)
+#define GITS_TYPER_PTA (1UL << 19)
+#define GITS_TYPER_HCC_SHIFT 24
+#define GITS_TYPER_HCC(r) (((r) >> GITS_TYPER_HCC_SHIFT) & 0xff)
+#define GITS_TYPER_VMOVP (1ULL << 37)
+#define GITS_TYPER_VMAPP (1ULL << 40)
+#define GITS_TYPER_SVPET GENMASK_ULL(42, 41)
+
+#define GITS_IIDR_REV_SHIFT 12
+#define GITS_IIDR_REV_MASK (0xf << GITS_IIDR_REV_SHIFT)
+#define GITS_IIDR_REV(r) (((r) >> GITS_IIDR_REV_SHIFT) & 0xf)
+#define GITS_IIDR_PRODUCTID_SHIFT 24
+
+#define GITS_CBASER_VALID (1ULL << 63)
+#define GITS_CBASER_SHAREABILITY_SHIFT (10)
+#define GITS_CBASER_INNER_CACHEABILITY_SHIFT (59)
+#define GITS_CBASER_OUTER_CACHEABILITY_SHIFT (53)
+#define GITS_CBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GITS_CBASER, SHAREABILITY_MASK)
+#define GITS_CBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, MASK)
+#define GITS_CBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GITS_CBASER, OUTER, MASK)
+#define GITS_CBASER_CACHEABILITY_MASK GITS_CBASER_INNER_CACHEABILITY_MASK
+
+#define GITS_CBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GITS_CBASER, InnerShareable)
+
+#define GITS_CBASER_nCnB GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nCnB)
+#define GITS_CBASER_nC GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nC)
+#define GITS_CBASER_RaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt)
+#define GITS_CBASER_RaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWb)
+#define GITS_CBASER_WaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWt)
+#define GITS_CBASER_WaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWb)
+#define GITS_CBASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt)
+#define GITS_CBASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb)
+
+#define GITS_CBASER_ADDRESS(cbaser) ((cbaser) & GENMASK_ULL(51, 12))
+
+#define GITS_BASER_NR_REGS 8
+
+#define GITS_BASER_VALID (1ULL << 63)
+#define GITS_BASER_INDIRECT (1ULL << 62)
+
+#define GITS_BASER_INNER_CACHEABILITY_SHIFT (59)
+#define GITS_BASER_OUTER_CACHEABILITY_SHIFT (53)
+#define GITS_BASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GITS_BASER, INNER, MASK)
+#define GITS_BASER_CACHEABILITY_MASK GITS_BASER_INNER_CACHEABILITY_MASK
+#define GITS_BASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, MASK)
+#define GITS_BASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GITS_BASER, SHAREABILITY_MASK)
+
+#define GITS_BASER_nCnB GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nCnB)
+#define GITS_BASER_nC GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nC)
+#define GITS_BASER_RaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt)
+#define GITS_BASER_RaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb)
+#define GITS_BASER_WaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWt)
+#define GITS_BASER_WaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWb)
+#define GITS_BASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWt)
+#define GITS_BASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWb)
+
+#define GITS_BASER_TYPE_SHIFT (56)
+#define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7)
+#define GITS_BASER_ENTRY_SIZE_SHIFT (48)
+#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1)
+#define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48)
+#define GITS_BASER_PHYS_52_to_48(phys) \
+ (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12)
+#define GITS_BASER_ADDR_48_to_52(baser) \
+ (((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48)
+
+#define GITS_BASER_SHAREABILITY_SHIFT (10)
+#define GITS_BASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)
+#define GITS_BASER_PAGE_SIZE_SHIFT (8)
+#define __GITS_BASER_PSZ(sz) (GIC_PAGE_SIZE_ ## sz << GITS_BASER_PAGE_SIZE_SHIFT)
+#define GITS_BASER_PAGE_SIZE_4K __GITS_BASER_PSZ(4K)
+#define GITS_BASER_PAGE_SIZE_16K __GITS_BASER_PSZ(16K)
+#define GITS_BASER_PAGE_SIZE_64K __GITS_BASER_PSZ(64K)
+#define GITS_BASER_PAGE_SIZE_MASK __GITS_BASER_PSZ(MASK)
+#define GITS_BASER_PAGES_MAX 256
+#define GITS_BASER_PAGES_SHIFT (0)
+#define GITS_BASER_NR_PAGES(r) (((r) & 0xff) + 1)
+
+#define GITS_BASER_TYPE_NONE 0
+#define GITS_BASER_TYPE_DEVICE 1
+#define GITS_BASER_TYPE_VCPU 2
+#define GITS_BASER_TYPE_RESERVED3 3
+#define GITS_BASER_TYPE_COLLECTION 4
+#define GITS_BASER_TYPE_RESERVED5 5
+#define GITS_BASER_TYPE_RESERVED6 6
+#define GITS_BASER_TYPE_RESERVED7 7
+
+#define GITS_LVL1_ENTRY_SIZE (8UL)
+
+/*
+ * ITS commands
+ */
+#define GITS_CMD_MAPD 0x08
+#define GITS_CMD_MAPC 0x09
+#define GITS_CMD_MAPTI 0x0a
+#define GITS_CMD_MAPI 0x0b
+#define GITS_CMD_MOVI 0x01
+#define GITS_CMD_DISCARD 0x0f
+#define GITS_CMD_INV 0x0c
+#define GITS_CMD_MOVALL 0x0e
+#define GITS_CMD_INVALL 0x0d
+#define GITS_CMD_INT 0x03
+#define GITS_CMD_CLEAR 0x04
+#define GITS_CMD_SYNC 0x05
+
+/*
+ * GICv4 ITS specific commands
+ */
+#define GITS_CMD_GICv4(x) ((x) | 0x20)
+#define GITS_CMD_VINVALL GITS_CMD_GICv4(GITS_CMD_INVALL)
+#define GITS_CMD_VMAPP GITS_CMD_GICv4(GITS_CMD_MAPC)
+#define GITS_CMD_VMAPTI GITS_CMD_GICv4(GITS_CMD_MAPTI)
+#define GITS_CMD_VMOVI GITS_CMD_GICv4(GITS_CMD_MOVI)
+#define GITS_CMD_VSYNC GITS_CMD_GICv4(GITS_CMD_SYNC)
+/* VMOVP, VSGI and INVDB are the odd ones, as they don't have a physical counterpart */
+#define GITS_CMD_VMOVP GITS_CMD_GICv4(2)
+#define GITS_CMD_VSGI GITS_CMD_GICv4(3)
+#define GITS_CMD_INVDB GITS_CMD_GICv4(0xe)
+
+/*
+ * ITS error numbers
+ */
+#define E_ITS_MOVI_UNMAPPED_INTERRUPT 0x010107
+#define E_ITS_MOVI_UNMAPPED_COLLECTION 0x010109
+#define E_ITS_INT_UNMAPPED_INTERRUPT 0x010307
+#define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507
+#define E_ITS_MAPD_DEVICE_OOR 0x010801
+#define E_ITS_MAPD_ITTSIZE_OOR 0x010802
+#define E_ITS_MAPC_PROCNUM_OOR 0x010902
+#define E_ITS_MAPC_COLLECTION_OOR 0x010903
+#define E_ITS_MAPTI_UNMAPPED_DEVICE 0x010a04
+#define E_ITS_MAPTI_ID_OOR 0x010a05
+#define E_ITS_MAPTI_PHYSICALID_OOR 0x010a06
+#define E_ITS_INV_UNMAPPED_INTERRUPT 0x010c07
+#define E_ITS_INVALL_UNMAPPED_COLLECTION 0x010d09
+#define E_ITS_MOVALL_PROCNUM_OOR 0x010e01
+#define E_ITS_DISCARD_UNMAPPED_INTERRUPT 0x010f07
+
+/*
+ * CPU interface registers
+ */
+#define ICC_CTLR_EL1_EOImode_SHIFT (1)
+#define ICC_CTLR_EL1_EOImode_drop_dir (0U << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_EOImode_drop (1U << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_CBPR_SHIFT 0
+#define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT)
+#define ICC_CTLR_EL1_PMHE_SHIFT 6
+#define ICC_CTLR_EL1_PMHE_MASK (1 << ICC_CTLR_EL1_PMHE_SHIFT)
+#define ICC_CTLR_EL1_PRI_BITS_SHIFT 8
+#define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT)
+#define ICC_CTLR_EL1_ID_BITS_SHIFT 11
+#define ICC_CTLR_EL1_ID_BITS_MASK (0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT)
+#define ICC_CTLR_EL1_SEIS_SHIFT 14
+#define ICC_CTLR_EL1_SEIS_MASK (0x1 << ICC_CTLR_EL1_SEIS_SHIFT)
+#define ICC_CTLR_EL1_A3V_SHIFT 15
+#define ICC_CTLR_EL1_A3V_MASK (0x1 << ICC_CTLR_EL1_A3V_SHIFT)
+#define ICC_CTLR_EL1_RSS (0x1 << 18)
+#define ICC_CTLR_EL1_ExtRange (0x1 << 19)
+#define ICC_PMR_EL1_SHIFT 0
+#define ICC_PMR_EL1_MASK (0xff << ICC_PMR_EL1_SHIFT)
+#define ICC_BPR0_EL1_SHIFT 0
+#define ICC_BPR0_EL1_MASK (0x7 << ICC_BPR0_EL1_SHIFT)
+#define ICC_BPR1_EL1_SHIFT 0
+#define ICC_BPR1_EL1_MASK (0x7 << ICC_BPR1_EL1_SHIFT)
+#define ICC_IGRPEN0_EL1_SHIFT 0
+#define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT)
+#define ICC_IGRPEN1_EL1_SHIFT 0
+#define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT)
+#define ICC_SRE_EL1_DIB (1U << 2)
+#define ICC_SRE_EL1_DFB (1U << 1)
+#define ICC_SRE_EL1_SRE (1U << 0)
+
+/* These are for GICv2 emulation only */
+#define GICH_LR_VIRTUALID (0x3ffUL << 0)
+#define GICH_LR_PHYSID_CPUID_SHIFT (10)
+#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT)
+
+#define ICC_IAR1_EL1_SPURIOUS 0x3ff
+
+#define ICC_SRE_EL2_SRE (1 << 0)
+#define ICC_SRE_EL2_ENABLE (1 << 3)
+
+#define ICC_SGI1R_TARGET_LIST_SHIFT 0
+#define ICC_SGI1R_TARGET_LIST_MASK (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT)
+#define ICC_SGI1R_AFFINITY_1_SHIFT 16
+#define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT)
+#define ICC_SGI1R_SGI_ID_SHIFT 24
+#define ICC_SGI1R_SGI_ID_MASK (0xfULL << ICC_SGI1R_SGI_ID_SHIFT)
+#define ICC_SGI1R_AFFINITY_2_SHIFT 32
+#define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT)
+#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40
+#define ICC_SGI1R_RS_SHIFT 44
+#define ICC_SGI1R_RS_MASK (0xfULL << ICC_SGI1R_RS_SHIFT)
+#define ICC_SGI1R_AFFINITY_3_SHIFT 48
+#define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT)
+
+#endif
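
For readers decoding GICD_TYPER by hand, the extractor macros above are plain shift-and-mask arithmetic: ITLinesNumber in bits [4:0] gives the SPI count in blocks of 32, and IDbits in bits [23:19] encodes the supported INTID width minus one. A self-contained sketch that reuses the same arithmetic (the sample TYPER value below is made up for illustration):

#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as GICD_TYPER_SPIS() and GICD_TYPER_ID_BITS() above. */
#define TYPER_SPIS(t)		((((t) & 0x1f) + 1) * 32)
#define TYPER_ID_BITS(t)	((((t) >> 19) & 0x1f) + 1)

int main(void)
{
	uint32_t typer = (0x0f << 19) | 0x07;	/* hypothetical sample value */

	printf("SPIs:       %u\n", TYPER_SPIS(typer));		/* (7 + 1) * 32 = 256 */
	printf("INTID bits: %u\n", TYPER_ID_BITS(typer));	/* 15 + 1 = 16 */
	return 0;
}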
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __SELFTESTS_GIC_V3_ITS_H__
+#define __SELFTESTS_GIC_V3_ITS_H__
+
+#include <linux/sizes.h>
+
+void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz,
+ vm_paddr_t device_tbl, size_t device_tbl_sz,
+ vm_paddr_t cmdq, size_t cmdq_size);
+
+void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base,
+ size_t itt_size, bool valid);
+void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid);
+void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
+ u32 collection_id, u32 intid);
+void its_send_invall_cmd(void *cmdq_base, u32 collection_id);
+
+#endif // __SELFTESTS_GIC_V3_ITS_H__
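
The helpers declared here follow the usual LPI bring-up order: MAPD binds an ITT to a device, MAPC binds a collection to a vCPU, MAPTI routes an event to an INTID via that collection, and INVALL flushes the cached configuration. A hedged guest-side sketch assuming the selftest environment (the command queue, ITT address and IDs are hypothetical parameters):

#include "kvm_util.h"
#include "gic_v3_its.h"

/* Map one device event to an LPI on vCPU 0 via collection 0. */
static void guest_map_one_lpi(void *cmdq, vm_paddr_t itt, size_t itt_sz,
			      u32 device_id, u32 event_id, u32 intid)
{
	its_send_mapd_cmd(cmdq, device_id, itt, itt_sz, true);
	its_send_mapc_cmd(cmdq, 0 /* vcpu_id */, 0 /* collection_id */, true);
	its_send_mapti_cmd(cmdq, device_id, event_id, 0 /* collection_id */, intid);
	its_send_invall_cmd(cmdq, 0 /* collection_id */);
}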
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif // SELFTEST_KVM_UTIL_ARCH_H
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AArch64 processor specific defines
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <asm/brk-imm.h>
+#include <asm/esr.h>
+#include <asm/sysreg.h>
+
+#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
+ KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
+
+/*
+ * KVM_ARM64_SYS_REG(sys_reg_id): Helper macro to convert
+ * SYS_* register definitions in asm/sysreg.h to use in KVM
+ * calls such as vcpu_get_reg() and vcpu_set_reg().
+ */
+#define KVM_ARM64_SYS_REG(sys_reg_id) \
+ ARM64_SYS_REG(sys_reg_Op0(sys_reg_id), \
+ sys_reg_Op1(sys_reg_id), \
+ sys_reg_CRn(sys_reg_id), \
+ sys_reg_CRm(sys_reg_id), \
+ sys_reg_Op2(sys_reg_id))
+
+/*
+ * Default MAIR
+ * index attribute
+ * DEVICE_nGnRnE 0 0000:0000
+ * DEVICE_nGnRE 1 0000:0100
+ * DEVICE_GRE 2 0000:1100
+ * NORMAL_NC 3 0100:0100
+ * NORMAL 4 1111:1111
+ * NORMAL_WT 5 1011:1011
+ */
+
+/* Linux doesn't use these memory types, so let's define them. */
+#define MAIR_ATTR_DEVICE_GRE UL(0x0c)
+#define MAIR_ATTR_NORMAL_WT UL(0xbb)
+
+#define MT_DEVICE_nGnRnE 0
+#define MT_DEVICE_nGnRE 1
+#define MT_DEVICE_GRE 2
+#define MT_NORMAL_NC 3
+#define MT_NORMAL 4
+#define MT_NORMAL_WT 5
+
+#define DEFAULT_MAIR_EL1 \
+ (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \
+ MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) | \
+ MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) | \
+ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) | \
+ MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \
+ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
+
+void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init);
+struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct kvm_vcpu_init *init, void *guest_code);
+
+struct ex_regs {
+ u64 regs[31];
+ u64 sp;
+ u64 pc;
+ u64 pstate;
+};
+
+#define VECTOR_NUM 16
+
+enum {
+ VECTOR_SYNC_CURRENT_SP0,
+ VECTOR_IRQ_CURRENT_SP0,
+ VECTOR_FIQ_CURRENT_SP0,
+ VECTOR_ERROR_CURRENT_SP0,
+
+ VECTOR_SYNC_CURRENT,
+ VECTOR_IRQ_CURRENT,
+ VECTOR_FIQ_CURRENT,
+ VECTOR_ERROR_CURRENT,
+
+ VECTOR_SYNC_LOWER_64,
+ VECTOR_IRQ_LOWER_64,
+ VECTOR_FIQ_LOWER_64,
+ VECTOR_ERROR_LOWER_64,
+
+ VECTOR_SYNC_LOWER_32,
+ VECTOR_IRQ_LOWER_32,
+ VECTOR_FIQ_LOWER_32,
+ VECTOR_ERROR_LOWER_32,
+};
+
+#define VECTOR_IS_SYNC(v) ((v) == VECTOR_SYNC_CURRENT_SP0 || \
+ (v) == VECTOR_SYNC_CURRENT || \
+ (v) == VECTOR_SYNC_LOWER_64 || \
+ (v) == VECTOR_SYNC_LOWER_32)
+
+/* Access flag */
+#define PTE_AF (1ULL << 10)
+
+/* Access flag update enable/disable */
+#define TCR_EL1_HA (1ULL << 39)
+
+void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
+ uint32_t *ipa16k, uint32_t *ipa64k);
+
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu);
+
+typedef void(*handler_fn)(struct ex_regs *);
+void vm_install_exception_handler(struct kvm_vm *vm,
+ int vector, handler_fn handler);
+void vm_install_sync_handler(struct kvm_vm *vm,
+ int vector, int ec, handler_fn handler);
+
+uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva);
+
+static inline void cpu_relax(void)
+{
+ asm volatile("yield" ::: "memory");
+}
+
+#define isb() asm volatile("isb" : : : "memory")
+#define dsb(opt) asm volatile("dsb " #opt : : : "memory")
+#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
+
+#define dma_wmb() dmb(oshst)
+#define __iowmb() dma_wmb()
+
+#define dma_rmb() dmb(oshld)
+
+#define __iormb(v) \
+({ \
+ unsigned long tmp; \
+ \
+ dma_rmb(); \
+ \
+ /* \
+ * Courtesy of arch/arm64/include/asm/io.h: \
+ * Create a dummy control dependency from the IO read to any \
+ * later instructions. This ensures that a subsequent call \
+ * to udelay() will be ordered due to the ISB in __delay(). \
+ */ \
+ asm volatile("eor %0, %1, %1\n" \
+ "cbnz %0, ." \
+ : "=r" (tmp) : "r" ((unsigned long)(v)) \
+ : "memory"); \
+})
+
+static __always_inline void __raw_writel(u32 val, volatile void *addr)
+{
+ asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
+}
+
+static __always_inline u32 __raw_readl(const volatile void *addr)
+{
+ u32 val;
+ asm volatile("ldr %w0, [%1]" : "=r" (val) : "r" (addr));
+ return val;
+}
+
+static __always_inline void __raw_writeq(u64 val, volatile void *addr)
+{
+ asm volatile("str %0, [%1]" : : "rZ" (val), "r" (addr));
+}
+
+static __always_inline u64 __raw_readq(const volatile void *addr)
+{
+ u64 val;
+ asm volatile("ldr %0, [%1]" : "=r" (val) : "r" (addr));
+ return val;
+}
+
+#define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
+#define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; })
+#define writeq_relaxed(v,c) ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c)))
+#define readq_relaxed(c) ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; })
+
+#define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c));})
+#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
+#define writeq(v,c) ({ __iowmb(); writeq_relaxed((v),(c));})
+#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; })
+
+static inline void local_irq_enable(void)
+{
+ asm volatile("msr daifclr, #3" : : : "memory");
+}
+
+static inline void local_irq_disable(void)
+{
+ asm volatile("msr daifset, #3" : : : "memory");
+}
+
+/**
+ * struct arm_smccc_res - Result from SMC/HVC call
+ * @a0-a3 result values from registers 0 to 3
+ */
+struct arm_smccc_res {
+ unsigned long a0;
+ unsigned long a1;
+ unsigned long a2;
+ unsigned long a3;
+};
+
+/**
+ * smccc_hvc - Invoke a SMCCC function using the hvc conduit
+ * @function_id: the SMCCC function to be called
+ * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7
+ * @res: pointer to write the return values from registers x0-x3
+ */
+void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+ uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+ uint64_t arg6, struct arm_smccc_res *res);
+
+/**
+ * smccc_smc - Invoke a SMCCC function using the smc conduit
+ * @function_id: the SMCCC function to be called
+ * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7
+ * @res: pointer to write the return values from registers x0-x3
+ */
+void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+ uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+ uint64_t arg6, struct arm_smccc_res *res);
+
+/* Execute a Wait For Interrupt instruction. */
+void wfi(void);
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
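
The readl()/writel() wrappers above add the read/write barriers that the _relaxed accessors omit, which is what guest code usually wants when talking to emulated MMIO. A hedged guest-side sketch (the device address and READY bit are hypothetical):

#include "processor.h"

#define HYP_DEV_STATUS	((void *)0x09000000UL)	/* hypothetical MMIO address */
#define HYP_DEV_READY	(1U << 0)		/* hypothetical status bit */

/* Spin until the device reports ready, yielding between polls. */
static void wait_for_device(void)
{
	while (!(readl(HYP_DEV_STATUS) & HYP_DEV_READY))
		cpu_relax();
}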
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef SELFTEST_KVM_ARM64_SPINLOCK_H
+#define SELFTEST_KVM_ARM64_SPINLOCK_H
+
+struct spinlock {
+ int v;
+};
+
+extern void spin_lock(struct spinlock *lock);
+extern void spin_unlock(struct spinlock *lock);
+
+#endif /* SELFTEST_KVM_ARM64_SPINLOCK_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_MMIO
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM), so it must not be accessed from host code.
+ */
+extern vm_vaddr_t *ucall_exit_mmio_addr;
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+ WRITE_ONCE(*ucall_exit_mmio_addr, uc);
+}
+
+#endif
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) host specific defines
+ */
+
+#ifndef SELFTEST_KVM_VGIC_H
+#define SELFTEST_KVM_VGIC_H
+
+#include <linux/kvm.h>
+
+#include "kvm_util.h"
+
+#define REDIST_REGION_ATTR_ADDR(count, base, flags, index) \
+ (((uint64_t)(count) << 52) | \
+ ((uint64_t)((base) >> 16) << 16) | \
+ ((uint64_t)(flags) << 12) | \
+ index)
+
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs);
+
+#define VGIC_MAX_RESERVED 1023
+
+void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
+int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
+
+void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
+int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
+
+/* The vcpu arg only applies to private interrupts. */
+void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
+void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
+
+#define KVM_IRQCHIP_NUM_PINS (1020 - 32)
+
+int vgic_its_setup(struct kvm_vm *vm);
+
+#endif // SELFTEST_KVM_VGIC_H
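
A hedged host-side sketch of the declarations above: create a GICv3 for an existing VM and pulse the first SPI (INTID 32). The nr_irqs value is an arbitrary example.

#include "vgic.h"

static int demo_vgic_setup(struct kvm_vm *vm, unsigned int nr_vcpus)
{
	int gic_fd = vgic_v3_setup(vm, nr_vcpus, 64 /* example nr_irqs */);

	/* SPIs start at INTID 32; assert then deassert the first one. */
	kvm_arm_irq_line(vm, 32, 1);
	kvm_arm_irq_line(vm, 32, 0);

	return gic_fd;
}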
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Definition for kernel virtual machines on s390x
+ *
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ * Christoph Schlameuss <schlameuss@linux.ibm.com>
+ */
+
+#ifndef SELFTEST_KVM_DEBUG_PRINT_H
+#define SELFTEST_KVM_DEBUG_PRINT_H
+
+#include "asm/ptrace.h"
+#include "kvm_util.h"
+#include "sie.h"
+
+static inline void print_hex_bytes(const char *name, u64 addr, size_t len)
+{
+ u64 pos;
+
+ pr_debug("%s (%p)\n", name, (void *)addr);
+ pr_debug(" 0/0x00---------|");
+ if (len > 8)
+ pr_debug(" 8/0x08---------|");
+ if (len > 16)
+ pr_debug(" 16/0x10--------|");
+ if (len > 24)
+ pr_debug(" 24/0x18--------|");
+ for (pos = 0; pos < len; pos += 8) {
+ if ((pos % 32) == 0)
+ pr_debug("\n %3lu 0x%.3lx ", pos, pos);
+ pr_debug(" %16lx", *((u64 *)(addr + pos)));
+ }
+ pr_debug("\n");
+}
+
+static inline void print_hex(const char *name, u64 addr)
+{
+ print_hex_bytes(name, addr, 512);
+}
+
+static inline void print_psw(struct kvm_run *run, struct kvm_s390_sie_block *sie_block)
+{
+ pr_debug("flags:0x%x psw:0x%.16llx:0x%.16llx exit:%u %s\n",
+ run->flags,
+ run->psw_mask, run->psw_addr,
+ run->exit_reason, exit_reason_str(run->exit_reason));
+ pr_debug("sie_block psw:0x%.16llx:0x%.16llx\n",
+ sie_block->psw_mask, sie_block->psw_addr);
+}
+
+static inline void print_run(struct kvm_run *run, struct kvm_s390_sie_block *sie_block)
+{
+ print_hex_bytes("run", (u64)run, 0x150);
+ print_hex("sie_block", (u64)sie_block);
+ print_psw(run, sie_block);
+}
+
+static inline void print_regs(struct kvm_run *run)
+{
+ struct kvm_sync_regs *sync_regs = &run->s.regs;
+
+ print_hex_bytes("GPRS", (u64)sync_regs->gprs, 8 * NUM_GPRS);
+ print_hex_bytes("ACRS", (u64)sync_regs->acrs, 4 * NUM_ACRS);
+ print_hex_bytes("CRS", (u64)sync_regs->crs, 8 * NUM_CRS);
+}
+
+#endif /* SELFTEST_KVM_DEBUG_PRINT_H */
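
A hedged host-side sketch of how these helpers combine after an unexpected exit; obtaining the SIE block mapping is outside this header, so it is passed in as an assumed parameter:

#include "debug_print.h"

static void dump_vcpu_state(struct kvm_vcpu *vcpu,
			    struct kvm_s390_sie_block *sie_block)
{
	print_run(vcpu->run, sie_block);
	print_regs(vcpu->run);
}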
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Test handler for the s390x DIAGNOSE 0x0318 instruction.
+ *
+ * Copyright (C) 2020, IBM
+ */
+
+#ifndef SELFTEST_KVM_DIAG318_TEST_HANDLER
+#define SELFTEST_KVM_DIAG318_TEST_HANDLER
+
+uint64_t get_diag318_info(void);
+
+#endif
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ * Hariharan Mari <hari55@linux.ibm.com>
+ *
+ * Get the facility bits with the STFLE instruction
+ */
+
+#ifndef SELFTEST_KVM_FACILITY_H
+#define SELFTEST_KVM_FACILITY_H
+
+#include <linux/bitops.h>
+
+/* alt_stfle_fac_list[16] + stfle_fac_list[16] */
+#define NB_STFL_DOUBLEWORDS 32
+
+extern uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS];
+extern bool stfle_flag;
+
+static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr)
+{
+ return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+static inline void stfle(uint64_t *fac, unsigned int nb_doublewords)
+{
+ register unsigned long r0 asm("0") = nb_doublewords - 1;
+
+ asm volatile(" .insn s,0xb2b00000,0(%1)\n"
+ : "+d" (r0)
+ : "a" (fac)
+ : "memory", "cc");
+}
+
+static inline void setup_facilities(void)
+{
+ stfle(stfl_doublewords, NB_STFL_DOUBLEWORDS);
+ stfle_flag = true;
+}
+
+static inline bool test_facility(int nr)
+{
+ if (!stfle_flag)
+ setup_facilities();
+ return test_bit_inv(nr, stfl_doublewords);
+}
+
+#endif
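
test_facility() lazily runs STFLE once and then answers queries from the cached doubleword array. A hedged s390-only sketch (facility 129, the vector facility, is used purely as an example number):

#include <stdbool.h>

#include "facility.h"

static bool host_has_vector_facility(void)
{
	return test_facility(129);
}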
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif // SELFTEST_KVM_UTIL_ARCH_H
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * s390x processor specific defines
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include <linux/compiler.h>
+
+/* Bits in the region/segment table entry */
+#define REGION_ENTRY_ORIGIN ~0xfffUL /* region/segment table origin */
+#define REGION_ENTRY_PROTECT 0x200 /* region protection bit */
+#define REGION_ENTRY_NOEXEC 0x100 /* region no-execute bit */
+#define REGION_ENTRY_OFFSET 0xc0 /* region table offset */
+#define REGION_ENTRY_INVALID 0x20 /* invalid region table entry */
+#define REGION_ENTRY_TYPE 0x0c /* region/segment table type mask */
+#define REGION_ENTRY_LENGTH 0x03 /* region third length */
+
+/* Bits in the page table entry */
+#define PAGE_INVALID 0x400 /* HW invalid bit */
+#define PAGE_PROTECT 0x200 /* HW read-only bit */
+#define PAGE_NOEXEC 0x100 /* HW no-execute bit */
+
+/* Page size definitions */
+#define PAGE_SHIFT 12
+#define PAGE_SIZE BIT_ULL(PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE - 1))
+
+/* Is there a portable way to do this? */
+static inline void cpu_relax(void)
+{
+ barrier();
+}
+
+/* Get the instruction length */
+static inline int insn_length(unsigned char code)
+{
+ return ((((int)code + 64) >> 7) + 1) << 1;
+}
+
+#endif
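
The insn_length() formula maps the first opcode byte to the 2/4/6-byte instruction lengths defined by the s390 ILC rules (opcodes 0x00-0x3f are 2 bytes, 0x40-0xbf are 4, 0xc0-0xff are 6). A self-contained sketch exercising the same formula:

#include <stdio.h>

static int insn_length(unsigned char code)
{
	return ((((int)code + 64) >> 7) + 1) << 1;
}

int main(void)
{
	printf("0x04 -> %d bytes\n", insn_length(0x04));	/* RR format, e.g. SPM */
	printf("0xb2 -> %d bytes\n", insn_length(0xb2));	/* S format, e.g. STFLE */
	printf("0xe3 -> %d bytes\n", insn_length(0xe3));	/* RXY format, e.g. LG */
	return 0;
}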
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Definition for kernel virtual machines on s390.
+ *
+ * Adapted copy of struct definition kvm_s390_sie_block from
+ * arch/s390/include/asm/kvm_host.h for use in userspace selftest programs.
+ *
+ * Copyright IBM Corp. 2008, 2024
+ *
+ * Authors:
+ * Christoph Schlameuss <schlameuss@linux.ibm.com>
+ * Carsten Otte <cotte@de.ibm.com>
+ */
+
+#ifndef SELFTEST_KVM_SIE_H
+#define SELFTEST_KVM_SIE_H
+
+#include <linux/types.h>
+
+struct kvm_s390_sie_block {
+#define CPUSTAT_STOPPED 0x80000000
+#define CPUSTAT_WAIT 0x10000000
+#define CPUSTAT_ECALL_PEND 0x08000000
+#define CPUSTAT_STOP_INT 0x04000000
+#define CPUSTAT_IO_INT 0x02000000
+#define CPUSTAT_EXT_INT 0x01000000
+#define CPUSTAT_RUNNING 0x00800000
+#define CPUSTAT_RETAINED 0x00400000
+#define CPUSTAT_TIMING_SUB 0x00020000
+#define CPUSTAT_SIE_SUB 0x00010000
+#define CPUSTAT_RRF 0x00008000
+#define CPUSTAT_SLSV 0x00004000
+#define CPUSTAT_SLSR 0x00002000
+#define CPUSTAT_ZARCH 0x00000800
+#define CPUSTAT_MCDS 0x00000100
+#define CPUSTAT_KSS 0x00000200
+#define CPUSTAT_SM 0x00000080
+#define CPUSTAT_IBS 0x00000040
+#define CPUSTAT_GED2 0x00000010
+#define CPUSTAT_G 0x00000008
+#define CPUSTAT_GED 0x00000004
+#define CPUSTAT_J 0x00000002
+#define CPUSTAT_P 0x00000001
+ __u32 cpuflags; /* 0x0000 */
+ __u32: 1; /* 0x0004 */
+ __u32 prefix : 18;
+ __u32: 1;
+ __u32 ibc : 12;
+ __u8 reserved08[4]; /* 0x0008 */
+#define PROG_IN_SIE BIT(0)
+ __u32 prog0c; /* 0x000c */
+ union {
+ __u8 reserved10[16]; /* 0x0010 */
+ struct {
+ __u64 pv_handle_cpu;
+ __u64 pv_handle_config;
+ };
+ };
+#define PROG_BLOCK_SIE BIT(0)
+#define PROG_REQUEST BIT(1)
+ __u32 prog20; /* 0x0020 */
+ __u8 reserved24[4]; /* 0x0024 */
+ __u64 cputm; /* 0x0028 */
+ __u64 ckc; /* 0x0030 */
+ __u64 epoch; /* 0x0038 */
+ __u32 svcc; /* 0x0040 */
+#define LCTL_CR0 0x8000
+#define LCTL_CR6 0x0200
+#define LCTL_CR9 0x0040
+#define LCTL_CR10 0x0020
+#define LCTL_CR11 0x0010
+#define LCTL_CR14 0x0002
+ __u16 lctl; /* 0x0044 */
+ __s16 icpua; /* 0x0046 */
+#define ICTL_OPEREXC 0x80000000
+#define ICTL_PINT 0x20000000
+#define ICTL_LPSW 0x00400000
+#define ICTL_STCTL 0x00040000
+#define ICTL_ISKE 0x00004000
+#define ICTL_SSKE 0x00002000
+#define ICTL_RRBE 0x00001000
+#define ICTL_TPROT 0x00000200
+ __u32 ictl; /* 0x0048 */
+#define ECA_CEI 0x80000000
+#define ECA_IB 0x40000000
+#define ECA_SIGPI 0x10000000
+#define ECA_MVPGI 0x01000000
+#define ECA_AIV 0x00200000
+#define ECA_VX 0x00020000
+#define ECA_PROTEXCI 0x00002000
+#define ECA_APIE 0x00000008
+#define ECA_SII 0x00000001
+ __u32 eca; /* 0x004c */
+#define ICPT_INST 0x04
+#define ICPT_PROGI 0x08
+#define ICPT_INSTPROGI 0x0C
+#define ICPT_EXTREQ 0x10
+#define ICPT_EXTINT 0x14
+#define ICPT_IOREQ 0x18
+#define ICPT_WAIT 0x1c
+#define ICPT_VALIDITY 0x20
+#define ICPT_STOP 0x28
+#define ICPT_OPEREXC 0x2C
+#define ICPT_PARTEXEC 0x38
+#define ICPT_IOINST 0x40
+#define ICPT_KSS 0x5c
+#define ICPT_MCHKREQ 0x60
+#define ICPT_INT_ENABLE 0x64
+#define ICPT_PV_INSTR 0x68
+#define ICPT_PV_NOTIFY 0x6c
+#define ICPT_PV_PREF 0x70
+ __u8 icptcode; /* 0x0050 */
+ __u8 icptstatus; /* 0x0051 */
+ __u16 ihcpu; /* 0x0052 */
+ __u8 reserved54; /* 0x0054 */
+#define IICTL_CODE_NONE 0x00
+#define IICTL_CODE_MCHK 0x01
+#define IICTL_CODE_EXT 0x02
+#define IICTL_CODE_IO 0x03
+#define IICTL_CODE_RESTART 0x04
+#define IICTL_CODE_SPECIFICATION 0x10
+#define IICTL_CODE_OPERAND 0x11
+ __u8 iictl; /* 0x0055 */
+ __u16 ipa; /* 0x0056 */
+ __u32 ipb; /* 0x0058 */
+ __u32 scaoh; /* 0x005c */
+#define FPF_BPBC 0x20
+ __u8 fpf; /* 0x0060 */
+#define ECB_GS 0x40
+#define ECB_TE 0x10
+#define ECB_SPECI 0x08
+#define ECB_SRSI 0x04
+#define ECB_HOSTPROTINT 0x02
+#define ECB_PTF 0x01
+ __u8 ecb; /* 0x0061 */
+#define ECB2_CMMA 0x80
+#define ECB2_IEP 0x20
+#define ECB2_PFMFI 0x08
+#define ECB2_ESCA 0x04
+#define ECB2_ZPCI_LSI 0x02
+ __u8 ecb2; /* 0x0062 */
+#define ECB3_AISI 0x20
+#define ECB3_AISII 0x10
+#define ECB3_DEA 0x08
+#define ECB3_AES 0x04
+#define ECB3_RI 0x01
+ __u8 ecb3; /* 0x0063 */
+#define ESCA_SCAOL_MASK ~0x3fU
+ __u32 scaol; /* 0x0064 */
+ __u8 sdf; /* 0x0068 */
+ __u8 epdx; /* 0x0069 */
+ __u8 cpnc; /* 0x006a */
+ __u8 reserved6b; /* 0x006b */
+ __u32 todpr; /* 0x006c */
+#define GISA_FORMAT1 0x00000001
+ __u32 gd; /* 0x0070 */
+ __u8 reserved74[12]; /* 0x0074 */
+ __u64 mso; /* 0x0080 */
+ __u64 msl; /* 0x0088 */
+ __u64 psw_mask; /* 0x0090 */
+ __u64 psw_addr; /* 0x0098 */
+ __u64 gg14; /* 0x00a0 */
+ __u64 gg15; /* 0x00a8 */
+ __u8 reservedb0[8]; /* 0x00b0 */
+#define HPID_KVM 0x4
+#define HPID_VSIE 0x5
+ __u8 hpid; /* 0x00b8 */
+ __u8 reservedb9[7]; /* 0x00b9 */
+ union {
+ struct {
+ __u32 eiparams; /* 0x00c0 */
+ __u16 extcpuaddr; /* 0x00c4 */
+ __u16 eic; /* 0x00c6 */
+ };
+ __u64 mcic; /* 0x00c0 */
+ } __packed;
+ __u32 reservedc8; /* 0x00c8 */
+ union {
+ struct {
+ __u16 pgmilc; /* 0x00cc */
+ __u16 iprcc; /* 0x00ce */
+ };
+ __u32 edc; /* 0x00cc */
+ } __packed;
+ union {
+ struct {
+ __u32 dxc; /* 0x00d0 */
+ __u16 mcn; /* 0x00d4 */
+ __u8 perc; /* 0x00d6 */
+ __u8 peratmid; /* 0x00d7 */
+ };
+ __u64 faddr; /* 0x00d0 */
+ } __packed;
+ __u64 peraddr; /* 0x00d8 */
+ __u8 eai; /* 0x00e0 */
+ __u8 peraid; /* 0x00e1 */
+ __u8 oai; /* 0x00e2 */
+ __u8 armid; /* 0x00e3 */
+ __u8 reservede4[4]; /* 0x00e4 */
+ union {
+ __u64 tecmc; /* 0x00e8 */
+ struct {
+ __u16 subchannel_id; /* 0x00e8 */
+ __u16 subchannel_nr; /* 0x00ea */
+ __u32 io_int_parm; /* 0x00ec */
+ __u32 io_int_word; /* 0x00f0 */
+ };
+ } __packed;
+ __u8 reservedf4[8]; /* 0x00f4 */
+#define CRYCB_FORMAT_MASK 0x00000003
+#define CRYCB_FORMAT0 0x00000000
+#define CRYCB_FORMAT1 0x00000001
+#define CRYCB_FORMAT2 0x00000003
+ __u32 crycbd; /* 0x00fc */
+ __u64 gcr[16]; /* 0x0100 */
+ union {
+ __u64 gbea; /* 0x0180 */
+ __u64 sidad;
+ };
+ __u8 reserved188[8]; /* 0x0188 */
+ __u64 sdnxo; /* 0x0190 */
+ __u8 reserved198[8]; /* 0x0198 */
+ __u32 fac; /* 0x01a0 */
+ __u8 reserved1a4[20]; /* 0x01a4 */
+ __u64 cbrlo; /* 0x01b8 */
+ __u8 reserved1c0[8]; /* 0x01c0 */
+#define ECD_HOSTREGMGMT 0x20000000
+#define ECD_MEF 0x08000000
+#define ECD_ETOKENF 0x02000000
+#define ECD_ECC 0x00200000
+ __u32 ecd; /* 0x01c8 */
+ __u8 reserved1cc[18]; /* 0x01cc */
+ __u64 pp; /* 0x01de */
+ __u8 reserved1e6[2]; /* 0x01e6 */
+ __u64 itdba; /* 0x01e8 */
+ __u64 riccbd; /* 0x01f0 */
+ __u64 gvrd; /* 0x01f8 */
+} __packed __aligned(512);
+
+#endif /* SELFTEST_KVM_SIE_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_S390_SIEIC
+
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+ /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
+ asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory");
+}
+
+#endif
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Definition for kernel virtual machines on s390x
- *
- * Copyright IBM Corp. 2024
- *
- * Authors:
- * Christoph Schlameuss <schlameuss@linux.ibm.com>
- */
-
-#ifndef SELFTEST_KVM_DEBUG_PRINT_H
-#define SELFTEST_KVM_DEBUG_PRINT_H
-
-#include "asm/ptrace.h"
-#include "kvm_util.h"
-#include "sie.h"
-
-static inline void print_hex_bytes(const char *name, u64 addr, size_t len)
-{
- u64 pos;
-
- pr_debug("%s (%p)\n", name, (void *)addr);
- pr_debug(" 0/0x00---------|");
- if (len > 8)
- pr_debug(" 8/0x08---------|");
- if (len > 16)
- pr_debug(" 16/0x10--------|");
- if (len > 24)
- pr_debug(" 24/0x18--------|");
- for (pos = 0; pos < len; pos += 8) {
- if ((pos % 32) == 0)
- pr_debug("\n %3lu 0x%.3lx ", pos, pos);
- pr_debug(" %16lx", *((u64 *)(addr + pos)));
- }
- pr_debug("\n");
-}
-
-static inline void print_hex(const char *name, u64 addr)
-{
- print_hex_bytes(name, addr, 512);
-}
-
-static inline void print_psw(struct kvm_run *run, struct kvm_s390_sie_block *sie_block)
-{
- pr_debug("flags:0x%x psw:0x%.16llx:0x%.16llx exit:%u %s\n",
- run->flags,
- run->psw_mask, run->psw_addr,
- run->exit_reason, exit_reason_str(run->exit_reason));
- pr_debug("sie_block psw:0x%.16llx:0x%.16llx\n",
- sie_block->psw_mask, sie_block->psw_addr);
-}
-
-static inline void print_run(struct kvm_run *run, struct kvm_s390_sie_block *sie_block)
-{
- print_hex_bytes("run", (u64)run, 0x150);
- print_hex("sie_block", (u64)sie_block);
- print_psw(run, sie_block);
-}
-
-static inline void print_regs(struct kvm_run *run)
-{
- struct kvm_sync_regs *sync_regs = &run->s.regs;
-
- print_hex_bytes("GPRS", (u64)sync_regs->gprs, 8 * NUM_GPRS);
- print_hex_bytes("ACRS", (u64)sync_regs->acrs, 4 * NUM_ACRS);
- print_hex_bytes("CRS", (u64)sync_regs->crs, 8 * NUM_CRS);
-}
-
-#endif /* SELFTEST_KVM_DEBUG_PRINT_H */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later
- *
- * Test handler for the s390x DIAGNOSE 0x0318 instruction.
- *
- * Copyright (C) 2020, IBM
- */
-
-#ifndef SELFTEST_KVM_DIAG318_TEST_HANDLER
-#define SELFTEST_KVM_DIAG318_TEST_HANDLER
-
-uint64_t get_diag318_info(void);
-
-#endif
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright IBM Corp. 2024
- *
- * Authors:
- * Hariharan Mari <hari55@linux.ibm.com>
- *
- * Get the facility bits with the STFLE instruction
- */
-
-#ifndef SELFTEST_KVM_FACILITY_H
-#define SELFTEST_KVM_FACILITY_H
-
-#include <linux/bitops.h>
-
-/* alt_stfle_fac_list[16] + stfle_fac_list[16] */
-#define NB_STFL_DOUBLEWORDS 32
-
-extern uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS];
-extern bool stfle_flag;
-
-static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr)
-{
- return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
-}
-
-static inline void stfle(uint64_t *fac, unsigned int nb_doublewords)
-{
- register unsigned long r0 asm("0") = nb_doublewords - 1;
-
- asm volatile(" .insn s,0xb2b00000,0(%1)\n"
- : "+d" (r0)
- : "a" (fac)
- : "memory", "cc");
-}
-
-static inline void setup_facilities(void)
-{
- stfle(stfl_doublewords, NB_STFL_DOUBLEWORDS);
- stfle_flag = true;
-}
-
-static inline bool test_facility(int nr)
-{
- if (!stfle_flag)
- setup_facilities();
- return test_bit_inv(nr, stfl_doublewords);
-}
-
-#endif
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_UTIL_ARCH_H
-#define SELFTEST_KVM_UTIL_ARCH_H
-
-struct kvm_vm_arch {};
-
-#endif // SELFTEST_KVM_UTIL_ARCH_H
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * s390x processor specific defines
- */
-#ifndef SELFTEST_KVM_PROCESSOR_H
-#define SELFTEST_KVM_PROCESSOR_H
-
-#include <linux/compiler.h>
-
-/* Bits in the region/segment table entry */
-#define REGION_ENTRY_ORIGIN ~0xfffUL /* region/segment table origin */
-#define REGION_ENTRY_PROTECT 0x200 /* region protection bit */
-#define REGION_ENTRY_NOEXEC 0x100 /* region no-execute bit */
-#define REGION_ENTRY_OFFSET 0xc0 /* region table offset */
-#define REGION_ENTRY_INVALID 0x20 /* invalid region table entry */
-#define REGION_ENTRY_TYPE 0x0c /* region/segment table type mask */
-#define REGION_ENTRY_LENGTH 0x03 /* region third length */
-
-/* Bits in the page table entry */
-#define PAGE_INVALID 0x400 /* HW invalid bit */
-#define PAGE_PROTECT 0x200 /* HW read-only bit */
-#define PAGE_NOEXEC 0x100 /* HW no-execute bit */
-
-/* Page size definitions */
-#define PAGE_SHIFT 12
-#define PAGE_SIZE BIT_ULL(PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE - 1))
-
-/* Is there a portable way to do this? */
-static inline void cpu_relax(void)
-{
- barrier();
-}
-
-/* Get the instruction length */
-static inline int insn_length(unsigned char code)
-{
- return ((((int)code + 64) >> 7) + 1) << 1;
-}
-
-#endif
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Definition for kernel virtual machines on s390.
- *
- * Adapted copy of struct definition kvm_s390_sie_block from
- * arch/s390/include/asm/kvm_host.h for use in userspace selftest programs.
- *
- * Copyright IBM Corp. 2008, 2024
- *
- * Authors:
- * Christoph Schlameuss <schlameuss@linux.ibm.com>
- * Carsten Otte <cotte@de.ibm.com>
- */
-
-#ifndef SELFTEST_KVM_SIE_H
-#define SELFTEST_KVM_SIE_H
-
-#include <linux/types.h>
-
-struct kvm_s390_sie_block {
-#define CPUSTAT_STOPPED 0x80000000
-#define CPUSTAT_WAIT 0x10000000
-#define CPUSTAT_ECALL_PEND 0x08000000
-#define CPUSTAT_STOP_INT 0x04000000
-#define CPUSTAT_IO_INT 0x02000000
-#define CPUSTAT_EXT_INT 0x01000000
-#define CPUSTAT_RUNNING 0x00800000
-#define CPUSTAT_RETAINED 0x00400000
-#define CPUSTAT_TIMING_SUB 0x00020000
-#define CPUSTAT_SIE_SUB 0x00010000
-#define CPUSTAT_RRF 0x00008000
-#define CPUSTAT_SLSV 0x00004000
-#define CPUSTAT_SLSR 0x00002000
-#define CPUSTAT_ZARCH 0x00000800
-#define CPUSTAT_MCDS 0x00000100
-#define CPUSTAT_KSS 0x00000200
-#define CPUSTAT_SM 0x00000080
-#define CPUSTAT_IBS 0x00000040
-#define CPUSTAT_GED2 0x00000010
-#define CPUSTAT_G 0x00000008
-#define CPUSTAT_GED 0x00000004
-#define CPUSTAT_J 0x00000002
-#define CPUSTAT_P 0x00000001
- __u32 cpuflags; /* 0x0000 */
- __u32: 1; /* 0x0004 */
- __u32 prefix : 18;
- __u32: 1;
- __u32 ibc : 12;
- __u8 reserved08[4]; /* 0x0008 */
-#define PROG_IN_SIE BIT(0)
- __u32 prog0c; /* 0x000c */
- union {
- __u8 reserved10[16]; /* 0x0010 */
- struct {
- __u64 pv_handle_cpu;
- __u64 pv_handle_config;
- };
- };
-#define PROG_BLOCK_SIE BIT(0)
-#define PROG_REQUEST BIT(1)
- __u32 prog20; /* 0x0020 */
- __u8 reserved24[4]; /* 0x0024 */
- __u64 cputm; /* 0x0028 */
- __u64 ckc; /* 0x0030 */
- __u64 epoch; /* 0x0038 */
- __u32 svcc; /* 0x0040 */
-#define LCTL_CR0 0x8000
-#define LCTL_CR6 0x0200
-#define LCTL_CR9 0x0040
-#define LCTL_CR10 0x0020
-#define LCTL_CR11 0x0010
-#define LCTL_CR14 0x0002
- __u16 lctl; /* 0x0044 */
- __s16 icpua; /* 0x0046 */
-#define ICTL_OPEREXC 0x80000000
-#define ICTL_PINT 0x20000000
-#define ICTL_LPSW 0x00400000
-#define ICTL_STCTL 0x00040000
-#define ICTL_ISKE 0x00004000
-#define ICTL_SSKE 0x00002000
-#define ICTL_RRBE 0x00001000
-#define ICTL_TPROT 0x00000200
- __u32 ictl; /* 0x0048 */
-#define ECA_CEI 0x80000000
-#define ECA_IB 0x40000000
-#define ECA_SIGPI 0x10000000
-#define ECA_MVPGI 0x01000000
-#define ECA_AIV 0x00200000
-#define ECA_VX 0x00020000
-#define ECA_PROTEXCI 0x00002000
-#define ECA_APIE 0x00000008
-#define ECA_SII 0x00000001
- __u32 eca; /* 0x004c */
-#define ICPT_INST 0x04
-#define ICPT_PROGI 0x08
-#define ICPT_INSTPROGI 0x0C
-#define ICPT_EXTREQ 0x10
-#define ICPT_EXTINT 0x14
-#define ICPT_IOREQ 0x18
-#define ICPT_WAIT 0x1c
-#define ICPT_VALIDITY 0x20
-#define ICPT_STOP 0x28
-#define ICPT_OPEREXC 0x2C
-#define ICPT_PARTEXEC 0x38
-#define ICPT_IOINST 0x40
-#define ICPT_KSS 0x5c
-#define ICPT_MCHKREQ 0x60
-#define ICPT_INT_ENABLE 0x64
-#define ICPT_PV_INSTR 0x68
-#define ICPT_PV_NOTIFY 0x6c
-#define ICPT_PV_PREF 0x70
- __u8 icptcode; /* 0x0050 */
- __u8 icptstatus; /* 0x0051 */
- __u16 ihcpu; /* 0x0052 */
- __u8 reserved54; /* 0x0054 */
-#define IICTL_CODE_NONE 0x00
-#define IICTL_CODE_MCHK 0x01
-#define IICTL_CODE_EXT 0x02
-#define IICTL_CODE_IO 0x03
-#define IICTL_CODE_RESTART 0x04
-#define IICTL_CODE_SPECIFICATION 0x10
-#define IICTL_CODE_OPERAND 0x11
- __u8 iictl; /* 0x0055 */
- __u16 ipa; /* 0x0056 */
- __u32 ipb; /* 0x0058 */
- __u32 scaoh; /* 0x005c */
-#define FPF_BPBC 0x20
- __u8 fpf; /* 0x0060 */
-#define ECB_GS 0x40
-#define ECB_TE 0x10
-#define ECB_SPECI 0x08
-#define ECB_SRSI 0x04
-#define ECB_HOSTPROTINT 0x02
-#define ECB_PTF 0x01
- __u8 ecb; /* 0x0061 */
-#define ECB2_CMMA 0x80
-#define ECB2_IEP 0x20
-#define ECB2_PFMFI 0x08
-#define ECB2_ESCA 0x04
-#define ECB2_ZPCI_LSI 0x02
- __u8 ecb2; /* 0x0062 */
-#define ECB3_AISI 0x20
-#define ECB3_AISII 0x10
-#define ECB3_DEA 0x08
-#define ECB3_AES 0x04
-#define ECB3_RI 0x01
- __u8 ecb3; /* 0x0063 */
-#define ESCA_SCAOL_MASK ~0x3fU
- __u32 scaol; /* 0x0064 */
- __u8 sdf; /* 0x0068 */
- __u8 epdx; /* 0x0069 */
- __u8 cpnc; /* 0x006a */
- __u8 reserved6b; /* 0x006b */
- __u32 todpr; /* 0x006c */
-#define GISA_FORMAT1 0x00000001
- __u32 gd; /* 0x0070 */
- __u8 reserved74[12]; /* 0x0074 */
- __u64 mso; /* 0x0080 */
- __u64 msl; /* 0x0088 */
- __u64 psw_mask; /* 0x0090 */
- __u64 psw_addr; /* 0x0098 */
- __u64 gg14; /* 0x00a0 */
- __u64 gg15; /* 0x00a8 */
- __u8 reservedb0[8]; /* 0x00b0 */
-#define HPID_KVM 0x4
-#define HPID_VSIE 0x5
- __u8 hpid; /* 0x00b8 */
- __u8 reservedb9[7]; /* 0x00b9 */
- union {
- struct {
- __u32 eiparams; /* 0x00c0 */
- __u16 extcpuaddr; /* 0x00c4 */
- __u16 eic; /* 0x00c6 */
- };
- __u64 mcic; /* 0x00c0 */
- } __packed;
- __u32 reservedc8; /* 0x00c8 */
- union {
- struct {
- __u16 pgmilc; /* 0x00cc */
- __u16 iprcc; /* 0x00ce */
- };
- __u32 edc; /* 0x00cc */
- } __packed;
- union {
- struct {
- __u32 dxc; /* 0x00d0 */
- __u16 mcn; /* 0x00d4 */
- __u8 perc; /* 0x00d6 */
- __u8 peratmid; /* 0x00d7 */
- };
- __u64 faddr; /* 0x00d0 */
- } __packed;
- __u64 peraddr; /* 0x00d8 */
- __u8 eai; /* 0x00e0 */
- __u8 peraid; /* 0x00e1 */
- __u8 oai; /* 0x00e2 */
- __u8 armid; /* 0x00e3 */
- __u8 reservede4[4]; /* 0x00e4 */
- union {
- __u64 tecmc; /* 0x00e8 */
- struct {
- __u16 subchannel_id; /* 0x00e8 */
- __u16 subchannel_nr; /* 0x00ea */
- __u32 io_int_parm; /* 0x00ec */
- __u32 io_int_word; /* 0x00f0 */
- };
- } __packed;
- __u8 reservedf4[8]; /* 0x00f4 */
-#define CRYCB_FORMAT_MASK 0x00000003
-#define CRYCB_FORMAT0 0x00000000
-#define CRYCB_FORMAT1 0x00000001
-#define CRYCB_FORMAT2 0x00000003
- __u32 crycbd; /* 0x00fc */
- __u64 gcr[16]; /* 0x0100 */
- union {
- __u64 gbea; /* 0x0180 */
- __u64 sidad;
- };
- __u8 reserved188[8]; /* 0x0188 */
- __u64 sdnxo; /* 0x0190 */
- __u8 reserved198[8]; /* 0x0198 */
- __u32 fac; /* 0x01a0 */
- __u8 reserved1a4[20]; /* 0x01a4 */
- __u64 cbrlo; /* 0x01b8 */
- __u8 reserved1c0[8]; /* 0x01c0 */
-#define ECD_HOSTREGMGMT 0x20000000
-#define ECD_MEF 0x08000000
-#define ECD_ETOKENF 0x02000000
-#define ECD_ECC 0x00200000
- __u32 ecd; /* 0x01c8 */
- __u8 reserved1cc[18]; /* 0x01cc */
- __u64 pp; /* 0x01de */
- __u8 reserved1e6[2]; /* 0x01e6 */
- __u64 itdba; /* 0x01e8 */
- __u64 riccbd; /* 0x01f0 */
- __u64 gvrd; /* 0x01f8 */
-} __packed __aligned(512);
-
-#endif /* SELFTEST_KVM_SIE_H */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_UCALL_H
-#define SELFTEST_KVM_UCALL_H
-
-#include "kvm_util.h"
-
-#define UCALL_EXIT_REASON KVM_EXIT_S390_SIEIC
-
-static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-}
-
-static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
- /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
- asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory");
-}
-
-#endif
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_APIC_H
+#define SELFTEST_KVM_APIC_H
+
+#include <stdint.h>
+
+#include "processor.h"
+#include "ucall_common.h"
+
+#define APIC_DEFAULT_GPA 0xfee00000ULL
+
+/* APIC base address MSR and fields */
+#define MSR_IA32_APICBASE 0x0000001b
+#define MSR_IA32_APICBASE_BSP (1<<8)
+#define MSR_IA32_APICBASE_EXTD (1<<10)
+#define MSR_IA32_APICBASE_ENABLE (1<<11)
+#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
+#define GET_APIC_BASE(x) (((x) >> 12) << 12)
+
+#define APIC_BASE_MSR 0x800
+#define X2APIC_ENABLE (1UL << 10)
+#define APIC_ID 0x20
+#define APIC_LVR 0x30
+#define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF)
+#define APIC_TASKPRI 0x80
+#define APIC_PROCPRI 0xA0
+#define APIC_EOI 0xB0
+#define APIC_SPIV 0xF0
+#define APIC_SPIV_FOCUS_DISABLED (1 << 9)
+#define APIC_SPIV_APIC_ENABLED (1 << 8)
+#define APIC_IRR 0x200
+#define APIC_ICR 0x300
+#define APIC_LVTCMCI 0x2f0
+#define APIC_DEST_SELF 0x40000
+#define APIC_DEST_ALLINC 0x80000
+#define APIC_DEST_ALLBUT 0xC0000
+#define APIC_ICR_RR_MASK 0x30000
+#define APIC_ICR_RR_INVALID 0x00000
+#define APIC_ICR_RR_INPROG 0x10000
+#define APIC_ICR_RR_VALID 0x20000
+#define APIC_INT_LEVELTRIG 0x08000
+#define APIC_INT_ASSERT 0x04000
+#define APIC_ICR_BUSY 0x01000
+#define APIC_DEST_LOGICAL 0x00800
+#define APIC_DEST_PHYSICAL 0x00000
+#define APIC_DM_FIXED 0x00000
+#define APIC_DM_FIXED_MASK 0x00700
+#define APIC_DM_LOWEST 0x00100
+#define APIC_DM_SMI 0x00200
+#define APIC_DM_REMRD 0x00300
+#define APIC_DM_NMI 0x00400
+#define APIC_DM_INIT 0x00500
+#define APIC_DM_STARTUP 0x00600
+#define APIC_DM_EXTINT 0x00700
+#define APIC_VECTOR_MASK 0x000FF
+#define APIC_ICR2 0x310
+#define SET_APIC_DEST_FIELD(x) ((x) << 24)
+#define APIC_LVTT 0x320
+#define APIC_LVT_TIMER_ONESHOT (0 << 17)
+#define APIC_LVT_TIMER_PERIODIC (1 << 17)
+#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17)
+#define APIC_LVT_MASKED (1 << 16)
+#define APIC_TMICT 0x380
+#define APIC_TMCCT 0x390
+#define APIC_TDCR 0x3E0
+
+void apic_disable(void);
+void xapic_enable(void);
+void x2apic_enable(void);
+
+static inline uint32_t get_bsp_flag(void)
+{
+ return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
+}
+
+static inline uint32_t xapic_read_reg(unsigned int reg)
+{
+ return ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2];
+}
+
+static inline void xapic_write_reg(unsigned int reg, uint32_t val)
+{
+ ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2] = val;
+}
+
+static inline uint64_t x2apic_read_reg(unsigned int reg)
+{
+ return rdmsr(APIC_BASE_MSR + (reg >> 4));
+}
+
+static inline uint8_t x2apic_write_reg_safe(unsigned int reg, uint64_t value)
+{
+ return wrmsr_safe(APIC_BASE_MSR + (reg >> 4), value);
+}
+
+static inline void x2apic_write_reg(unsigned int reg, uint64_t value)
+{
+ uint8_t fault = x2apic_write_reg_safe(reg, value);
+
+ __GUEST_ASSERT(!fault, "Unexpected fault 0x%x on WRMSR(%x) = %lx\n",
+ fault, APIC_BASE_MSR + (reg >> 4), value);
+}
+
+static inline void x2apic_write_reg_fault(unsigned int reg, uint64_t value)
+{
+ uint8_t fault = x2apic_write_reg_safe(reg, value);
+
+ __GUEST_ASSERT(fault == GP_VECTOR,
+ "Wanted #GP on WRMSR(%x) = %lx, got 0x%x\n",
+ APIC_BASE_MSR + (reg >> 4), value, fault);
+}
+
+#endif /* SELFTEST_KVM_APIC_H */
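
In x2APIC mode the accessors above translate an xAPIC MMIO offset to its MSR as APIC_BASE_MSR + (reg >> 4), so APIC_ICR (0x300) becomes MSR 0x830. A hedged guest-side sketch sending a fixed self-IPI (vector 0x20 is arbitrary, and a handler for it must be installed separately):

#include "apic.h"

static void send_self_ipi(void)
{
	x2apic_enable();
	x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED |
				   APIC_INT_ASSERT | 0x20);
}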
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#ifndef SELFTEST_KVM_EVMCS_H
+#define SELFTEST_KVM_EVMCS_H
+
+#include <stdint.h>
+#include "hyperv.h"
+#include "vmx.h"
+
+#define u16 uint16_t
+#define u32 uint32_t
+#define u64 uint64_t
+
+#define EVMCS_VERSION 1
+
+extern bool enable_evmcs;
+
+struct hv_enlightened_vmcs {
+ u32 revision_id;
+ u32 abort;
+
+ u16 host_es_selector;
+ u16 host_cs_selector;
+ u16 host_ss_selector;
+ u16 host_ds_selector;
+ u16 host_fs_selector;
+ u16 host_gs_selector;
+ u16 host_tr_selector;
+
+ u16 padding16_1;
+
+ u64 host_ia32_pat;
+ u64 host_ia32_efer;
+
+ u64 host_cr0;
+ u64 host_cr3;
+ u64 host_cr4;
+
+ u64 host_ia32_sysenter_esp;
+ u64 host_ia32_sysenter_eip;
+ u64 host_rip;
+ u32 host_ia32_sysenter_cs;
+
+ u32 pin_based_vm_exec_control;
+ u32 vm_exit_controls;
+ u32 secondary_vm_exec_control;
+
+ u64 io_bitmap_a;
+ u64 io_bitmap_b;
+ u64 msr_bitmap;
+
+ u16 guest_es_selector;
+ u16 guest_cs_selector;
+ u16 guest_ss_selector;
+ u16 guest_ds_selector;
+ u16 guest_fs_selector;
+ u16 guest_gs_selector;
+ u16 guest_ldtr_selector;
+ u16 guest_tr_selector;
+
+ u32 guest_es_limit;
+ u32 guest_cs_limit;
+ u32 guest_ss_limit;
+ u32 guest_ds_limit;
+ u32 guest_fs_limit;
+ u32 guest_gs_limit;
+ u32 guest_ldtr_limit;
+ u32 guest_tr_limit;
+ u32 guest_gdtr_limit;
+ u32 guest_idtr_limit;
+
+ u32 guest_es_ar_bytes;
+ u32 guest_cs_ar_bytes;
+ u32 guest_ss_ar_bytes;
+ u32 guest_ds_ar_bytes;
+ u32 guest_fs_ar_bytes;
+ u32 guest_gs_ar_bytes;
+ u32 guest_ldtr_ar_bytes;
+ u32 guest_tr_ar_bytes;
+
+ u64 guest_es_base;
+ u64 guest_cs_base;
+ u64 guest_ss_base;
+ u64 guest_ds_base;
+ u64 guest_fs_base;
+ u64 guest_gs_base;
+ u64 guest_ldtr_base;
+ u64 guest_tr_base;
+ u64 guest_gdtr_base;
+ u64 guest_idtr_base;
+
+ u64 padding64_1[3];
+
+ u64 vm_exit_msr_store_addr;
+ u64 vm_exit_msr_load_addr;
+ u64 vm_entry_msr_load_addr;
+
+ u64 cr3_target_value0;
+ u64 cr3_target_value1;
+ u64 cr3_target_value2;
+ u64 cr3_target_value3;
+
+ u32 page_fault_error_code_mask;
+ u32 page_fault_error_code_match;
+
+ u32 cr3_target_count;
+ u32 vm_exit_msr_store_count;
+ u32 vm_exit_msr_load_count;
+ u32 vm_entry_msr_load_count;
+
+ u64 tsc_offset;
+ u64 virtual_apic_page_addr;
+ u64 vmcs_link_pointer;
+
+ u64 guest_ia32_debugctl;
+ u64 guest_ia32_pat;
+ u64 guest_ia32_efer;
+
+ u64 guest_pdptr0;
+ u64 guest_pdptr1;
+ u64 guest_pdptr2;
+ u64 guest_pdptr3;
+
+ u64 guest_pending_dbg_exceptions;
+ u64 guest_sysenter_esp;
+ u64 guest_sysenter_eip;
+
+ u32 guest_activity_state;
+ u32 guest_sysenter_cs;
+
+ u64 cr0_guest_host_mask;
+ u64 cr4_guest_host_mask;
+ u64 cr0_read_shadow;
+ u64 cr4_read_shadow;
+ u64 guest_cr0;
+ u64 guest_cr3;
+ u64 guest_cr4;
+ u64 guest_dr7;
+
+ u64 host_fs_base;
+ u64 host_gs_base;
+ u64 host_tr_base;
+ u64 host_gdtr_base;
+ u64 host_idtr_base;
+ u64 host_rsp;
+
+ u64 ept_pointer;
+
+ u16 virtual_processor_id;
+ u16 padding16_2[3];
+
+ u64 padding64_2[5];
+ u64 guest_physical_address;
+
+ u32 vm_instruction_error;
+ u32 vm_exit_reason;
+ u32 vm_exit_intr_info;
+ u32 vm_exit_intr_error_code;
+ u32 idt_vectoring_info_field;
+ u32 idt_vectoring_error_code;
+ u32 vm_exit_instruction_len;
+ u32 vmx_instruction_info;
+
+ u64 exit_qualification;
+ u64 exit_io_instruction_ecx;
+ u64 exit_io_instruction_esi;
+ u64 exit_io_instruction_edi;
+ u64 exit_io_instruction_eip;
+
+ u64 guest_linear_address;
+ u64 guest_rsp;
+ u64 guest_rflags;
+
+ u32 guest_interruptibility_info;
+ u32 cpu_based_vm_exec_control;
+ u32 exception_bitmap;
+ u32 vm_entry_controls;
+ u32 vm_entry_intr_info_field;
+ u32 vm_entry_exception_error_code;
+ u32 vm_entry_instruction_len;
+ u32 tpr_threshold;
+
+ u64 guest_rip;
+
+ u32 hv_clean_fields;
+ u32 padding32_1;
+ u32 hv_synthetic_controls;
+ struct {
+ u32 nested_flush_hypercall:1;
+ u32 msr_bitmap:1;
+ u32 reserved:30;
+ } __packed hv_enlightenments_control;
+ u32 hv_vp_id;
+ u32 padding32_2;
+ u64 hv_vm_id;
+ u64 partition_assist_page;
+ u64 padding64_4[4];
+ u64 guest_bndcfgs;
+ u64 guest_ia32_perf_global_ctrl;
+ u64 guest_ia32_s_cet;
+ u64 guest_ssp;
+ u64 guest_ia32_int_ssp_table_addr;
+ u64 guest_ia32_lbr_ctl;
+ u64 padding64_5[2];
+ u64 xss_exit_bitmap;
+ u64 encls_exiting_bitmap;
+ u64 host_ia32_perf_global_ctrl;
+ u64 tsc_multiplier;
+ u64 host_ia32_s_cet;
+ u64 host_ssp;
+ u64 host_ia32_int_ssp_table_addr;
+ u64 padding64_6;
+} __packed;
+
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP BIT(1)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF
+
+#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031
+
+extern struct hv_enlightened_vmcs *current_evmcs;
+
+int vcpu_enable_evmcs(struct kvm_vcpu *vcpu);
+
+static inline void evmcs_enable(void)
+{
+ enable_evmcs = true;
+}
+
+static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs)
+{
+ current_vp_assist->current_nested_vmcs = vmcs_pa;
+ current_vp_assist->enlighten_vmentry = 1;
+
+ current_evmcs = vmcs;
+
+ return 0;
+}
+
+static inline bool load_evmcs(struct hyperv_test_pages *hv)
+{
+ if (evmcs_vmptrld(hv->enlightened_vmcs_gpa, hv->enlightened_vmcs))
+ return false;
+
+ current_evmcs->revision_id = EVMCS_VERSION;
+
+ return true;
+}
+
+static inline int evmcs_vmptrst(uint64_t *value)
+{
+ *value = current_vp_assist->current_nested_vmcs &
+ ~HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+ return 0;
+}
+
+static inline int evmcs_vmread(uint64_t encoding, uint64_t *value)
+{
+ switch (encoding) {
+ case GUEST_RIP:
+ *value = current_evmcs->guest_rip;
+ break;
+ case GUEST_RSP:
+ *value = current_evmcs->guest_rsp;
+ break;
+ case GUEST_RFLAGS:
+ *value = current_evmcs->guest_rflags;
+ break;
+ case HOST_IA32_PAT:
+ *value = current_evmcs->host_ia32_pat;
+ break;
+ case HOST_IA32_EFER:
+ *value = current_evmcs->host_ia32_efer;
+ break;
+ case HOST_CR0:
+ *value = current_evmcs->host_cr0;
+ break;
+ case HOST_CR3:
+ *value = current_evmcs->host_cr3;
+ break;
+ case HOST_CR4:
+ *value = current_evmcs->host_cr4;
+ break;
+ case HOST_IA32_SYSENTER_ESP:
+ *value = current_evmcs->host_ia32_sysenter_esp;
+ break;
+ case HOST_IA32_SYSENTER_EIP:
+ *value = current_evmcs->host_ia32_sysenter_eip;
+ break;
+ case HOST_RIP:
+ *value = current_evmcs->host_rip;
+ break;
+ case IO_BITMAP_A:
+ *value = current_evmcs->io_bitmap_a;
+ break;
+ case IO_BITMAP_B:
+ *value = current_evmcs->io_bitmap_b;
+ break;
+ case MSR_BITMAP:
+ *value = current_evmcs->msr_bitmap;
+ break;
+ case GUEST_ES_BASE:
+ *value = current_evmcs->guest_es_base;
+ break;
+ case GUEST_CS_BASE:
+ *value = current_evmcs->guest_cs_base;
+ break;
+ case GUEST_SS_BASE:
+ *value = current_evmcs->guest_ss_base;
+ break;
+ case GUEST_DS_BASE:
+ *value = current_evmcs->guest_ds_base;
+ break;
+ case GUEST_FS_BASE:
+ *value = current_evmcs->guest_fs_base;
+ break;
+ case GUEST_GS_BASE:
+ *value = current_evmcs->guest_gs_base;
+ break;
+ case GUEST_LDTR_BASE:
+ *value = current_evmcs->guest_ldtr_base;
+ break;
+ case GUEST_TR_BASE:
+ *value = current_evmcs->guest_tr_base;
+ break;
+ case GUEST_GDTR_BASE:
+ *value = current_evmcs->guest_gdtr_base;
+ break;
+ case GUEST_IDTR_BASE:
+ *value = current_evmcs->guest_idtr_base;
+ break;
+ case TSC_OFFSET:
+ *value = current_evmcs->tsc_offset;
+ break;
+ case VIRTUAL_APIC_PAGE_ADDR:
+ *value = current_evmcs->virtual_apic_page_addr;
+ break;
+ case VMCS_LINK_POINTER:
+ *value = current_evmcs->vmcs_link_pointer;
+ break;
+ case GUEST_IA32_DEBUGCTL:
+ *value = current_evmcs->guest_ia32_debugctl;
+ break;
+ case GUEST_IA32_PAT:
+ *value = current_evmcs->guest_ia32_pat;
+ break;
+ case GUEST_IA32_EFER:
+ *value = current_evmcs->guest_ia32_efer;
+ break;
+ case GUEST_PDPTR0:
+ *value = current_evmcs->guest_pdptr0;
+ break;
+ case GUEST_PDPTR1:
+ *value = current_evmcs->guest_pdptr1;
+ break;
+ case GUEST_PDPTR2:
+ *value = current_evmcs->guest_pdptr2;
+ break;
+ case GUEST_PDPTR3:
+ *value = current_evmcs->guest_pdptr3;
+ break;
+ case GUEST_PENDING_DBG_EXCEPTIONS:
+ *value = current_evmcs->guest_pending_dbg_exceptions;
+ break;
+ case GUEST_SYSENTER_ESP:
+ *value = current_evmcs->guest_sysenter_esp;
+ break;
+ case GUEST_SYSENTER_EIP:
+ *value = current_evmcs->guest_sysenter_eip;
+ break;
+ case CR0_GUEST_HOST_MASK:
+ *value = current_evmcs->cr0_guest_host_mask;
+ break;
+ case CR4_GUEST_HOST_MASK:
+ *value = current_evmcs->cr4_guest_host_mask;
+ break;
+ case CR0_READ_SHADOW:
+ *value = current_evmcs->cr0_read_shadow;
+ break;
+ case CR4_READ_SHADOW:
+ *value = current_evmcs->cr4_read_shadow;
+ break;
+ case GUEST_CR0:
+ *value = current_evmcs->guest_cr0;
+ break;
+ case GUEST_CR3:
+ *value = current_evmcs->guest_cr3;
+ break;
+ case GUEST_CR4:
+ *value = current_evmcs->guest_cr4;
+ break;
+ case GUEST_DR7:
+ *value = current_evmcs->guest_dr7;
+ break;
+ case HOST_FS_BASE:
+ *value = current_evmcs->host_fs_base;
+ break;
+ case HOST_GS_BASE:
+ *value = current_evmcs->host_gs_base;
+ break;
+ case HOST_TR_BASE:
+ *value = current_evmcs->host_tr_base;
+ break;
+ case HOST_GDTR_BASE:
+ *value = current_evmcs->host_gdtr_base;
+ break;
+ case HOST_IDTR_BASE:
+ *value = current_evmcs->host_idtr_base;
+ break;
+ case HOST_RSP:
+ *value = current_evmcs->host_rsp;
+ break;
+ case EPT_POINTER:
+ *value = current_evmcs->ept_pointer;
+ break;
+ case GUEST_BNDCFGS:
+ *value = current_evmcs->guest_bndcfgs;
+ break;
+ case XSS_EXIT_BITMAP:
+ *value = current_evmcs->xss_exit_bitmap;
+ break;
+ case GUEST_PHYSICAL_ADDRESS:
+ *value = current_evmcs->guest_physical_address;
+ break;
+ case EXIT_QUALIFICATION:
+ *value = current_evmcs->exit_qualification;
+ break;
+ case GUEST_LINEAR_ADDRESS:
+ *value = current_evmcs->guest_linear_address;
+ break;
+ case VM_EXIT_MSR_STORE_ADDR:
+ *value = current_evmcs->vm_exit_msr_store_addr;
+ break;
+ case VM_EXIT_MSR_LOAD_ADDR:
+ *value = current_evmcs->vm_exit_msr_load_addr;
+ break;
+ case VM_ENTRY_MSR_LOAD_ADDR:
+ *value = current_evmcs->vm_entry_msr_load_addr;
+ break;
+ case CR3_TARGET_VALUE0:
+ *value = current_evmcs->cr3_target_value0;
+ break;
+ case CR3_TARGET_VALUE1:
+ *value = current_evmcs->cr3_target_value1;
+ break;
+ case CR3_TARGET_VALUE2:
+ *value = current_evmcs->cr3_target_value2;
+ break;
+ case CR3_TARGET_VALUE3:
+ *value = current_evmcs->cr3_target_value3;
+ break;
+ case TPR_THRESHOLD:
+ *value = current_evmcs->tpr_threshold;
+ break;
+ case GUEST_INTERRUPTIBILITY_INFO:
+ *value = current_evmcs->guest_interruptibility_info;
+ break;
+ case CPU_BASED_VM_EXEC_CONTROL:
+ *value = current_evmcs->cpu_based_vm_exec_control;
+ break;
+ case EXCEPTION_BITMAP:
+ *value = current_evmcs->exception_bitmap;
+ break;
+ case VM_ENTRY_CONTROLS:
+ *value = current_evmcs->vm_entry_controls;
+ break;
+ case VM_ENTRY_INTR_INFO_FIELD:
+ *value = current_evmcs->vm_entry_intr_info_field;
+ break;
+ case VM_ENTRY_EXCEPTION_ERROR_CODE:
+ *value = current_evmcs->vm_entry_exception_error_code;
+ break;
+ case VM_ENTRY_INSTRUCTION_LEN:
+ *value = current_evmcs->vm_entry_instruction_len;
+ break;
+ case HOST_IA32_SYSENTER_CS:
+ *value = current_evmcs->host_ia32_sysenter_cs;
+ break;
+ case PIN_BASED_VM_EXEC_CONTROL:
+ *value = current_evmcs->pin_based_vm_exec_control;
+ break;
+ case VM_EXIT_CONTROLS:
+ *value = current_evmcs->vm_exit_controls;
+ break;
+ case SECONDARY_VM_EXEC_CONTROL:
+ *value = current_evmcs->secondary_vm_exec_control;
+ break;
+ case GUEST_ES_LIMIT:
+ *value = current_evmcs->guest_es_limit;
+ break;
+ case GUEST_CS_LIMIT:
+ *value = current_evmcs->guest_cs_limit;
+ break;
+ case GUEST_SS_LIMIT:
+ *value = current_evmcs->guest_ss_limit;
+ break;
+ case GUEST_DS_LIMIT:
+ *value = current_evmcs->guest_ds_limit;
+ break;
+ case GUEST_FS_LIMIT:
+ *value = current_evmcs->guest_fs_limit;
+ break;
+ case GUEST_GS_LIMIT:
+ *value = current_evmcs->guest_gs_limit;
+ break;
+ case GUEST_LDTR_LIMIT:
+ *value = current_evmcs->guest_ldtr_limit;
+ break;
+ case GUEST_TR_LIMIT:
+ *value = current_evmcs->guest_tr_limit;
+ break;
+ case GUEST_GDTR_LIMIT:
+ *value = current_evmcs->guest_gdtr_limit;
+ break;
+ case GUEST_IDTR_LIMIT:
+ *value = current_evmcs->guest_idtr_limit;
+ break;
+ case GUEST_ES_AR_BYTES:
+ *value = current_evmcs->guest_es_ar_bytes;
+ break;
+ case GUEST_CS_AR_BYTES:
+ *value = current_evmcs->guest_cs_ar_bytes;
+ break;
+ case GUEST_SS_AR_BYTES:
+ *value = current_evmcs->guest_ss_ar_bytes;
+ break;
+ case GUEST_DS_AR_BYTES:
+ *value = current_evmcs->guest_ds_ar_bytes;
+ break;
+ case GUEST_FS_AR_BYTES:
+ *value = current_evmcs->guest_fs_ar_bytes;
+ break;
+ case GUEST_GS_AR_BYTES:
+ *value = current_evmcs->guest_gs_ar_bytes;
+ break;
+ case GUEST_LDTR_AR_BYTES:
+ *value = current_evmcs->guest_ldtr_ar_bytes;
+ break;
+ case GUEST_TR_AR_BYTES:
+ *value = current_evmcs->guest_tr_ar_bytes;
+ break;
+ case GUEST_ACTIVITY_STATE:
+ *value = current_evmcs->guest_activity_state;
+ break;
+ case GUEST_SYSENTER_CS:
+ *value = current_evmcs->guest_sysenter_cs;
+ break;
+ case VM_INSTRUCTION_ERROR:
+ *value = current_evmcs->vm_instruction_error;
+ break;
+ case VM_EXIT_REASON:
+ *value = current_evmcs->vm_exit_reason;
+ break;
+ case VM_EXIT_INTR_INFO:
+ *value = current_evmcs->vm_exit_intr_info;
+ break;
+ case VM_EXIT_INTR_ERROR_CODE:
+ *value = current_evmcs->vm_exit_intr_error_code;
+ break;
+ case IDT_VECTORING_INFO_FIELD:
+ *value = current_evmcs->idt_vectoring_info_field;
+ break;
+ case IDT_VECTORING_ERROR_CODE:
+ *value = current_evmcs->idt_vectoring_error_code;
+ break;
+ case VM_EXIT_INSTRUCTION_LEN:
+ *value = current_evmcs->vm_exit_instruction_len;
+ break;
+ case VMX_INSTRUCTION_INFO:
+ *value = current_evmcs->vmx_instruction_info;
+ break;
+ case PAGE_FAULT_ERROR_CODE_MASK:
+ *value = current_evmcs->page_fault_error_code_mask;
+ break;
+ case PAGE_FAULT_ERROR_CODE_MATCH:
+ *value = current_evmcs->page_fault_error_code_match;
+ break;
+ case CR3_TARGET_COUNT:
+ *value = current_evmcs->cr3_target_count;
+ break;
+ case VM_EXIT_MSR_STORE_COUNT:
+ *value = current_evmcs->vm_exit_msr_store_count;
+ break;
+ case VM_EXIT_MSR_LOAD_COUNT:
+ *value = current_evmcs->vm_exit_msr_load_count;
+ break;
+ case VM_ENTRY_MSR_LOAD_COUNT:
+ *value = current_evmcs->vm_entry_msr_load_count;
+ break;
+ case HOST_ES_SELECTOR:
+ *value = current_evmcs->host_es_selector;
+ break;
+ case HOST_CS_SELECTOR:
+ *value = current_evmcs->host_cs_selector;
+ break;
+ case HOST_SS_SELECTOR:
+ *value = current_evmcs->host_ss_selector;
+ break;
+ case HOST_DS_SELECTOR:
+ *value = current_evmcs->host_ds_selector;
+ break;
+ case HOST_FS_SELECTOR:
+ *value = current_evmcs->host_fs_selector;
+ break;
+ case HOST_GS_SELECTOR:
+ *value = current_evmcs->host_gs_selector;
+ break;
+ case HOST_TR_SELECTOR:
+ *value = current_evmcs->host_tr_selector;
+ break;
+ case GUEST_ES_SELECTOR:
+ *value = current_evmcs->guest_es_selector;
+ break;
+ case GUEST_CS_SELECTOR:
+ *value = current_evmcs->guest_cs_selector;
+ break;
+ case GUEST_SS_SELECTOR:
+ *value = current_evmcs->guest_ss_selector;
+ break;
+ case GUEST_DS_SELECTOR:
+ *value = current_evmcs->guest_ds_selector;
+ break;
+ case GUEST_FS_SELECTOR:
+ *value = current_evmcs->guest_fs_selector;
+ break;
+ case GUEST_GS_SELECTOR:
+ *value = current_evmcs->guest_gs_selector;
+ break;
+ case GUEST_LDTR_SELECTOR:
+ *value = current_evmcs->guest_ldtr_selector;
+ break;
+ case GUEST_TR_SELECTOR:
+ *value = current_evmcs->guest_tr_selector;
+ break;
+ case VIRTUAL_PROCESSOR_ID:
+ *value = current_evmcs->virtual_processor_id;
+ break;
+ case HOST_IA32_PERF_GLOBAL_CTRL:
+ *value = current_evmcs->host_ia32_perf_global_ctrl;
+ break;
+ case GUEST_IA32_PERF_GLOBAL_CTRL:
+ *value = current_evmcs->guest_ia32_perf_global_ctrl;
+ break;
+ case ENCLS_EXITING_BITMAP:
+ *value = current_evmcs->encls_exiting_bitmap;
+ break;
+ case TSC_MULTIPLIER:
+ *value = current_evmcs->tsc_multiplier;
+ break;
+ default: return 1;
+ }
+
+ return 0;
+}
+
+static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value)
+{
+ switch (encoding) {
+ case GUEST_RIP:
+ current_evmcs->guest_rip = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+ break;
+ case GUEST_RSP:
+ current_evmcs->guest_rsp = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
+ break;
+ case GUEST_RFLAGS:
+ current_evmcs->guest_rflags = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
+ break;
+ case HOST_IA32_PAT:
+ current_evmcs->host_ia32_pat = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ break;
+ case HOST_IA32_EFER:
+ current_evmcs->host_ia32_efer = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ break;
+ case HOST_CR0:
+ current_evmcs->host_cr0 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ break;
+ case HOST_CR3:
+ current_evmcs->host_cr3 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ break;
+ case HOST_CR4:
+ current_evmcs->host_cr4 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ break;
+ case HOST_IA32_SYSENTER_ESP:
+ current_evmcs->host_ia32_sysenter_esp = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ break;
+ case HOST_IA32_SYSENTER_EIP:
+ current_evmcs->host_ia32_sysenter_eip = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ break;
+ case HOST_RIP:
+ current_evmcs->host_rip = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ break;
+ case IO_BITMAP_A:
+ current_evmcs->io_bitmap_a = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP;
+ break;
+ case IO_BITMAP_B:
+ current_evmcs->io_bitmap_b = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP;
+ break;
+ case MSR_BITMAP:
+ current_evmcs->msr_bitmap = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+ break;
+ case GUEST_ES_BASE:
+ current_evmcs->guest_es_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_CS_BASE:
+ current_evmcs->guest_cs_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_SS_BASE:
+ current_evmcs->guest_ss_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_DS_BASE:
+ current_evmcs->guest_ds_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_FS_BASE:
+ current_evmcs->guest_fs_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_GS_BASE:
+ current_evmcs->guest_gs_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_LDTR_BASE:
+ current_evmcs->guest_ldtr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_TR_BASE:
+ current_evmcs->guest_tr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_GDTR_BASE:
+ current_evmcs->guest_gdtr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_IDTR_BASE:
+ current_evmcs->guest_idtr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case TSC_OFFSET:
+ current_evmcs->tsc_offset = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
+ break;
+ case VIRTUAL_APIC_PAGE_ADDR:
+ current_evmcs->virtual_apic_page_addr = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
+ break;
+ case VMCS_LINK_POINTER:
+ current_evmcs->vmcs_link_pointer = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+ break;
+ case GUEST_IA32_DEBUGCTL:
+ current_evmcs->guest_ia32_debugctl = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+ break;
+ case GUEST_IA32_PAT:
+ current_evmcs->guest_ia32_pat = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+ break;
+ case GUEST_IA32_EFER:
+ current_evmcs->guest_ia32_efer = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+ break;
+ case GUEST_PDPTR0:
+ current_evmcs->guest_pdptr0 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+ break;
+ case GUEST_PDPTR1:
+ current_evmcs->guest_pdptr1 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+ break;
+ case GUEST_PDPTR2:
+ current_evmcs->guest_pdptr2 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+ break;
+ case GUEST_PDPTR3:
+ current_evmcs->guest_pdptr3 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+ break;
+ case GUEST_PENDING_DBG_EXCEPTIONS:
+ current_evmcs->guest_pending_dbg_exceptions = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+ break;
+ case GUEST_SYSENTER_ESP:
+ current_evmcs->guest_sysenter_esp = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+ break;
+ case GUEST_SYSENTER_EIP:
+ current_evmcs->guest_sysenter_eip = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+ break;
+ case CR0_GUEST_HOST_MASK:
+ current_evmcs->cr0_guest_host_mask = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+ break;
+ case CR4_GUEST_HOST_MASK:
+ current_evmcs->cr4_guest_host_mask = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+ break;
+ case CR0_READ_SHADOW:
+ current_evmcs->cr0_read_shadow = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+ break;
+ case CR4_READ_SHADOW:
+ current_evmcs->cr4_read_shadow = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+ break;
+ case GUEST_CR0:
+ current_evmcs->guest_cr0 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+ break;
+ case GUEST_CR3:
+ current_evmcs->guest_cr3 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+ break;
+ case GUEST_CR4:
+ current_evmcs->guest_cr4 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+ break;
+ case GUEST_DR7:
+ current_evmcs->guest_dr7 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+ break;
+ case HOST_FS_BASE:
+ current_evmcs->host_fs_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+ break;
+ case HOST_GS_BASE:
+ current_evmcs->host_gs_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+ break;
+ case HOST_TR_BASE:
+ current_evmcs->host_tr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+ break;
+ case HOST_GDTR_BASE:
+ current_evmcs->host_gdtr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+ break;
+ case HOST_IDTR_BASE:
+ current_evmcs->host_idtr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+ break;
+ case HOST_RSP:
+ current_evmcs->host_rsp = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+ break;
+ case EPT_POINTER:
+ current_evmcs->ept_pointer = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT;
+ break;
+ case GUEST_BNDCFGS:
+ current_evmcs->guest_bndcfgs = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+ break;
+ case XSS_EXIT_BITMAP:
+ current_evmcs->xss_exit_bitmap = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
+ break;
+ case GUEST_PHYSICAL_ADDRESS:
+ current_evmcs->guest_physical_address = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+ break;
+ case EXIT_QUALIFICATION:
+ current_evmcs->exit_qualification = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+ break;
+ case GUEST_LINEAR_ADDRESS:
+ current_evmcs->guest_linear_address = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+ break;
+ case VM_EXIT_MSR_STORE_ADDR:
+ current_evmcs->vm_exit_msr_store_addr = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+ break;
+ case VM_EXIT_MSR_LOAD_ADDR:
+ current_evmcs->vm_exit_msr_load_addr = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+ break;
+ case VM_ENTRY_MSR_LOAD_ADDR:
+ current_evmcs->vm_entry_msr_load_addr = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+ break;
+ case CR3_TARGET_VALUE0:
+ current_evmcs->cr3_target_value0 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+ break;
+ case CR3_TARGET_VALUE1:
+ current_evmcs->cr3_target_value1 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+ break;
+ case CR3_TARGET_VALUE2:
+ current_evmcs->cr3_target_value2 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+ break;
+ case CR3_TARGET_VALUE3:
+ current_evmcs->cr3_target_value3 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+ break;
+ case TPR_THRESHOLD:
+ current_evmcs->tpr_threshold = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+ break;
+ case GUEST_INTERRUPTIBILITY_INFO:
+ current_evmcs->guest_interruptibility_info = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
+ break;
+ case CPU_BASED_VM_EXEC_CONTROL:
+ current_evmcs->cpu_based_vm_exec_control = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC;
+ break;
+ case EXCEPTION_BITMAP:
+ current_evmcs->exception_bitmap = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN;
+ break;
+ case VM_ENTRY_CONTROLS:
+ current_evmcs->vm_entry_controls = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY;
+ break;
+ case VM_ENTRY_INTR_INFO_FIELD:
+ current_evmcs->vm_entry_intr_info_field = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
+ break;
+ case VM_ENTRY_EXCEPTION_ERROR_CODE:
+ current_evmcs->vm_entry_exception_error_code = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
+ break;
+ case VM_ENTRY_INSTRUCTION_LEN:
+ current_evmcs->vm_entry_instruction_len = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
+ break;
+ case HOST_IA32_SYSENTER_CS:
+ current_evmcs->host_ia32_sysenter_cs = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ break;
+ case PIN_BASED_VM_EXEC_CONTROL:
+ current_evmcs->pin_based_vm_exec_control = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
+ break;
+ case VM_EXIT_CONTROLS:
+ current_evmcs->vm_exit_controls = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
+ break;
+ case SECONDARY_VM_EXEC_CONTROL:
+ current_evmcs->secondary_vm_exec_control = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
+ break;
+ case GUEST_ES_LIMIT:
+ current_evmcs->guest_es_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_CS_LIMIT:
+ current_evmcs->guest_cs_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_SS_LIMIT:
+ current_evmcs->guest_ss_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_DS_LIMIT:
+ current_evmcs->guest_ds_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_FS_LIMIT:
+ current_evmcs->guest_fs_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_GS_LIMIT:
+ current_evmcs->guest_gs_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_LDTR_LIMIT:
+ current_evmcs->guest_ldtr_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_TR_LIMIT:
+ current_evmcs->guest_tr_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_GDTR_LIMIT:
+ current_evmcs->guest_gdtr_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_IDTR_LIMIT:
+ current_evmcs->guest_idtr_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_ES_AR_BYTES:
+ current_evmcs->guest_es_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_CS_AR_BYTES:
+ current_evmcs->guest_cs_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_SS_AR_BYTES:
+ current_evmcs->guest_ss_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_DS_AR_BYTES:
+ current_evmcs->guest_ds_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_FS_AR_BYTES:
+ current_evmcs->guest_fs_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_GS_AR_BYTES:
+ current_evmcs->guest_gs_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_LDTR_AR_BYTES:
+ current_evmcs->guest_ldtr_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_TR_AR_BYTES:
+ current_evmcs->guest_tr_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_ACTIVITY_STATE:
+ current_evmcs->guest_activity_state = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+ break;
+ case GUEST_SYSENTER_CS:
+ current_evmcs->guest_sysenter_cs = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+ break;
+ case VM_INSTRUCTION_ERROR:
+ current_evmcs->vm_instruction_error = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+ break;
+ case VM_EXIT_REASON:
+ current_evmcs->vm_exit_reason = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+ break;
+ case VM_EXIT_INTR_INFO:
+ current_evmcs->vm_exit_intr_info = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+ break;
+ case VM_EXIT_INTR_ERROR_CODE:
+ current_evmcs->vm_exit_intr_error_code = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+ break;
+ case IDT_VECTORING_INFO_FIELD:
+ current_evmcs->idt_vectoring_info_field = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+ break;
+ case IDT_VECTORING_ERROR_CODE:
+ current_evmcs->idt_vectoring_error_code = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+ break;
+ case VM_EXIT_INSTRUCTION_LEN:
+ current_evmcs->vm_exit_instruction_len = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+ break;
+ case VMX_INSTRUCTION_INFO:
+ current_evmcs->vmx_instruction_info = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+ break;
+ case PAGE_FAULT_ERROR_CODE_MASK:
+ current_evmcs->page_fault_error_code_mask = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+ break;
+ case PAGE_FAULT_ERROR_CODE_MATCH:
+ current_evmcs->page_fault_error_code_match = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+ break;
+ case CR3_TARGET_COUNT:
+ current_evmcs->cr3_target_count = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+ break;
+ case VM_EXIT_MSR_STORE_COUNT:
+ current_evmcs->vm_exit_msr_store_count = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+ break;
+ case VM_EXIT_MSR_LOAD_COUNT:
+ current_evmcs->vm_exit_msr_load_count = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+ break;
+ case VM_ENTRY_MSR_LOAD_COUNT:
+ current_evmcs->vm_entry_msr_load_count = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+ break;
+ case HOST_ES_SELECTOR:
+ current_evmcs->host_es_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ break;
+ case HOST_CS_SELECTOR:
+ current_evmcs->host_cs_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ break;
+ case HOST_SS_SELECTOR:
+ current_evmcs->host_ss_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ break;
+ case HOST_DS_SELECTOR:
+ current_evmcs->host_ds_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ break;
+ case HOST_FS_SELECTOR:
+ current_evmcs->host_fs_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ break;
+ case HOST_GS_SELECTOR:
+ current_evmcs->host_gs_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ break;
+ case HOST_TR_SELECTOR:
+ current_evmcs->host_tr_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ break;
+ case GUEST_ES_SELECTOR:
+ current_evmcs->guest_es_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_CS_SELECTOR:
+ current_evmcs->guest_cs_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_SS_SELECTOR:
+ current_evmcs->guest_ss_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_DS_SELECTOR:
+ current_evmcs->guest_ds_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_FS_SELECTOR:
+ current_evmcs->guest_fs_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_GS_SELECTOR:
+ current_evmcs->guest_gs_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_LDTR_SELECTOR:
+ current_evmcs->guest_ldtr_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case GUEST_TR_SELECTOR:
+ current_evmcs->guest_tr_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+ break;
+ case VIRTUAL_PROCESSOR_ID:
+ current_evmcs->virtual_processor_id = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT;
+ break;
+ case HOST_IA32_PERF_GLOBAL_CTRL:
+ current_evmcs->host_ia32_perf_global_ctrl = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ break;
+ case GUEST_IA32_PERF_GLOBAL_CTRL:
+ current_evmcs->guest_ia32_perf_global_ctrl = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+ break;
+ case ENCLS_EXITING_BITMAP:
+ current_evmcs->encls_exiting_bitmap = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
+ break;
+ case TSC_MULTIPLIER:
+ current_evmcs->tsc_multiplier = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
+ break;
+ default: return 1;
+ }
+
+ return 0;
+}
+
+static inline int evmcs_vmlaunch(void)
+{
+ int ret;
+
+ current_evmcs->hv_clean_fields = 0;
+
+ __asm__ __volatile__("push %%rbp;"
+ "push %%rcx;"
+ "push %%rdx;"
+ "push %%rsi;"
+ "push %%rdi;"
+ "push $0;"
+ "mov %%rsp, (%[host_rsp]);"
+ "lea 1f(%%rip), %%rax;"
+ "mov %%rax, (%[host_rip]);"
+ "vmlaunch;"
+ "incq (%%rsp);"
+ "1: pop %%rax;"
+ "pop %%rdi;"
+ "pop %%rsi;"
+ "pop %%rdx;"
+ "pop %%rcx;"
+ "pop %%rbp;"
+ : [ret]"=&a"(ret)
+ : [host_rsp]"r"
+ ((uint64_t)&current_evmcs->host_rsp),
+ [host_rip]"r"
+ ((uint64_t)&current_evmcs->host_rip)
+ : "memory", "cc", "rbx", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15");
+ return ret;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmresume.
+ */
+static inline int evmcs_vmresume(void)
+{
+ int ret;
+
+ /* HOST_RIP */
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ /* HOST_RSP */
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+
+ __asm__ __volatile__("push %%rbp;"
+ "push %%rcx;"
+ "push %%rdx;"
+ "push %%rsi;"
+ "push %%rdi;"
+ "push $0;"
+ "mov %%rsp, (%[host_rsp]);"
+ "lea 1f(%%rip), %%rax;"
+ "mov %%rax, (%[host_rip]);"
+ "vmresume;"
+ "incq (%%rsp);"
+ "1: pop %%rax;"
+ "pop %%rdi;"
+ "pop %%rsi;"
+ "pop %%rdx;"
+ "pop %%rcx;"
+ "pop %%rbp;"
+ : [ret]"=&a"(ret)
+ : [host_rsp]"r"
+ ((uint64_t)&current_evmcs->host_rsp),
+ [host_rip]"r"
+ ((uint64_t)&current_evmcs->host_rip)
+ : "memory", "cc", "rbx", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15");
+ return ret;
+}
+
+#endif /* !SELFTEST_KVM_EVMCS_H */
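A rough sketch (not part of the patch) of how an L1 guest typically drives these helpers. Assumptions: struct vmx_pages and prepare_for_vmx_operation() come from vmx.h, enable_vp_assist() and struct hyperv_test_pages from hyperv.h, and l2_guest_code/l2_stack/L2_STACK_SIZE are placeholders the test itself would define; a real test also fills in the remaining VMCS fields before launching.

static void l1_guest_code(struct vmx_pages *vmx, struct hyperv_test_pages *hv)
{
	GUEST_ASSERT(prepare_for_vmx_operation(vmx));
	GUEST_ASSERT(!enable_vp_assist(hv->vp_assist_gpa, hv->vp_assist));

	evmcs_enable();
	GUEST_ASSERT(load_evmcs(hv));

	/* Field writes accumulate in the eVMCS until the next VM-Enter. */
	evmcs_vmwrite(GUEST_RIP, (uint64_t)l2_guest_code);
	evmcs_vmwrite(GUEST_RSP, (uint64_t)&l2_stack[L2_STACK_SIZE]);

	/* evmcs_vmlaunch() returns 0 once the first VM-Exit comes back to L1. */
	GUEST_ASSERT(!evmcs_vmlaunch());
}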
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ */
+
+#ifndef SELFTEST_KVM_HYPERV_H
+#define SELFTEST_KVM_HYPERV_H
+
+#include "processor.h"
+
+#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000
+#define HYPERV_CPUID_INTERFACE 0x40000001
+#define HYPERV_CPUID_VERSION 0x40000002
+#define HYPERV_CPUID_FEATURES 0x40000003
+#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004
+#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005
+#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES 0x40000007
+#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A
+#define HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS 0x40000080
+#define HYPERV_CPUID_SYNDBG_INTERFACE 0x40000081
+#define HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES 0x40000082
+
+#define HV_X64_MSR_GUEST_OS_ID 0x40000000
+#define HV_X64_MSR_HYPERCALL 0x40000001
+#define HV_X64_MSR_VP_INDEX 0x40000002
+#define HV_X64_MSR_RESET 0x40000003
+#define HV_X64_MSR_VP_RUNTIME 0x40000010
+#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
+#define HV_X64_MSR_REFERENCE_TSC 0x40000021
+#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
+#define HV_X64_MSR_APIC_FREQUENCY 0x40000023
+#define HV_X64_MSR_EOI 0x40000070
+#define HV_X64_MSR_ICR 0x40000071
+#define HV_X64_MSR_TPR 0x40000072
+#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
+#define HV_X64_MSR_SCONTROL 0x40000080
+#define HV_X64_MSR_SVERSION 0x40000081
+#define HV_X64_MSR_SIEFP 0x40000082
+#define HV_X64_MSR_SIMP 0x40000083
+#define HV_X64_MSR_EOM 0x40000084
+#define HV_X64_MSR_SINT0 0x40000090
+#define HV_X64_MSR_SINT1 0x40000091
+#define HV_X64_MSR_SINT2 0x40000092
+#define HV_X64_MSR_SINT3 0x40000093
+#define HV_X64_MSR_SINT4 0x40000094
+#define HV_X64_MSR_SINT5 0x40000095
+#define HV_X64_MSR_SINT6 0x40000096
+#define HV_X64_MSR_SINT7 0x40000097
+#define HV_X64_MSR_SINT8 0x40000098
+#define HV_X64_MSR_SINT9 0x40000099
+#define HV_X64_MSR_SINT10 0x4000009A
+#define HV_X64_MSR_SINT11 0x4000009B
+#define HV_X64_MSR_SINT12 0x4000009C
+#define HV_X64_MSR_SINT13 0x4000009D
+#define HV_X64_MSR_SINT14 0x4000009E
+#define HV_X64_MSR_SINT15 0x4000009F
+#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0
+#define HV_X64_MSR_STIMER0_COUNT 0x400000B1
+#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2
+#define HV_X64_MSR_STIMER1_COUNT 0x400000B3
+#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4
+#define HV_X64_MSR_STIMER2_COUNT 0x400000B5
+#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6
+#define HV_X64_MSR_STIMER3_COUNT 0x400000B7
+#define HV_X64_MSR_GUEST_IDLE 0x400000F0
+#define HV_X64_MSR_CRASH_P0 0x40000100
+#define HV_X64_MSR_CRASH_P1 0x40000101
+#define HV_X64_MSR_CRASH_P2 0x40000102
+#define HV_X64_MSR_CRASH_P3 0x40000103
+#define HV_X64_MSR_CRASH_P4 0x40000104
+#define HV_X64_MSR_CRASH_CTL 0x40000105
+#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
+#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
+#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
+#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118
+
+#define HV_X64_MSR_SYNDBG_CONTROL 0x400000F1
+#define HV_X64_MSR_SYNDBG_STATUS 0x400000F2
+#define HV_X64_MSR_SYNDBG_SEND_BUFFER 0x400000F3
+#define HV_X64_MSR_SYNDBG_RECV_BUFFER 0x400000F4
+#define HV_X64_MSR_SYNDBG_PENDING_BUFFER 0x400000F5
+#define HV_X64_MSR_SYNDBG_OPTIONS 0x400000FF
+
+/* HYPERV_CPUID_FEATURES.EAX */
+#define HV_MSR_VP_RUNTIME_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 0)
+#define HV_MSR_TIME_REF_COUNT_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 1)
+#define HV_MSR_SYNIC_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 2)
+#define HV_MSR_SYNTIMER_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 3)
+#define HV_MSR_APIC_ACCESS_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 4)
+#define HV_MSR_HYPERCALL_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 5)
+#define HV_MSR_VP_INDEX_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 6)
+#define HV_MSR_RESET_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 7)
+#define HV_MSR_STAT_PAGES_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 8)
+#define HV_MSR_REFERENCE_TSC_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 9)
+#define HV_MSR_GUEST_IDLE_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 10)
+#define HV_ACCESS_FREQUENCY_MSRS \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 11)
+#define HV_ACCESS_REENLIGHTENMENT \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 13)
+#define HV_ACCESS_TSC_INVARIANT \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 15)
+
+/* HYPERV_CPUID_FEATURES.EBX */
+#define HV_CREATE_PARTITIONS \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 0)
+#define HV_ACCESS_PARTITION_ID \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 1)
+#define HV_ACCESS_MEMORY_POOL \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 2)
+#define HV_ADJUST_MESSAGE_BUFFERS \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 3)
+#define HV_POST_MESSAGES \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 4)
+#define HV_SIGNAL_EVENTS \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 5)
+#define HV_CREATE_PORT \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 6)
+#define HV_CONNECT_PORT \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 7)
+#define HV_ACCESS_STATS \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 8)
+#define HV_DEBUGGING \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 11)
+#define HV_CPU_MANAGEMENT \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 12)
+#define HV_ENABLE_EXTENDED_HYPERCALLS \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 20)
+#define HV_ISOLATION \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 22)
+
+/* HYPERV_CPUID_FEATURES.EDX */
+#define HV_X64_MWAIT_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 0)
+#define HV_X64_GUEST_DEBUGGING_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 1)
+#define HV_X64_PERF_MONITOR_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 2)
+#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 3)
+#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 4)
+#define HV_X64_GUEST_IDLE_STATE_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 5)
+#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 8)
+#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 10)
+#define HV_FEATURE_DEBUG_MSRS_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 11)
+#define HV_STIMER_DIRECT_MODE_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 19)
+
+/* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
+#define HV_X64_AS_SWITCH_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 0)
+#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 1)
+#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 2)
+#define HV_X64_APIC_ACCESS_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 3)
+#define HV_X64_SYSTEM_RESET_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 4)
+#define HV_X64_RELAXED_TIMING_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 5)
+#define HV_DEPRECATING_AEOI_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 9)
+#define HV_X64_CLUSTER_IPI_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 10)
+#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 11)
+#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 14)
+
+/* HYPERV_CPUID_NESTED_FEATURES.EAX */
+#define HV_X64_NESTED_DIRECT_FLUSH \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 17)
+#define HV_X64_NESTED_GUEST_MAPPING_FLUSH \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 18)
+#define HV_X64_NESTED_MSR_BITMAP \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 19)
+
+/* HYPERV_CPUID_NESTED_FEATURES.EBX */
+#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EBX, 0)
+
+/* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
+#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0, EAX, 1)
+
+/* Hypercalls */
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
+#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
+#define HVCALL_SEND_IPI 0x000b
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
+#define HVCALL_SEND_IPI_EX 0x0015
+#define HVCALL_GET_PARTITION_ID 0x0046
+#define HVCALL_DEPOSIT_MEMORY 0x0048
+#define HVCALL_CREATE_VP 0x004e
+#define HVCALL_GET_VP_REGISTERS 0x0050
+#define HVCALL_SET_VP_REGISTERS 0x0051
+#define HVCALL_POST_MESSAGE 0x005c
+#define HVCALL_SIGNAL_EVENT 0x005d
+#define HVCALL_POST_DEBUG_DATA 0x0069
+#define HVCALL_RETRIEVE_DEBUG_DATA 0x006a
+#define HVCALL_RESET_DEBUG_SESSION 0x006b
+#define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076
+#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c
+#define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d
+#define HVCALL_RETARGET_INTERRUPT 0x007e
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
+
+/* Extended hypercalls */
+#define HV_EXT_CALL_QUERY_CAPABILITIES 0x8001
+
+#define HV_FLUSH_ALL_PROCESSORS BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
+
+/* hypercall status code */
+#define HV_STATUS_SUCCESS 0
+#define HV_STATUS_INVALID_HYPERCALL_CODE 2
+#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
+#define HV_STATUS_INVALID_ALIGNMENT 4
+#define HV_STATUS_INVALID_PARAMETER 5
+#define HV_STATUS_ACCESS_DENIED 6
+#define HV_STATUS_OPERATION_DENIED 8
+#define HV_STATUS_INSUFFICIENT_MEMORY 11
+#define HV_STATUS_INVALID_PORT_ID 17
+#define HV_STATUS_INVALID_CONNECTION_ID 18
+#define HV_STATUS_INSUFFICIENT_BUFFERS 19
+
+/* hypercall options */
+#define HV_HYPERCALL_FAST_BIT BIT(16)
+#define HV_HYPERCALL_VARHEAD_OFFSET 17
+#define HV_HYPERCALL_REP_COMP_OFFSET 32
+
+/*
+ * Issue a Hyper-V hypercall. Returns the raised exception vector, or 0 if
+ * none; 'hv_status' is set to the hypercall status (valid only when no
+ * exception occurred).
+ */
+static inline uint8_t __hyperv_hypercall(u64 control, vm_vaddr_t input_address,
+ vm_vaddr_t output_address,
+ uint64_t *hv_status)
+{
+ uint64_t error_code;
+ uint8_t vector;
+
+ /* Note both the hypercall and the "asm safe" clobber r9-r11. */
+ asm volatile("mov %[output_address], %%r8\n\t"
+ KVM_ASM_SAFE("vmcall")
+ : "=a" (*hv_status),
+ "+c" (control), "+d" (input_address),
+ KVM_ASM_SAFE_OUTPUTS(vector, error_code)
+ : [output_address] "r"(output_address),
+ "a" (-EFAULT)
+ : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS);
+ return vector;
+}
+
+/* Issue a Hyper-V hypercall and assert that it succeeded. */
+static inline void hyperv_hypercall(u64 control, vm_vaddr_t input_address,
+ vm_vaddr_t output_address)
+{
+ uint64_t hv_status;
+ uint8_t vector;
+
+ vector = __hyperv_hypercall(control, input_address, output_address, &hv_status);
+
+ GUEST_ASSERT(!vector);
+ GUEST_ASSERT((hv_status & 0xffff) == 0);
+}
+
+/* Write 'Fast' hypercall input 'data' to the first 'n_sse_regs' SSE regs */
+static inline void hyperv_write_xmm_input(void *data, int n_sse_regs)
+{
+ int i;
+
+ for (i = 0; i < n_sse_regs; i++)
+ write_sse_reg(i, (sse128_t *)(data + sizeof(sse128_t) * i));
+}
+
+/* Proper HV_X64_MSR_GUEST_OS_ID value */
+#define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48)
+
+#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
+#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
+ (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+
+struct hv_nested_enlightenments_control {
+ struct {
+ __u32 directhypercall:1;
+ __u32 reserved:31;
+ } features;
+ struct {
+ __u32 reserved;
+ } hypercallControls;
+} __packed;
+
+/* Define virtual processor assist page structure. */
+struct hv_vp_assist_page {
+ __u32 apic_assist;
+ __u32 reserved1;
+ __u64 vtl_control[3];
+ struct hv_nested_enlightenments_control nested_control;
+ __u8 enlighten_vmentry;
+ __u8 reserved2[7];
+ __u64 current_nested_vmcs;
+} __packed;
+
+extern struct hv_vp_assist_page *current_vp_assist;
+
+int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist);
+
+struct hyperv_test_pages {
+ /* VP assist page */
+ void *vp_assist_hva;
+ uint64_t vp_assist_gpa;
+ void *vp_assist;
+
+ /* Partition assist page */
+ void *partition_assist_hva;
+ uint64_t partition_assist_gpa;
+ void *partition_assist;
+
+ /* Enlightened VMCS */
+ void *enlightened_vmcs_hva;
+ uint64_t enlightened_vmcs_gpa;
+ void *enlightened_vmcs;
+};
+
+struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
+ vm_vaddr_t *p_hv_pages_gva);
+
+/* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */
+#define HV_INVARIANT_TSC_EXPOSED BIT_ULL(0)
+
+const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
+const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu);
+void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu);
+
+bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature);
+
+#endif /* !SELFTEST_KVM_HYPERV_H */
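For illustration only (not part of the patch): KVM routes VMCALL to its Hyper-V handler only once Hyper-V CPUID has been exposed to the vCPU (see vcpu_set_hv_cpuid()) and the guest has written a non-zero HV_X64_MSR_GUEST_OS_ID, so tests set the OS ID first. HVCALL_NOTIFY_LONG_SPIN_WAIT is used here simply because it is a cheap fast hypercall expected to return HV_STATUS_SUCCESS.

static void guest_hcall_example(void)
{
	wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);

	/* Fast hypercall with no memory operands; asserts vector == 0 and success. */
	hyperv_hypercall(HVCALL_NOTIFY_LONG_SPIN_WAIT | HV_HYPERCALL_FAST_BIT, 0, 0);
}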
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "kvm_util_types.h"
+#include "test_util.h"
+
+extern bool is_forced_emulation_enabled;
+
+struct kvm_vm_arch {
+ vm_vaddr_t gdt;
+ vm_vaddr_t tss;
+ vm_vaddr_t idt;
+
+ uint64_t c_bit;
+ uint64_t s_bit;
+ int sev_fd;
+ bool is_pt_protected;
+};
+
+static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch)
+{
+ return arch->c_bit || arch->s_bit;
+}
+
+#define vm_arch_has_protected_memory(vm) \
+ __vm_arch_has_protected_memory(&(vm)->arch)
+
+#define vcpu_arch_put_guest(mem, __val) \
+do { \
+ const typeof(mem) val = (__val); \
+ \
+ if (!is_forced_emulation_enabled || guest_random_bool(&guest_rng)) { \
+ (mem) = val; \
+ } else if (guest_random_bool(&guest_rng)) { \
+ __asm__ __volatile__(KVM_FEP "mov %1, %0" \
+ : "+m" (mem) \
+ : "r" (val) : "memory"); \
+ } else { \
+ uint64_t __old = READ_ONCE(mem); \
+ \
+ __asm__ __volatile__(KVM_FEP LOCK_PREFIX "cmpxchg %[new], %[ptr]" \
+ : [ptr] "+m" (mem), [old] "+a" (__old) \
+ : [new]"r" (val) : "memory", "cc"); \
+ } \
+} while (0)
+
+#endif // SELFTEST_KVM_UTIL_ARCH_H
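To make vcpu_arch_put_guest() concrete: from guest code, stores written through it behave like plain assignments, except that when forced emulation is enabled roughly half of them are pushed through KVM's emulator (as a prefixed MOV or LOCK CMPXCHG). A minimal, hypothetical caller:

static void guest_fill_buffer(uint64_t *buf, int nr_vals, uint64_t val)
{
	int i;

	/* Each store randomly takes the native or force-emulated path. */
	for (i = 0; i < nr_vals; i++)
		vcpu_arch_put_guest(buf[i], val);
}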
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_MCE_H
+#define SELFTEST_KVM_MCE_H
+
+#define MCG_CTL_P BIT_ULL(8) /* MCG_CTL register available */
+#define MCG_SER_P BIT_ULL(24) /* MCA recovery/new status bits */
+#define MCG_LMCE_P BIT_ULL(27) /* Local machine check supported */
+#define MCG_CMCI_P BIT_ULL(10) /* CMCI supported */
+#define KVM_MAX_MCE_BANKS 32
+#define MCG_CAP_BANKS_MASK 0xff /* Bits 0-7 of the MCG_CAP register are #banks */
+#define MCI_STATUS_VAL (1ULL << 63) /* valid error */
+#define MCI_STATUS_UC (1ULL << 61) /* uncorrected error */
+#define MCI_STATUS_EN (1ULL << 60) /* error enabled */
+#define MCI_STATUS_MISCV (1ULL << 59) /* misc error reg. valid */
+#define MCI_STATUS_ADDRV (1ULL << 58) /* addr reg. valid */
+#define MCM_ADDR_PHYS 2 /* physical address */
+#define MCI_CTL2_CMCI_EN BIT_ULL(30)
+
+#endif /* SELFTEST_KVM_MCE_H */
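A hypothetical host-side illustration of how these constants compose: the low byte of MCG_CAP carries the bank count and the upper bits advertise optional capabilities, and the result is handed to KVM via the existing KVM_X86_SETUP_MCE ioctl (the bank count of 10 below is arbitrary).

static void setup_mce_caps(struct kvm_vcpu *vcpu)
{
	uint64_t mcg_cap = MCG_CTL_P | MCG_SER_P | MCG_CMCI_P | 10;

	vcpu_ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_cap);
}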
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+#ifndef SELFTEST_KVM_PMU_H
+#define SELFTEST_KVM_PMU_H
+
+#include <stdint.h>
+
+#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
+
+/*
+ * Encode an eventsel+umask pair into event-select MSR format. Note, this is
+ * technically AMD's format, as Intel's format only supports 8 bits for the
+ * event selector, i.e. doesn't use bits 24:16 for the selector. But, OR-ing
+ * in '0' is a nop and won't clobber the CMASK.
+ */
+#define RAW_EVENT(eventsel, umask) (((eventsel & 0xf00UL) << 24) | \
+ ((eventsel) & 0xff) | \
+ ((umask) & 0xff) << 8)
+
+/*
+ * These are technically Intel's definitions, but except for CMASK (see above),
+ * AMD's layout is compatible with Intel's.
+ */
+#define ARCH_PERFMON_EVENTSEL_EVENT GENMASK_ULL(7, 0)
+#define ARCH_PERFMON_EVENTSEL_UMASK GENMASK_ULL(15, 8)
+#define ARCH_PERFMON_EVENTSEL_USR BIT_ULL(16)
+#define ARCH_PERFMON_EVENTSEL_OS BIT_ULL(17)
+#define ARCH_PERFMON_EVENTSEL_EDGE BIT_ULL(18)
+#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL BIT_ULL(19)
+#define ARCH_PERFMON_EVENTSEL_INT BIT_ULL(20)
+#define ARCH_PERFMON_EVENTSEL_ANY BIT_ULL(21)
+#define ARCH_PERFMON_EVENTSEL_ENABLE BIT_ULL(22)
+#define ARCH_PERFMON_EVENTSEL_INV BIT_ULL(23)
+#define ARCH_PERFMON_EVENTSEL_CMASK GENMASK_ULL(31, 24)
+
+/* RDPMC control flags, Intel only. */
+#define INTEL_RDPMC_METRICS BIT_ULL(29)
+#define INTEL_RDPMC_FIXED BIT_ULL(30)
+#define INTEL_RDPMC_FAST BIT_ULL(31)
+
+/* Fixed PMC controls, Intel only. */
+#define FIXED_PMC_GLOBAL_CTRL_ENABLE(_idx) BIT_ULL((32 + (_idx)))
+
+#define FIXED_PMC_KERNEL BIT_ULL(0)
+#define FIXED_PMC_USER BIT_ULL(1)
+#define FIXED_PMC_ANYTHREAD BIT_ULL(2)
+#define FIXED_PMC_ENABLE_PMI BIT_ULL(3)
+#define FIXED_PMC_NR_BITS 4
+#define FIXED_PMC_CTRL(_idx, _val) ((_val) << ((_idx) * FIXED_PMC_NR_BITS))
+
+#define PMU_CAP_FW_WRITES BIT_ULL(13)
+#define PMU_CAP_LBR_FMT 0x3f
+
+#define INTEL_ARCH_CPU_CYCLES RAW_EVENT(0x3c, 0x00)
+#define INTEL_ARCH_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00)
+#define INTEL_ARCH_REFERENCE_CYCLES RAW_EVENT(0x3c, 0x01)
+#define INTEL_ARCH_LLC_REFERENCES RAW_EVENT(0x2e, 0x4f)
+#define INTEL_ARCH_LLC_MISSES RAW_EVENT(0x2e, 0x41)
+#define INTEL_ARCH_BRANCHES_RETIRED RAW_EVENT(0xc4, 0x00)
+#define INTEL_ARCH_BRANCHES_MISPREDICTED RAW_EVENT(0xc5, 0x00)
+#define INTEL_ARCH_TOPDOWN_SLOTS RAW_EVENT(0xa4, 0x01)
+
+#define AMD_ZEN_CORE_CYCLES RAW_EVENT(0x76, 0x00)
+#define AMD_ZEN_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00)
+#define AMD_ZEN_BRANCHES_RETIRED RAW_EVENT(0xc2, 0x00)
+#define AMD_ZEN_BRANCHES_MISPREDICTED RAW_EVENT(0xc3, 0x00)
+
+/*
+ * Note! The order and thus the index of the architectural events matters as
+ * support for each event is enumerated via CPUID using the index of the event.
+ */
+enum intel_pmu_architectural_events {
+ INTEL_ARCH_CPU_CYCLES_INDEX,
+ INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX,
+ INTEL_ARCH_REFERENCE_CYCLES_INDEX,
+ INTEL_ARCH_LLC_REFERENCES_INDEX,
+ INTEL_ARCH_LLC_MISSES_INDEX,
+ INTEL_ARCH_BRANCHES_RETIRED_INDEX,
+ INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX,
+ INTEL_ARCH_TOPDOWN_SLOTS_INDEX,
+ NR_INTEL_ARCH_EVENTS,
+};
+
+enum amd_pmu_zen_events {
+ AMD_ZEN_CORE_CYCLES_INDEX,
+ AMD_ZEN_INSTRUCTIONS_INDEX,
+ AMD_ZEN_BRANCHES_INDEX,
+ AMD_ZEN_BRANCH_MISSES_INDEX,
+ NR_AMD_ZEN_EVENTS,
+};
+
+extern const uint64_t intel_pmu_arch_events[];
+extern const uint64_t amd_pmu_zen_events[];
+
+#endif /* SELFTEST_KVM_PMU_H */
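For illustration (not part of the patch), a guest can program GP counter 0 with one of the RAW_EVENT() encodings above; the MSR indices come from asm/msr-index.h and the measured workload is elided.

static void guest_count_instructions(void)
{
	wrmsr(MSR_P6_EVNTSEL0, INTEL_ARCH_INSTRUCTIONS_RETIRED |
			       ARCH_PERFMON_EVENTSEL_OS |
			       ARCH_PERFMON_EVENTSEL_USR |
			       ARCH_PERFMON_EVENTSEL_ENABLE);
	wrmsr(MSR_IA32_PMC0, 0);

	/* On PMU v2+, the counter must also be enabled in the global control. */
	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(0));

	/* ... run the code being measured ... */

	GUEST_SYNC(rdmsr(MSR_IA32_PMC0));
}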
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include <assert.h>
+#include <stdint.h>
+#include <syscall.h>
+
+#include <asm/msr-index.h>
+#include <asm/prctl.h>
+
+#include <linux/kvm_para.h>
+#include <linux/stringify.h>
+
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+extern bool host_cpu_is_intel;
+extern bool host_cpu_is_amd;
+extern uint64_t guest_tsc_khz;
+
+#ifndef MAX_NR_CPUID_ENTRIES
+#define MAX_NR_CPUID_ENTRIES 100
+#endif
+
+/* Forced emulation prefix, used to invoke the emulator unconditionally. */
+#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
+
+#define NMI_VECTOR 0x02
+
+#define X86_EFLAGS_FIXED (1u << 1)
+
+#define X86_CR4_VME (1ul << 0)
+#define X86_CR4_PVI (1ul << 1)
+#define X86_CR4_TSD (1ul << 2)
+#define X86_CR4_DE (1ul << 3)
+#define X86_CR4_PSE (1ul << 4)
+#define X86_CR4_PAE (1ul << 5)
+#define X86_CR4_MCE (1ul << 6)
+#define X86_CR4_PGE (1ul << 7)
+#define X86_CR4_PCE (1ul << 8)
+#define X86_CR4_OSFXSR (1ul << 9)
+#define X86_CR4_OSXMMEXCPT (1ul << 10)
+#define X86_CR4_UMIP (1ul << 11)
+#define X86_CR4_LA57 (1ul << 12)
+#define X86_CR4_VMXE (1ul << 13)
+#define X86_CR4_SMXE (1ul << 14)
+#define X86_CR4_FSGSBASE (1ul << 16)
+#define X86_CR4_PCIDE (1ul << 17)
+#define X86_CR4_OSXSAVE (1ul << 18)
+#define X86_CR4_SMEP (1ul << 20)
+#define X86_CR4_SMAP (1ul << 21)
+#define X86_CR4_PKE (1ul << 22)
+
+struct xstate_header {
+ u64 xstate_bv;
+ u64 xcomp_bv;
+ u64 reserved[6];
+} __attribute__((packed));
+
+struct xstate {
+ u8 i387[512];
+ struct xstate_header header;
+ u8 extended_state_area[0];
+} __attribute__ ((packed, aligned (64)));
+
+#define XFEATURE_MASK_FP BIT_ULL(0)
+#define XFEATURE_MASK_SSE BIT_ULL(1)
+#define XFEATURE_MASK_YMM BIT_ULL(2)
+#define XFEATURE_MASK_BNDREGS BIT_ULL(3)
+#define XFEATURE_MASK_BNDCSR BIT_ULL(4)
+#define XFEATURE_MASK_OPMASK BIT_ULL(5)
+#define XFEATURE_MASK_ZMM_Hi256 BIT_ULL(6)
+#define XFEATURE_MASK_Hi16_ZMM BIT_ULL(7)
+#define XFEATURE_MASK_PT BIT_ULL(8)
+#define XFEATURE_MASK_PKRU BIT_ULL(9)
+#define XFEATURE_MASK_PASID BIT_ULL(10)
+#define XFEATURE_MASK_CET_USER BIT_ULL(11)
+#define XFEATURE_MASK_CET_KERNEL BIT_ULL(12)
+#define XFEATURE_MASK_LBR BIT_ULL(15)
+#define XFEATURE_MASK_XTILE_CFG BIT_ULL(17)
+#define XFEATURE_MASK_XTILE_DATA BIT_ULL(18)
+
+#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK | \
+ XFEATURE_MASK_ZMM_Hi256 | \
+ XFEATURE_MASK_Hi16_ZMM)
+#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILE_DATA | \
+ XFEATURE_MASK_XTILE_CFG)
+
+/* Note, these are ordered alphabetically to match kvm_cpuid_entry2. Eww. */
+enum cpuid_output_regs {
+ KVM_CPUID_EAX,
+ KVM_CPUID_EBX,
+ KVM_CPUID_ECX,
+ KVM_CPUID_EDX
+};
+
+/*
+ * Pack the information into a 64-bit value so that each X86_FEATURE_XXX can be
+ * passed by value with no overhead.
+ */
+struct kvm_x86_cpu_feature {
+ u32 function;
+ u16 index;
+ u8 reg;
+ u8 bit;
+};
+#define KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit) \
+({ \
+ struct kvm_x86_cpu_feature feature = { \
+ .function = fn, \
+ .index = idx, \
+ .reg = KVM_CPUID_##gpr, \
+ .bit = __bit, \
+ }; \
+ \
+ kvm_static_assert((fn & 0xc0000000) == 0 || \
+ (fn & 0xc0000000) == 0x40000000 || \
+ (fn & 0xc0000000) == 0x80000000 || \
+ (fn & 0xc0000000) == 0xc0000000); \
+ kvm_static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE)); \
+ feature; \
+})
+
+/*
+ * Basic Leaves, a.k.a. Intel defined
+ */
+#define X86_FEATURE_MWAIT KVM_X86_CPU_FEATURE(0x1, 0, ECX, 3)
+#define X86_FEATURE_VMX KVM_X86_CPU_FEATURE(0x1, 0, ECX, 5)
+#define X86_FEATURE_SMX KVM_X86_CPU_FEATURE(0x1, 0, ECX, 6)
+#define X86_FEATURE_PDCM KVM_X86_CPU_FEATURE(0x1, 0, ECX, 15)
+#define X86_FEATURE_PCID KVM_X86_CPU_FEATURE(0x1, 0, ECX, 17)
+#define X86_FEATURE_X2APIC KVM_X86_CPU_FEATURE(0x1, 0, ECX, 21)
+#define X86_FEATURE_MOVBE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 22)
+#define X86_FEATURE_TSC_DEADLINE_TIMER KVM_X86_CPU_FEATURE(0x1, 0, ECX, 24)
+#define X86_FEATURE_XSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26)
+#define X86_FEATURE_OSXSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27)
+#define X86_FEATURE_RDRAND KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30)
+#define X86_FEATURE_HYPERVISOR KVM_X86_CPU_FEATURE(0x1, 0, ECX, 31)
+#define X86_FEATURE_PAE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6)
+#define X86_FEATURE_MCE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7)
+#define X86_FEATURE_APIC KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9)
+#define X86_FEATURE_CLFLUSH KVM_X86_CPU_FEATURE(0x1, 0, EDX, 19)
+#define X86_FEATURE_XMM KVM_X86_CPU_FEATURE(0x1, 0, EDX, 25)
+#define X86_FEATURE_XMM2 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 26)
+#define X86_FEATURE_FSGSBASE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 0)
+#define X86_FEATURE_TSC_ADJUST KVM_X86_CPU_FEATURE(0x7, 0, EBX, 1)
+#define X86_FEATURE_SGX KVM_X86_CPU_FEATURE(0x7, 0, EBX, 2)
+#define X86_FEATURE_HLE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 4)
+#define X86_FEATURE_SMEP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 7)
+#define X86_FEATURE_INVPCID KVM_X86_CPU_FEATURE(0x7, 0, EBX, 10)
+#define X86_FEATURE_RTM KVM_X86_CPU_FEATURE(0x7, 0, EBX, 11)
+#define X86_FEATURE_MPX KVM_X86_CPU_FEATURE(0x7, 0, EBX, 14)
+#define X86_FEATURE_SMAP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 20)
+#define X86_FEATURE_PCOMMIT KVM_X86_CPU_FEATURE(0x7, 0, EBX, 22)
+#define X86_FEATURE_CLFLUSHOPT KVM_X86_CPU_FEATURE(0x7, 0, EBX, 23)
+#define X86_FEATURE_CLWB KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24)
+#define X86_FEATURE_UMIP KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2)
+#define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3)
+#define X86_FEATURE_OSPKE KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4)
+#define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16)
+#define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22)
+#define X86_FEATURE_SGX_LC KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30)
+#define X86_FEATURE_SHSTK KVM_X86_CPU_FEATURE(0x7, 0, ECX, 7)
+#define X86_FEATURE_IBT KVM_X86_CPU_FEATURE(0x7, 0, EDX, 20)
+#define X86_FEATURE_AMX_TILE KVM_X86_CPU_FEATURE(0x7, 0, EDX, 24)
+#define X86_FEATURE_SPEC_CTRL KVM_X86_CPU_FEATURE(0x7, 0, EDX, 26)
+#define X86_FEATURE_ARCH_CAPABILITIES KVM_X86_CPU_FEATURE(0x7, 0, EDX, 29)
+#define X86_FEATURE_PKS KVM_X86_CPU_FEATURE(0x7, 0, ECX, 31)
+#define X86_FEATURE_XTILECFG KVM_X86_CPU_FEATURE(0xD, 0, EAX, 17)
+#define X86_FEATURE_XTILEDATA KVM_X86_CPU_FEATURE(0xD, 0, EAX, 18)
+#define X86_FEATURE_XSAVES KVM_X86_CPU_FEATURE(0xD, 1, EAX, 3)
+#define X86_FEATURE_XFD KVM_X86_CPU_FEATURE(0xD, 1, EAX, 4)
+#define X86_FEATURE_XTILEDATA_XFD KVM_X86_CPU_FEATURE(0xD, 18, ECX, 2)
+
+/*
+ * Extended Leaves, a.k.a. AMD defined
+ */
+#define X86_FEATURE_SVM KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2)
+#define X86_FEATURE_NX KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20)
+#define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26)
+#define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27)
+#define X86_FEATURE_LM KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 29)
+#define X86_FEATURE_INVTSC KVM_X86_CPU_FEATURE(0x80000007, 0, EDX, 8)
+#define X86_FEATURE_RDPRU KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 4)
+#define X86_FEATURE_AMD_IBPB KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 12)
+#define X86_FEATURE_NPT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 0)
+#define X86_FEATURE_LBRV KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 1)
+#define X86_FEATURE_NRIPS KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 3)
+#define X86_FEATURE_TSCRATEMSR KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4)
+#define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
+#define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
+#define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
+#define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
+#define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
+
+/*
+ * KVM defined paravirt features.
+ */
+#define X86_FEATURE_KVM_CLOCKSOURCE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 0)
+#define X86_FEATURE_KVM_NOP_IO_DELAY KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 1)
+#define X86_FEATURE_KVM_MMU_OP KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 2)
+#define X86_FEATURE_KVM_CLOCKSOURCE2 KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 3)
+#define X86_FEATURE_KVM_ASYNC_PF KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 4)
+#define X86_FEATURE_KVM_STEAL_TIME KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 5)
+#define X86_FEATURE_KVM_PV_EOI KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 6)
+#define X86_FEATURE_KVM_PV_UNHALT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 7)
+/* Bit 8 apparently isn't used?!?! */
+#define X86_FEATURE_KVM_PV_TLB_FLUSH KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 9)
+#define X86_FEATURE_KVM_ASYNC_PF_VMEXIT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 10)
+#define X86_FEATURE_KVM_PV_SEND_IPI KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 11)
+#define X86_FEATURE_KVM_POLL_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 12)
+#define X86_FEATURE_KVM_PV_SCHED_YIELD KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 13)
+#define X86_FEATURE_KVM_ASYNC_PF_INT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 14)
+#define X86_FEATURE_KVM_MSI_EXT_DEST_ID KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 15)
+#define X86_FEATURE_KVM_HC_MAP_GPA_RANGE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 16)
+#define X86_FEATURE_KVM_MIGRATION_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 17)
+
+/*
+ * Same idea as X86_FEATURE_XXX, but X86_PROPERTY_XXX retrieves a multi-bit
+ * value/property as opposed to a single-bit feature. Again, pack the info
+ * into a 64-bit value to pass by value with no overhead.
+ */
+struct kvm_x86_cpu_property {
+ u32 function;
+ u8 index;
+ u8 reg;
+ u8 lo_bit;
+ u8 hi_bit;
+};
+#define KVM_X86_CPU_PROPERTY(fn, idx, gpr, low_bit, high_bit) \
+({ \
+ struct kvm_x86_cpu_property property = { \
+ .function = fn, \
+ .index = idx, \
+ .reg = KVM_CPUID_##gpr, \
+ .lo_bit = low_bit, \
+ .hi_bit = high_bit, \
+ }; \
+ \
+ kvm_static_assert(low_bit < high_bit); \
+ kvm_static_assert((fn & 0xc0000000) == 0 || \
+ (fn & 0xc0000000) == 0x40000000 || \
+ (fn & 0xc0000000) == 0x80000000 || \
+ (fn & 0xc0000000) == 0xc0000000); \
+ kvm_static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE)); \
+ property; \
+})
+
+#define X86_PROPERTY_MAX_BASIC_LEAF KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31)
+#define X86_PROPERTY_PMU_VERSION KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7)
+#define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
+#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23)
+#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
+#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7)
+#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31)
+#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4)
+#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12)
+
+#define X86_PROPERTY_SUPPORTED_XCR0_LO KVM_X86_CPU_PROPERTY(0xd, 0, EAX, 0, 31)
+#define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0 KVM_X86_CPU_PROPERTY(0xd, 0, EBX, 0, 31)
+#define X86_PROPERTY_XSTATE_MAX_SIZE KVM_X86_CPU_PROPERTY(0xd, 0, ECX, 0, 31)
+#define X86_PROPERTY_SUPPORTED_XCR0_HI KVM_X86_CPU_PROPERTY(0xd, 0, EDX, 0, 31)
+
+#define X86_PROPERTY_XSTATE_TILE_SIZE KVM_X86_CPU_PROPERTY(0xd, 18, EAX, 0, 31)
+#define X86_PROPERTY_XSTATE_TILE_OFFSET KVM_X86_CPU_PROPERTY(0xd, 18, EBX, 0, 31)
+#define X86_PROPERTY_AMX_MAX_PALETTE_TABLES KVM_X86_CPU_PROPERTY(0x1d, 0, EAX, 0, 31)
+#define X86_PROPERTY_AMX_TOTAL_TILE_BYTES KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 0, 15)
+#define X86_PROPERTY_AMX_BYTES_PER_TILE KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 16, 31)
+#define X86_PROPERTY_AMX_BYTES_PER_ROW KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 0, 15)
+#define X86_PROPERTY_AMX_NR_TILE_REGS KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 16, 31)
+#define X86_PROPERTY_AMX_MAX_ROWS KVM_X86_CPU_PROPERTY(0x1d, 1, ECX, 0, 15)
+
+#define X86_PROPERTY_MAX_KVM_LEAF KVM_X86_CPU_PROPERTY(0x40000000, 0, EAX, 0, 31)
+
+#define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31)
+#define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7)
+#define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15)
+#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23)
+#define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
+#define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
+
+#define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)
+
+/*
+ * Intel's architectural PMU events are bizarre. They have a "feature" bit
+ * that indicates the feature is _not_ supported, and a property that states
+ * the length of the bit mask of unsupported features. A feature is supported
+ * if the size of the bit mask is larger than the "unavailable" bit, and said
+ * bit is not set. Fixed counters also have bizarre enumeration, inverted from
+ * arch events for general purpose counters. Fixed counters are supported if a
+ * feature flag is set **OR** the total number of fixed counters is greater
+ * than the index of the counter.
+ *
+ * Wrap the events for general purpose and fixed counters to simplify checking
+ * whether or not a given architectural event is supported.
+ */
+struct kvm_x86_pmu_feature {
+ struct kvm_x86_cpu_feature f;
+};
+#define KVM_X86_PMU_FEATURE(__reg, __bit) \
+({ \
+ struct kvm_x86_pmu_feature feature = { \
+ .f = KVM_X86_CPU_FEATURE(0xa, 0, __reg, __bit), \
+ }; \
+ \
+ kvm_static_assert(KVM_CPUID_##__reg == KVM_CPUID_EBX || \
+ KVM_CPUID_##__reg == KVM_CPUID_ECX); \
+ feature; \
+})
+
+#define X86_PMU_FEATURE_CPU_CYCLES KVM_X86_PMU_FEATURE(EBX, 0)
+#define X86_PMU_FEATURE_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 1)
+#define X86_PMU_FEATURE_REFERENCE_CYCLES KVM_X86_PMU_FEATURE(EBX, 2)
+#define X86_PMU_FEATURE_LLC_REFERENCES KVM_X86_PMU_FEATURE(EBX, 3)
+#define X86_PMU_FEATURE_LLC_MISSES KVM_X86_PMU_FEATURE(EBX, 4)
+#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 5)
+#define X86_PMU_FEATURE_BRANCHES_MISPREDICTED KVM_X86_PMU_FEATURE(EBX, 6)
+#define X86_PMU_FEATURE_TOPDOWN_SLOTS KVM_X86_PMU_FEATURE(EBX, 7)
+
+#define X86_PMU_FEATURE_INSNS_RETIRED_FIXED KVM_X86_PMU_FEATURE(ECX, 0)
+#define X86_PMU_FEATURE_CPU_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 1)
+#define X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 2)
+#define X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED KVM_X86_PMU_FEATURE(ECX, 3)
+
+static inline unsigned int x86_family(unsigned int eax)
+{
+ unsigned int x86;
+
+ x86 = (eax >> 8) & 0xf;
+
+ if (x86 == 0xf)
+ x86 += (eax >> 20) & 0xff;
+
+ return x86;
+}
+
+static inline unsigned int x86_model(unsigned int eax)
+{
+ return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f);
+}
+
+/* Page table bitfield declarations */
+#define PTE_PRESENT_MASK BIT_ULL(0)
+#define PTE_WRITABLE_MASK BIT_ULL(1)
+#define PTE_USER_MASK BIT_ULL(2)
+#define PTE_ACCESSED_MASK BIT_ULL(5)
+#define PTE_DIRTY_MASK BIT_ULL(6)
+#define PTE_LARGE_MASK BIT_ULL(7)
+#define PTE_GLOBAL_MASK BIT_ULL(8)
+#define PTE_NX_MASK BIT_ULL(63)
+
+#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12)
+
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (1ULL << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1) & PHYSICAL_PAGE_MASK)
+
+#define HUGEPAGE_SHIFT(x) (PAGE_SHIFT + (((x) - 1) * 9))
+#define HUGEPAGE_SIZE(x) (1UL << HUGEPAGE_SHIFT(x))
+#define HUGEPAGE_MASK(x) (~(HUGEPAGE_SIZE(x) - 1) & PHYSICAL_PAGE_MASK)
+
+#define PTE_GET_PA(pte) ((pte) & PHYSICAL_PAGE_MASK)
+#define PTE_GET_PFN(pte) (PTE_GET_PA(pte) >> PAGE_SHIFT)
+
+/* General Registers in 64-Bit Mode */
+struct gpr64_regs {
+ u64 rax;
+ u64 rcx;
+ u64 rdx;
+ u64 rbx;
+ u64 rsp;
+ u64 rbp;
+ u64 rsi;
+ u64 rdi;
+ u64 r8;
+ u64 r9;
+ u64 r10;
+ u64 r11;
+ u64 r12;
+ u64 r13;
+ u64 r14;
+ u64 r15;
+};
+
+struct desc64 {
+ uint16_t limit0;
+ uint16_t base0;
+ unsigned base1:8, type:4, s:1, dpl:2, p:1;
+ unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
+ uint32_t base3;
+ uint32_t zero1;
+} __attribute__((packed));
+
+struct desc_ptr {
+ uint16_t size;
+ uint64_t address;
+} __attribute__((packed));
+
+struct kvm_x86_state {
+ struct kvm_xsave *xsave;
+ struct kvm_vcpu_events events;
+ struct kvm_mp_state mp_state;
+ struct kvm_regs regs;
+ struct kvm_xcrs xcrs;
+ struct kvm_sregs sregs;
+ struct kvm_debugregs debugregs;
+ union {
+ struct kvm_nested_state nested;
+ char nested_[16384];
+ };
+ struct kvm_msrs msrs;
+};
+
+static inline uint64_t get_desc64_base(const struct desc64 *desc)
+{
+ return ((uint64_t)desc->base3 << 32) |
+ (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
+}
+
+static inline uint64_t rdtsc(void)
+{
+ uint32_t eax, edx;
+ uint64_t tsc_val;
+ /*
+ * The lfence is to wait (on Intel CPUs) until all previous
+ * instructions have been executed. If software requires RDTSC to be
+ * executed prior to execution of any subsequent instruction, it can
+ * execute LFENCE immediately after RDTSC
+ */
+ __asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
+ tsc_val = ((uint64_t)edx) << 32 | eax;
+ return tsc_val;
+}
+
+static inline uint64_t rdtscp(uint32_t *aux)
+{
+ uint32_t eax, edx;
+
+ __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
+ return ((uint64_t)edx) << 32 | eax;
+}
+
+static inline uint64_t rdmsr(uint32_t msr)
+{
+ uint32_t a, d;
+
+ __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+ return a | ((uint64_t) d << 32);
+}
+
+static inline void wrmsr(uint32_t msr, uint64_t value)
+{
+ uint32_t a = value;
+ uint32_t d = value >> 32;
+
+ __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+static inline uint16_t inw(uint16_t port)
+{
+ uint16_t tmp;
+
+ __asm__ __volatile__("in %%dx, %%ax"
+ : /* output */ "=a" (tmp)
+ : /* input */ "d" (port));
+
+ return tmp;
+}
+
+static inline uint16_t get_es(void)
+{
+ uint16_t es;
+
+ __asm__ __volatile__("mov %%es, %[es]"
+ : /* output */ [es]"=rm"(es));
+ return es;
+}
+
+static inline uint16_t get_cs(void)
+{
+ uint16_t cs;
+
+ __asm__ __volatile__("mov %%cs, %[cs]"
+ : /* output */ [cs]"=rm"(cs));
+ return cs;
+}
+
+static inline uint16_t get_ss(void)
+{
+ uint16_t ss;
+
+ __asm__ __volatile__("mov %%ss, %[ss]"
+ : /* output */ [ss]"=rm"(ss));
+ return ss;
+}
+
+static inline uint16_t get_ds(void)
+{
+ uint16_t ds;
+
+ __asm__ __volatile__("mov %%ds, %[ds]"
+ : /* output */ [ds]"=rm"(ds));
+ return ds;
+}
+
+static inline uint16_t get_fs(void)
+{
+ uint16_t fs;
+
+ __asm__ __volatile__("mov %%fs, %[fs]"
+ : /* output */ [fs]"=rm"(fs));
+ return fs;
+}
+
+static inline uint16_t get_gs(void)
+{
+ uint16_t gs;
+
+ __asm__ __volatile__("mov %%gs, %[gs]"
+ : /* output */ [gs]"=rm"(gs));
+ return gs;
+}
+
+static inline uint16_t get_tr(void)
+{
+ uint16_t tr;
+
+ __asm__ __volatile__("str %[tr]"
+ : /* output */ [tr]"=rm"(tr));
+ return tr;
+}
+
+static inline uint64_t get_cr0(void)
+{
+ uint64_t cr0;
+
+ __asm__ __volatile__("mov %%cr0, %[cr0]"
+ : /* output */ [cr0]"=r"(cr0));
+ return cr0;
+}
+
+static inline uint64_t get_cr3(void)
+{
+ uint64_t cr3;
+
+ __asm__ __volatile__("mov %%cr3, %[cr3]"
+ : /* output */ [cr3]"=r"(cr3));
+ return cr3;
+}
+
+static inline uint64_t get_cr4(void)
+{
+ uint64_t cr4;
+
+ __asm__ __volatile__("mov %%cr4, %[cr4]"
+ : /* output */ [cr4]"=r"(cr4));
+ return cr4;
+}
+
+static inline void set_cr4(uint64_t val)
+{
+ __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
+}
+
+static inline u64 xgetbv(u32 index)
+{
+ u32 eax, edx;
+
+ __asm__ __volatile__("xgetbv;"
+ : "=a" (eax), "=d" (edx)
+ : "c" (index));
+ return eax | ((u64)edx << 32);
+}
+
+static inline void xsetbv(u32 index, u64 value)
+{
+ u32 eax = value;
+ u32 edx = value >> 32;
+
+ __asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
+}
+
+static inline void wrpkru(u32 pkru)
+{
+ /* Note, ECX and EDX are architecturally required to be '0'. */
+ asm volatile(".byte 0x0f,0x01,0xef\n\t"
+ : : "a" (pkru), "c"(0), "d"(0));
+}
+
+static inline struct desc_ptr get_gdt(void)
+{
+ struct desc_ptr gdt;
+ __asm__ __volatile__("sgdt %[gdt]"
+ : /* output */ [gdt]"=m"(gdt));
+ return gdt;
+}
+
+static inline struct desc_ptr get_idt(void)
+{
+ struct desc_ptr idt;
+ __asm__ __volatile__("sidt %[idt]"
+ : /* output */ [idt]"=m"(idt));
+ return idt;
+}
+
+static inline void outl(uint16_t port, uint32_t value)
+{
+ __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
+}
+
+static inline void __cpuid(uint32_t function, uint32_t index,
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
+{
+ *eax = function;
+ *ecx = index;
+
+ asm volatile("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx)
+ : "memory");
+}
+
+static inline void cpuid(uint32_t function,
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
+{
+ return __cpuid(function, 0, eax, ebx, ecx, edx);
+}
+
+static inline uint32_t this_cpu_fms(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+ return eax;
+}
+
+static inline uint32_t this_cpu_family(void)
+{
+ return x86_family(this_cpu_fms());
+}
+
+static inline uint32_t this_cpu_model(void)
+{
+ return x86_model(this_cpu_fms());
+}
+
+static inline bool this_cpu_vendor_string_is(const char *vendor)
+{
+ const uint32_t *chunk = (const uint32_t *)vendor;
+ uint32_t eax, ebx, ecx, edx;
+
+ cpuid(0, &eax, &ebx, &ecx, &edx);
+ return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
+}
+
+static inline bool this_cpu_is_intel(void)
+{
+ return this_cpu_vendor_string_is("GenuineIntel");
+}
+
+/*
+ * Exclude early K5 samples with a vendor string of "AMDisbetter!"
+ */
+static inline bool this_cpu_is_amd(void)
+{
+ return this_cpu_vendor_string_is("AuthenticAMD");
+}
+
+static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index,
+ uint8_t reg, uint8_t lo, uint8_t hi)
+{
+ uint32_t gprs[4];
+
+ __cpuid(function, index,
+ &gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX],
+ &gprs[KVM_CPUID_ECX], &gprs[KVM_CPUID_EDX]);
+
+ return (gprs[reg] & GENMASK(hi, lo)) >> lo;
+}
+
+static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature)
+{
+ return __this_cpu_has(feature.function, feature.index,
+ feature.reg, feature.bit, feature.bit);
+}
+
+static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property)
+{
+ return __this_cpu_has(property.function, property.index,
+ property.reg, property.lo_bit, property.hi_bit);
+}
+
+static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property)
+{
+ uint32_t max_leaf;
+
+ switch (property.function & 0xc0000000) {
+ case 0:
+ max_leaf = this_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
+ break;
+ case 0x40000000:
+ max_leaf = this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
+ break;
+ case 0x80000000:
+ max_leaf = this_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
+ break;
+ case 0xc0000000:
+ max_leaf = this_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
+ }
+ return max_leaf >= property.function;
+}
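+
+/*
+ * Illustrative sketch (hypothetical helper, not used by existing tests):
+ * guard multi-bit property reads on leaf existence, e.g. leaf 0x80000008 may
+ * not exist on ancient CPUs, in which case this sketch simply returns 0.
+ */
+static inline uint32_t example_this_cpu_maxphyaddr(void)
+{
+	if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR))
+		return 0;
+
+	return this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
+}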
+
+static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature)
+{
+ uint32_t nr_bits;
+
+ if (feature.f.reg == KVM_CPUID_EBX) {
+ nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+ return nr_bits > feature.f.bit && !this_cpu_has(feature.f);
+ }
+
+ GUEST_ASSERT(feature.f.reg == KVM_CPUID_ECX);
+ nr_bits = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ return nr_bits > feature.f.bit || this_cpu_has(feature.f);
+}
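+
+/*
+ * Sketch of the inverted enumeration described above (hypothetical helper):
+ * an architectural event is advertised via an "unavailable" bit, so
+ * this_pmu_has() checks that the EBX bit vector is long enough to cover the
+ * event *and* that the corresponding bit is clear.
+ */
+static inline bool example_branches_retired_is_supported(void)
+{
+	return this_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED);
+}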
+
+static __always_inline uint64_t this_cpu_supported_xcr0(void)
+{
+ if (!this_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
+ return 0;
+
+ return this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
+ ((uint64_t)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
+}
+
+typedef u32 __attribute__((vector_size(16))) sse128_t;
+#define __sse128_u union { sse128_t vec; u64 as_u64[2]; u32 as_u32[4]; }
+#define sse128_lo(x) ({ __sse128_u t; t.vec = x; t.as_u64[0]; })
+#define sse128_hi(x) ({ __sse128_u t; t.vec = x; t.as_u64[1]; })
+
+static inline void read_sse_reg(int reg, sse128_t *data)
+{
+ switch (reg) {
+ case 0:
+ asm("movdqa %%xmm0, %0" : "=m"(*data));
+ break;
+ case 1:
+ asm("movdqa %%xmm1, %0" : "=m"(*data));
+ break;
+ case 2:
+ asm("movdqa %%xmm2, %0" : "=m"(*data));
+ break;
+ case 3:
+ asm("movdqa %%xmm3, %0" : "=m"(*data));
+ break;
+ case 4:
+ asm("movdqa %%xmm4, %0" : "=m"(*data));
+ break;
+ case 5:
+ asm("movdqa %%xmm5, %0" : "=m"(*data));
+ break;
+ case 6:
+ asm("movdqa %%xmm6, %0" : "=m"(*data));
+ break;
+ case 7:
+ asm("movdqa %%xmm7, %0" : "=m"(*data));
+ break;
+ default:
+ BUG();
+ }
+}
+
+static inline void write_sse_reg(int reg, const sse128_t *data)
+{
+ switch (reg) {
+ case 0:
+ asm("movdqa %0, %%xmm0" : : "m"(*data));
+ break;
+ case 1:
+ asm("movdqa %0, %%xmm1" : : "m"(*data));
+ break;
+ case 2:
+ asm("movdqa %0, %%xmm2" : : "m"(*data));
+ break;
+ case 3:
+ asm("movdqa %0, %%xmm3" : : "m"(*data));
+ break;
+ case 4:
+ asm("movdqa %0, %%xmm4" : : "m"(*data));
+ break;
+ case 5:
+ asm("movdqa %0, %%xmm5" : : "m"(*data));
+ break;
+ case 6:
+ asm("movdqa %0, %%xmm6" : : "m"(*data));
+ break;
+ case 7:
+ asm("movdqa %0, %%xmm7" : : "m"(*data));
+ break;
+ default:
+ BUG();
+ }
+}
+
+static inline void cpu_relax(void)
+{
+ asm volatile("rep; nop" ::: "memory");
+}
+
+static inline void udelay(unsigned long usec)
+{
+ uint64_t start, now, cycles;
+
+ GUEST_ASSERT(guest_tsc_khz);
+ cycles = guest_tsc_khz / 1000 * usec;
+
+ /*
+ * Deliberately don't PAUSE, a.k.a. cpu_relax(), so that the delay is
+ * as accurate as possible, e.g. doesn't trigger PAUSE-Loop VM-Exits.
+ */
+ start = rdtsc();
+ do {
+ now = rdtsc();
+ } while (now - start < cycles);
+}
+
+#define ud2() \
+ __asm__ __volatile__( \
+ "ud2\n" \
+ )
+
+#define hlt() \
+ __asm__ __volatile__( \
+ "hlt\n" \
+ )
+
+struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu);
+void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state);
+void kvm_x86_state_cleanup(struct kvm_x86_state *state);
+
+const struct kvm_msr_list *kvm_get_msr_index_list(void);
+const struct kvm_msr_list *kvm_get_feature_msr_index_list(void);
+bool kvm_msr_is_in_save_restore_list(uint32_t msr_index);
+uint64_t kvm_get_feature_msr(uint64_t msr_index);
+
+static inline void vcpu_msrs_get(struct kvm_vcpu *vcpu,
+ struct kvm_msrs *msrs)
+{
+ int r = __vcpu_ioctl(vcpu, KVM_GET_MSRS, msrs);
+
+ TEST_ASSERT(r == msrs->nmsrs,
+ "KVM_GET_MSRS failed, r: %i (failed on MSR %x)",
+ r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
+}
+static inline void vcpu_msrs_set(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs)
+{
+ int r = __vcpu_ioctl(vcpu, KVM_SET_MSRS, msrs);
+
+ TEST_ASSERT(r == msrs->nmsrs,
+ "KVM_SET_MSRS failed, r: %i (failed on MSR %x)",
+ r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
+}
+static inline void vcpu_debugregs_get(struct kvm_vcpu *vcpu,
+ struct kvm_debugregs *debugregs)
+{
+ vcpu_ioctl(vcpu, KVM_GET_DEBUGREGS, debugregs);
+}
+static inline void vcpu_debugregs_set(struct kvm_vcpu *vcpu,
+ struct kvm_debugregs *debugregs)
+{
+ vcpu_ioctl(vcpu, KVM_SET_DEBUGREGS, debugregs);
+}
+static inline void vcpu_xsave_get(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *xsave)
+{
+ vcpu_ioctl(vcpu, KVM_GET_XSAVE, xsave);
+}
+static inline void vcpu_xsave2_get(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *xsave)
+{
+ vcpu_ioctl(vcpu, KVM_GET_XSAVE2, xsave);
+}
+static inline void vcpu_xsave_set(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *xsave)
+{
+ vcpu_ioctl(vcpu, KVM_SET_XSAVE, xsave);
+}
+static inline void vcpu_xcrs_get(struct kvm_vcpu *vcpu,
+ struct kvm_xcrs *xcrs)
+{
+ vcpu_ioctl(vcpu, KVM_GET_XCRS, xcrs);
+}
+static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs)
+{
+ vcpu_ioctl(vcpu, KVM_SET_XCRS, xcrs);
+}
+
+const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
+ uint32_t function, uint32_t index);
+const struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
+
+static inline uint32_t kvm_cpu_fms(void)
+{
+ return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax;
+}
+
+static inline uint32_t kvm_cpu_family(void)
+{
+ return x86_family(kvm_cpu_fms());
+}
+
+static inline uint32_t kvm_cpu_model(void)
+{
+ return x86_model(kvm_cpu_fms());
+}
+
+bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
+ struct kvm_x86_cpu_feature feature);
+
+static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature)
+{
+ return kvm_cpuid_has(kvm_get_supported_cpuid(), feature);
+}
+
+uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
+ struct kvm_x86_cpu_property property);
+
+static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property)
+{
+ return kvm_cpuid_property(kvm_get_supported_cpuid(), property);
+}
+
+static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property)
+{
+ uint32_t max_leaf;
+
+ switch (property.function & 0xc0000000) {
+ case 0:
+ max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
+ break;
+ case 0x40000000:
+ max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
+ break;
+ case 0x80000000:
+ max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
+ break;
+ case 0xc0000000:
+ max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
+ }
+ return max_leaf >= property.function;
+}
+
+static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature)
+{
+ uint32_t nr_bits;
+
+ if (feature.f.reg == KVM_CPUID_EBX) {
+ nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+ return nr_bits > feature.f.bit && !kvm_cpu_has(feature.f);
+ }
+
+ TEST_ASSERT_EQ(feature.f.reg, KVM_CPUID_ECX);
+ nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ return nr_bits > feature.f.bit || kvm_cpu_has(feature.f);
+}
+
+static __always_inline uint64_t kvm_cpu_supported_xcr0(void)
+{
+ if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
+ return 0;
+
+ return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
+ ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
+}
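+
+/*
+ * Illustrative sketch (hypothetical helper): combine the supported XCR0 mask
+ * with the XFEATURE_MASK_* definitions above, e.g. AMX requires both XTILE
+ * components.
+ */
+static inline bool example_kvm_supports_amx_xstate(void)
+{
+	uint64_t xtile = XFEATURE_MASK_XTILE;
+
+	return (kvm_cpu_supported_xcr0() & xtile) == xtile;
+}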
+
+static inline size_t kvm_cpuid2_size(int nr_entries)
+{
+ return sizeof(struct kvm_cpuid2) +
+ sizeof(struct kvm_cpuid_entry2) * nr_entries;
+}
+
+/*
+ * Allocate a "struct kvm_cpuid2" instance, with the 0-length array of
+ * entries sized to hold @nr_entries. The caller is responsible for freeing
+ * the struct.
+ */
+static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries)
+{
+ struct kvm_cpuid2 *cpuid;
+
+ cpuid = malloc(kvm_cpuid2_size(nr_entries));
+ TEST_ASSERT(cpuid, "-ENOMEM when allocating kvm_cpuid2");
+
+ cpuid->nent = nr_entries;
+
+ return cpuid;
+}
+
+void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid);
+
+static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
+ uint32_t function,
+ uint32_t index)
+{
+ return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid,
+ function, index);
+}
+
+static inline struct kvm_cpuid_entry2 *vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
+ uint32_t function)
+{
+ return __vcpu_get_cpuid_entry(vcpu, function, 0);
+}
+
+static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu)
+{
+ int r;
+
+ TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
+ r = __vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
+ if (r)
+ return r;
+
+ /* On success, refresh the cache to pick up adjustments made by KVM. */
+ vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+ return 0;
+}
+
+static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
+{
+ TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
+ vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
+
+ /* Refresh the cache to pick up adjustments made by KVM. */
+ vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+}
+
+static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu)
+{
+ vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+}
+
+void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_property property,
+ uint32_t value);
+void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr);
+
+void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);
+
+static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature)
+{
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
+ return *((&entry->eax) + feature.reg) & BIT(feature.bit);
+}
+
+void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature,
+ bool set);
+
+static inline void vcpu_set_cpuid_feature(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature)
+{
+ vcpu_set_or_clear_cpuid_feature(vcpu, feature, true);
+}
+
+static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature)
+{
+ vcpu_set_or_clear_cpuid_feature(vcpu, feature, false);
+}
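+
+/*
+ * Minimal sketch (hypothetical helper): seed a vCPU's CPUID from KVM's
+ * supported CPUID, then hide a feature, e.g. to exercise behavior without
+ * nested VMX.
+ */
+static inline void example_init_cpuid_without_vmx(struct kvm_vcpu *vcpu)
+{
+	vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
+	vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
+}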
+
+uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index);
+int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value);
+
+/*
+ * Assert on MSR access(es) and pretty print the MSR name when possible.
+ * Note, the caller provides the stringified name so that the name of the macro
+ * is printed, not the value the macro resolves to (due to macro expansion).
+ */
+#define TEST_ASSERT_MSR(cond, fmt, msr, str, args...) \
+do { \
+ if (__builtin_constant_p(msr)) { \
+ TEST_ASSERT(cond, fmt, str, args); \
+ } else if (!(cond)) { \
+ char buf[16]; \
+ \
+ snprintf(buf, sizeof(buf), "MSR 0x%x", msr); \
+ TEST_ASSERT(cond, fmt, buf, args); \
+ } \
+} while (0)
+
+/*
+ * Returns true if KVM should return the last written value when reading an MSR
+ * from userspace, e.g. the MSR isn't a command MSR, doesn't emulate state that
+ * is changing, etc. This is NOT an exhaustive list! The intent is to filter
+ * out MSRs that are not durable _and_ that a selftest wants to write.
+ */
+static inline bool is_durable_msr(uint32_t msr)
+{
+ return msr != MSR_IA32_TSC;
+}
+
+#define vcpu_set_msr(vcpu, msr, val) \
+do { \
+ uint64_t r, v = val; \
+ \
+ TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1, \
+ "KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v); \
+ if (!is_durable_msr(msr)) \
+ break; \
+ r = vcpu_get_msr(vcpu, msr); \
+ TEST_ASSERT_MSR(r == v, "Set %s to '0x%lx', got back '0x%lx'", msr, #msr, v, r);\
+} while (0)
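+
+/*
+ * Illustrative usage (the helper is hypothetical; the value is the
+ * architectural power-on default PAT): the write is read back and verified
+ * only because MSR_IA32_CR_PAT is "durable", see is_durable_msr() above.
+ */
+static inline void example_set_default_pat(struct kvm_vcpu *vcpu)
+{
+	vcpu_set_msr(vcpu, MSR_IA32_CR_PAT, 0x0007040600070406ull);
+}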
+
+void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
+void kvm_init_vm_address_properties(struct kvm_vm *vm);
+bool vm_is_unrestricted_guest(struct kvm_vm *vm);
+
+struct ex_regs {
+ uint64_t rax, rcx, rdx, rbx;
+ uint64_t rbp, rsi, rdi;
+ uint64_t r8, r9, r10, r11;
+ uint64_t r12, r13, r14, r15;
+ uint64_t vector;
+ uint64_t error_code;
+ uint64_t rip;
+ uint64_t cs;
+ uint64_t rflags;
+};
+
+struct idt_entry {
+ uint16_t offset0;
+ uint16_t selector;
+ uint16_t ist : 3;
+ uint16_t : 5;
+ uint16_t type : 4;
+ uint16_t : 1;
+ uint16_t dpl : 2;
+ uint16_t p : 1;
+ uint16_t offset1;
+	uint32_t offset2;
+	uint32_t reserved;
+};
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+ void (*handler)(struct ex_regs *));
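+
+/*
+ * Sketch of the typical pattern (the handler and vector choice are purely
+ * illustrative): a guest #UD handler that skips the two-byte UD2 opcode.
+ */
+static inline void example_guest_ud_handler(struct ex_regs *regs)
+{
+	regs->rip += 2;	/* UD2 is a two-byte opcode. */
+}
+
+static inline void example_install_ud_handler(struct kvm_vm *vm)
+{
+	vm_install_exception_handler(vm, 6 /* #UD */, example_guest_ud_handler);
+}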
+
+/* If a toddler were to say "abracadabra". */
+#define KVM_EXCEPTION_MAGIC 0xabacadabaULL
+
+/*
+ * KVM selftest exception fixup uses registers to coordinate with the exception
+ * handler, versus the kernel's in-memory tables and KVM-Unit-Tests's in-memory
+ * per-CPU data. Using only registers avoids having to map memory into the
+ * guest, doesn't require a valid, stable GS.base, and reduces the risk of
+ * recursive faults when accessing memory in the handler. The downside to
+ * using registers is that it restricts what registers can be used by the actual
+ * instruction. But, selftests are 64-bit only, making register pressure a
+ * minor concern. Use r9-r11 as they are volatile, i.e. don't need to be saved
+ * by the callee, and except for r11 are not implicit parameters to any
+ * instructions. Ideally, fixup would use r8-r10 and thus avoid implicit
+ * parameters entirely, but Hyper-V's hypercall ABI uses r8 and testing Hyper-V
+ * is higher priority than testing non-faulting SYSCALL/SYSRET.
+ *
+ * Note, the fixup handler deliberately does not handle #DE, i.e. the vector
+ * is guaranteed to be non-zero on fault.
+ *
+ * REGISTER INPUTS:
+ * r9 = MAGIC
+ * r10 = RIP
+ * r11 = new RIP on fault
+ *
+ * REGISTER OUTPUTS:
+ * r9 = exception vector (non-zero)
+ * r10 = error code
+ */
+#define __KVM_ASM_SAFE(insn, fep) \
+ "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t" \
+ "lea 1f(%%rip), %%r10\n\t" \
+ "lea 2f(%%rip), %%r11\n\t" \
+ fep "1: " insn "\n\t" \
+ "xor %%r9, %%r9\n\t" \
+ "2:\n\t" \
+ "mov %%r9b, %[vector]\n\t" \
+ "mov %%r10, %[error_code]\n\t"
+
+#define KVM_ASM_SAFE(insn) __KVM_ASM_SAFE(insn, "")
+#define KVM_ASM_SAFE_FEP(insn) __KVM_ASM_SAFE(insn, KVM_FEP)
+
+#define KVM_ASM_SAFE_OUTPUTS(v, ec) [vector] "=qm"(v), [error_code] "=rm"(ec)
+#define KVM_ASM_SAFE_CLOBBERS "r9", "r10", "r11"
+
+#define kvm_asm_safe(insn, inputs...) \
+({ \
+ uint64_t ign_error_code; \
+ uint8_t vector; \
+ \
+ asm volatile(KVM_ASM_SAFE(insn) \
+ : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
+ : inputs \
+ : KVM_ASM_SAFE_CLOBBERS); \
+ vector; \
+})
+
+#define kvm_asm_safe_ec(insn, error_code, inputs...) \
+({ \
+ uint8_t vector; \
+ \
+ asm volatile(KVM_ASM_SAFE(insn) \
+ : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \
+ : inputs \
+ : KVM_ASM_SAFE_CLOBBERS); \
+ vector; \
+})
+
+#define kvm_asm_safe_fep(insn, inputs...) \
+({ \
+ uint64_t ign_error_code; \
+ uint8_t vector; \
+ \
+ asm volatile(KVM_ASM_SAFE(insn) \
+ : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
+ : inputs \
+ : KVM_ASM_SAFE_CLOBBERS); \
+ vector; \
+})
+
+#define kvm_asm_safe_ec_fep(insn, error_code, inputs...) \
+({ \
+ uint8_t vector; \
+ \
+ asm volatile(KVM_ASM_SAFE_FEP(insn) \
+ : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \
+ : inputs \
+ : KVM_ASM_SAFE_CLOBBERS); \
+ vector; \
+})
+
+#define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \
+static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val) \
+{ \
+ uint64_t error_code; \
+ uint8_t vector; \
+ uint32_t a, d; \
+ \
+ asm volatile(KVM_ASM_SAFE##_FEP(#insn) \
+ : "=a"(a), "=d"(d), \
+ KVM_ASM_SAFE_OUTPUTS(vector, error_code) \
+ : "c"(idx) \
+ : KVM_ASM_SAFE_CLOBBERS); \
+ \
+ *val = (uint64_t)a | ((uint64_t)d << 32); \
+ return vector; \
+}
+
+/*
+ * Generate {insn}_safe() and {insn}_safe_fep() helpers for instructions that
+ * use ECX as an input index, and EDX:EAX as a 64-bit output.
+ */
+#define BUILD_READ_U64_SAFE_HELPERS(insn) \
+ BUILD_READ_U64_SAFE_HELPER(insn, , ) \
+ BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \
+
+BUILD_READ_U64_SAFE_HELPERS(rdmsr)
+BUILD_READ_U64_SAFE_HELPERS(rdpmc)
+BUILD_READ_U64_SAFE_HELPERS(xgetbv)
+
+static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
+{
+ return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr));
+}
+
+static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value)
+{
+ u32 eax = value;
+ u32 edx = value >> 32;
+
+ return kvm_asm_safe("xsetbv", "a" (eax), "d" (edx), "c" (index));
+}
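+
+/*
+ * Sketch of the fixup machinery in action (the MSR index is an arbitrary,
+ * assumed-to-be-bogus value): a faulting WRMSR hands back a non-zero vector
+ * instead of killing the guest.
+ */
+static inline void example_expect_wrmsr_to_fault(void)
+{
+	GUEST_ASSERT(wrmsr_safe(0xdeadbeef, 0));
+}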
+
+bool kvm_is_tdp_enabled(void);
+
+static inline bool kvm_is_pmu_enabled(void)
+{
+ return get_kvm_param_bool("enable_pmu");
+}
+
+static inline bool kvm_is_forced_emulation_enabled(void)
+{
+ return !!get_kvm_param_integer("force_emulation_prefix");
+}
+
+uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
+ int *level);
+uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);
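+
+/*
+ * Sketch (hypothetical helper): fetch the final PTE for a guest virtual
+ * address and check it against the PTE_*_MASK definitions above.
+ */
+static inline bool example_gva_is_writable(struct kvm_vm *vm, uint64_t gva)
+{
+	uint64_t pte = *vm_get_page_table_entry(vm, gva);
+
+	return (pte & PTE_PRESENT_MASK) && (pte & PTE_WRITABLE_MASK);
+}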
+
+uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
+ uint64_t a3);
+uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
+void xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
+
+static inline uint64_t __kvm_hypercall_map_gpa_range(uint64_t gpa,
+ uint64_t size, uint64_t flags)
+{
+ return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0);
+}
+
+static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size,
+ uint64_t flags)
+{
+ uint64_t ret = __kvm_hypercall_map_gpa_range(gpa, size, flags);
+
+ GUEST_ASSERT(!ret);
+}
+
+void __vm_xsave_require_permission(uint64_t xfeature, const char *name);
+
+#define vm_xsave_require_permission(xfeature) \
+ __vm_xsave_require_permission(xfeature, #xfeature)
+
+enum pg_level {
+ PG_LEVEL_NONE,
+ PG_LEVEL_4K,
+ PG_LEVEL_2M,
+ PG_LEVEL_1G,
+ PG_LEVEL_512G,
+ PG_LEVEL_NUM
+};
+
+#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
+#define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level))
+
+#define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K)
+#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M)
+#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)
+
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
+void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint64_t nr_bytes, int level);
+
+/*
+ * Basic CPU control in CR0
+ */
+#define X86_CR0_PE (1UL<<0) /* Protection Enable */
+#define X86_CR0_MP (1UL<<1) /* Monitor Coprocessor */
+#define X86_CR0_EM (1UL<<2) /* Emulation */
+#define X86_CR0_TS (1UL<<3) /* Task Switched */
+#define X86_CR0_ET (1UL<<4) /* Extension Type */
+#define X86_CR0_NE (1UL<<5) /* Numeric Error */
+#define X86_CR0_WP (1UL<<16) /* Write Protect */
+#define X86_CR0_AM (1UL<<18) /* Alignment Mask */
+#define X86_CR0_NW (1UL<<29) /* Not Write-through */
+#define X86_CR0_CD (1UL<<30) /* Cache Disable */
+#define X86_CR0_PG (1UL<<31) /* Paging */
+
+#define PFERR_PRESENT_BIT 0
+#define PFERR_WRITE_BIT 1
+#define PFERR_USER_BIT 2
+#define PFERR_RSVD_BIT 3
+#define PFERR_FETCH_BIT 4
+#define PFERR_PK_BIT 5
+#define PFERR_SGX_BIT 15
+#define PFERR_GUEST_FINAL_BIT 32
+#define PFERR_GUEST_PAGE_BIT 33
+#define PFERR_IMPLICIT_ACCESS_BIT 48
+
+#define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT)
+#define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT)
+#define PFERR_USER_MASK BIT(PFERR_USER_BIT)
+#define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT)
+#define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT)
+#define PFERR_PK_MASK BIT(PFERR_PK_BIT)
+#define PFERR_SGX_MASK BIT(PFERR_SGX_BIT)
+#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT)
+#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT)
+#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
+
+bool sys_clocksource_is_based_on_tsc(void);
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Helpers used for SEV guests
+ */
+#ifndef SELFTEST_KVM_SEV_H
+#define SELFTEST_KVM_SEV_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "linux/psp-sev.h"
+
+#include "kvm_util.h"
+#include "svm_util.h"
+#include "processor.h"
+
+enum sev_guest_state {
+ SEV_GUEST_STATE_UNINITIALIZED = 0,
+ SEV_GUEST_STATE_LAUNCH_UPDATE,
+ SEV_GUEST_STATE_LAUNCH_SECRET,
+ SEV_GUEST_STATE_RUNNING,
+};
+
+#define SEV_POLICY_NO_DBG (1UL << 0)
+#define SEV_POLICY_ES (1UL << 2)
+
+#define GHCB_MSR_TERM_REQ 0x100
+
+void sev_vm_launch(struct kvm_vm *vm, uint32_t policy);
+void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement);
+void sev_vm_launch_finish(struct kvm_vm *vm);
+
+struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
+ struct kvm_vcpu **cpu);
+void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement);
+
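+/*
+ * Minimal sketch of the intended launch flow (the measurement buffer size is
+ * an assumption; real tests size it per the SEV API):
+ */
+static inline void example_sev_launch(struct kvm_vm *vm)
+{
+	uint8_t measurement[512];
+
+	vm_sev_launch(vm, SEV_POLICY_NO_DBG, measurement);
+}
+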
+kvm_static_assert(SEV_RET_SUCCESS == 0);
+
+/*
+ * The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long"
+ * instead of a proper struct. The size of the parameter is embedded in the
+ * ioctl number, i.e. is ABI and thus immutable. Hack around the mess by
+ * creating an overlay to pass in an "unsigned long" without a cast (casting
+ * will make the compiler unhappy due to dereferencing an aliased pointer).
+ */
+#define __vm_sev_ioctl(vm, cmd, arg) \
+({ \
+ int r; \
+ \
+ union { \
+ struct kvm_sev_cmd c; \
+ unsigned long raw; \
+ } sev_cmd = { .c = { \
+ .id = (cmd), \
+ .data = (uint64_t)(arg), \
+ .sev_fd = (vm)->arch.sev_fd, \
+ } }; \
+ \
+ r = __vm_ioctl(vm, KVM_MEMORY_ENCRYPT_OP, &sev_cmd.raw); \
+ r ?: sev_cmd.c.error; \
+})
+
+#define vm_sev_ioctl(vm, cmd, arg) \
+({ \
+ int ret = __vm_sev_ioctl(vm, cmd, arg); \
+ \
+ __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \
+})
+
+void sev_vm_init(struct kvm_vm *vm);
+void sev_es_vm_init(struct kvm_vm *vm);
+
+static inline void sev_register_encrypted_memory(struct kvm_vm *vm,
+ struct userspace_mem_region *region)
+{
+ struct kvm_enc_region range = {
+ .addr = region->region.userspace_addr,
+ .size = region->region.memory_size,
+ };
+
+ vm_ioctl(vm, KVM_MEMORY_ENCRYPT_REG_REGION, &range);
+}
+
+static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
+ uint64_t size)
+{
+ struct kvm_sev_launch_update_data update_data = {
+ .uaddr = (unsigned long)addr_gpa2hva(vm, gpa),
+ .len = size,
+ };
+
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data);
+}
+
+#endif /* SELFTEST_KVM_SEV_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef SELFTEST_KVM_SVM_H
+#define SELFTEST_KVM_SVM_H
+
+enum {
+ INTERCEPT_INTR,
+ INTERCEPT_NMI,
+ INTERCEPT_SMI,
+ INTERCEPT_INIT,
+ INTERCEPT_VINTR,
+ INTERCEPT_SELECTIVE_CR0,
+ INTERCEPT_STORE_IDTR,
+ INTERCEPT_STORE_GDTR,
+ INTERCEPT_STORE_LDTR,
+ INTERCEPT_STORE_TR,
+ INTERCEPT_LOAD_IDTR,
+ INTERCEPT_LOAD_GDTR,
+ INTERCEPT_LOAD_LDTR,
+ INTERCEPT_LOAD_TR,
+ INTERCEPT_RDTSC,
+ INTERCEPT_RDPMC,
+ INTERCEPT_PUSHF,
+ INTERCEPT_POPF,
+ INTERCEPT_CPUID,
+ INTERCEPT_RSM,
+ INTERCEPT_IRET,
+ INTERCEPT_INTn,
+ INTERCEPT_INVD,
+ INTERCEPT_PAUSE,
+ INTERCEPT_HLT,
+ INTERCEPT_INVLPG,
+ INTERCEPT_INVLPGA,
+ INTERCEPT_IOIO_PROT,
+ INTERCEPT_MSR_PROT,
+ INTERCEPT_TASK_SWITCH,
+ INTERCEPT_FERR_FREEZE,
+ INTERCEPT_SHUTDOWN,
+ INTERCEPT_VMRUN,
+ INTERCEPT_VMMCALL,
+ INTERCEPT_VMLOAD,
+ INTERCEPT_VMSAVE,
+ INTERCEPT_STGI,
+ INTERCEPT_CLGI,
+ INTERCEPT_SKINIT,
+ INTERCEPT_RDTSCP,
+ INTERCEPT_ICEBP,
+ INTERCEPT_WBINVD,
+ INTERCEPT_MONITOR,
+ INTERCEPT_MWAIT,
+ INTERCEPT_MWAIT_COND,
+ INTERCEPT_XSETBV,
+ INTERCEPT_RDPRU,
+};
+
+struct hv_vmcb_enlightenments {
+ struct __packed hv_enlightenments_control {
+ u32 nested_flush_hypercall:1;
+ u32 msr_bitmap:1;
+ u32 enlightened_npt_tlb: 1;
+ u32 reserved:29;
+ } __packed hv_enlightenments_control;
+ u32 hv_vp_id;
+ u64 hv_vm_id;
+ u64 partition_assist_page;
+ u64 reserved;
+} __packed;
+
+/*
+ * Hyper-V uses the software reserved clean bit in VMCB
+ */
+#define HV_VMCB_NESTED_ENLIGHTENMENTS (1U << 31)
+
+/* Synthetic VM-Exit */
+#define HV_SVM_EXITCODE_ENL 0xf0000000
+#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH (1)
+
+struct __attribute__ ((__packed__)) vmcb_control_area {
+ u32 intercept_cr;
+ u32 intercept_dr;
+ u32 intercept_exceptions;
+ u64 intercept;
+ u8 reserved_1[40];
+ u16 pause_filter_thresh;
+ u16 pause_filter_count;
+ u64 iopm_base_pa;
+ u64 msrpm_base_pa;
+ u64 tsc_offset;
+ u32 asid;
+ u8 tlb_ctl;
+ u8 reserved_2[3];
+ u32 int_ctl;
+ u32 int_vector;
+ u32 int_state;
+ u8 reserved_3[4];
+ u32 exit_code;
+ u32 exit_code_hi;
+ u64 exit_info_1;
+ u64 exit_info_2;
+ u32 exit_int_info;
+ u32 exit_int_info_err;
+ u64 nested_ctl;
+ u64 avic_vapic_bar;
+ u8 reserved_4[8];
+ u32 event_inj;
+ u32 event_inj_err;
+ u64 nested_cr3;
+ u64 virt_ext;
+ u32 clean;
+ u32 reserved_5;
+ u64 next_rip;
+ u8 insn_len;
+ u8 insn_bytes[15];
+ u64 avic_backing_page; /* Offset 0xe0 */
+ u8 reserved_6[8]; /* Offset 0xe8 */
+ u64 avic_logical_id; /* Offset 0xf0 */
+ u64 avic_physical_id; /* Offset 0xf8 */
+ u8 reserved_7[8];
+ u64 vmsa_pa; /* Used for an SEV-ES guest */
+ u8 reserved_8[720];
+ /*
+ * Offset 0x3e0, 32 bytes reserved
+ * for use by hypervisor/software.
+ */
+ union {
+ struct hv_vmcb_enlightenments hv_enlightenments;
+ u8 reserved_sw[32];
+ };
+};
+
+#define TLB_CONTROL_DO_NOTHING 0
+#define TLB_CONTROL_FLUSH_ALL_ASID 1
+#define TLB_CONTROL_FLUSH_ASID 3
+#define TLB_CONTROL_FLUSH_ASID_LOCAL 7
+
+#define V_TPR_MASK 0x0f
+
+#define V_IRQ_SHIFT 8
+#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
+
+#define V_GIF_SHIFT 9
+#define V_GIF_MASK (1 << V_GIF_SHIFT)
+
+#define V_INTR_PRIO_SHIFT 16
+#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
+
+#define V_IGN_TPR_SHIFT 20
+#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
+
+#define V_INTR_MASKING_SHIFT 24
+#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
+
+#define V_GIF_ENABLE_SHIFT 25
+#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT)
+
+#define AVIC_ENABLE_SHIFT 31
+#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
+
+#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
+#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
+
+#define SVM_INTERRUPT_SHADOW_MASK 1
+
+#define SVM_IOIO_STR_SHIFT 2
+#define SVM_IOIO_REP_SHIFT 3
+#define SVM_IOIO_SIZE_SHIFT 4
+#define SVM_IOIO_ASIZE_SHIFT 7
+
+#define SVM_IOIO_TYPE_MASK 1
+#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
+#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
+#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
+#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
+
+#define SVM_VM_CR_VALID_MASK 0x001fULL
+#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
+#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL
+
+#define SVM_NESTED_CTL_NP_ENABLE BIT(0)
+#define SVM_NESTED_CTL_SEV_ENABLE BIT(1)
+
+struct __attribute__ ((__packed__)) vmcb_seg {
+ u16 selector;
+ u16 attrib;
+ u32 limit;
+ u64 base;
+};
+
+struct __attribute__ ((__packed__)) vmcb_save_area {
+ struct vmcb_seg es;
+ struct vmcb_seg cs;
+ struct vmcb_seg ss;
+ struct vmcb_seg ds;
+ struct vmcb_seg fs;
+ struct vmcb_seg gs;
+ struct vmcb_seg gdtr;
+ struct vmcb_seg ldtr;
+ struct vmcb_seg idtr;
+ struct vmcb_seg tr;
+ u8 reserved_1[43];
+ u8 cpl;
+ u8 reserved_2[4];
+ u64 efer;
+ u8 reserved_3[112];
+ u64 cr4;
+ u64 cr3;
+ u64 cr0;
+ u64 dr7;
+ u64 dr6;
+ u64 rflags;
+ u64 rip;
+ u8 reserved_4[88];
+ u64 rsp;
+ u8 reserved_5[24];
+ u64 rax;
+ u64 star;
+ u64 lstar;
+ u64 cstar;
+ u64 sfmask;
+ u64 kernel_gs_base;
+ u64 sysenter_cs;
+ u64 sysenter_esp;
+ u64 sysenter_eip;
+ u64 cr2;
+ u8 reserved_6[32];
+ u64 g_pat;
+ u64 dbgctl;
+ u64 br_from;
+ u64 br_to;
+ u64 last_excp_from;
+ u64 last_excp_to;
+};
+
+struct __attribute__ ((__packed__)) vmcb {
+ struct vmcb_control_area control;
+ struct vmcb_save_area save;
+};
+
+#define SVM_VM_CR_SVM_DISABLE 4
+
+#define SVM_SELECTOR_S_SHIFT 4
+#define SVM_SELECTOR_DPL_SHIFT 5
+#define SVM_SELECTOR_P_SHIFT 7
+#define SVM_SELECTOR_AVL_SHIFT 8
+#define SVM_SELECTOR_L_SHIFT 9
+#define SVM_SELECTOR_DB_SHIFT 10
+#define SVM_SELECTOR_G_SHIFT 11
+
+#define SVM_SELECTOR_TYPE_MASK (0xf)
+#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT)
+#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT)
+#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT)
+#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT)
+#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT)
+#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT)
+#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT)
+
+#define SVM_SELECTOR_WRITE_MASK (1 << 1)
+#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
+#define SVM_SELECTOR_CODE_MASK (1 << 3)
+
+#define INTERCEPT_CR0_READ 0
+#define INTERCEPT_CR3_READ 3
+#define INTERCEPT_CR4_READ 4
+#define INTERCEPT_CR8_READ 8
+#define INTERCEPT_CR0_WRITE (16 + 0)
+#define INTERCEPT_CR3_WRITE (16 + 3)
+#define INTERCEPT_CR4_WRITE (16 + 4)
+#define INTERCEPT_CR8_WRITE (16 + 8)
+
+#define INTERCEPT_DR0_READ 0
+#define INTERCEPT_DR1_READ 1
+#define INTERCEPT_DR2_READ 2
+#define INTERCEPT_DR3_READ 3
+#define INTERCEPT_DR4_READ 4
+#define INTERCEPT_DR5_READ 5
+#define INTERCEPT_DR6_READ 6
+#define INTERCEPT_DR7_READ 7
+#define INTERCEPT_DR0_WRITE (16 + 0)
+#define INTERCEPT_DR1_WRITE (16 + 1)
+#define INTERCEPT_DR2_WRITE (16 + 2)
+#define INTERCEPT_DR3_WRITE (16 + 3)
+#define INTERCEPT_DR4_WRITE (16 + 4)
+#define INTERCEPT_DR5_WRITE (16 + 5)
+#define INTERCEPT_DR6_WRITE (16 + 6)
+#define INTERCEPT_DR7_WRITE (16 + 7)
+
+#define SVM_EVTINJ_VEC_MASK 0xff
+
+#define SVM_EVTINJ_TYPE_SHIFT 8
+#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_VALID (1 << 31)
+#define SVM_EVTINJ_VALID_ERR (1 << 11)
+
+#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
+#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
+
+#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
+#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
+#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
+#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
+
+#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
+#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
+
+#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
+#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
+#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44
+
+#define SVM_EXITINFO_REG_MASK 0x0F
+
+#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
+
+#endif /* SELFTEST_KVM_SVM_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+
+#ifndef SELFTEST_KVM_SVM_UTILS_H
+#define SELFTEST_KVM_SVM_UTILS_H
+
+#include <asm/svm.h>
+
+#include <stdint.h>
+#include "svm.h"
+#include "processor.h"
+
+struct svm_test_data {
+ /* VMCB */
+ struct vmcb *vmcb; /* gva */
+ void *vmcb_hva;
+ uint64_t vmcb_gpa;
+
+ /* host state-save area */
+ struct vmcb_save_area *save_area; /* gva */
+ void *save_area_hva;
+ uint64_t save_area_gpa;
+
+ /* MSR-Bitmap */
+ void *msr; /* gva */
+ void *msr_hva;
+ uint64_t msr_gpa;
+};
+
+static inline void vmmcall(void)
+{
+ /*
+ * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle
+ * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended
+ * use of this function is to exit to L1 from L2. Clobber all other
+ * GPRs as L1 doesn't correctly preserve them during vmexits.
+ */
+ __asm__ __volatile__("push %%rbp; vmmcall; pop %%rbp"
+ : : "a"(0xdeadbeef), "c"(0xbeefdead)
+ : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+ "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
+#define stgi() \
+ __asm__ __volatile__( \
+ "stgi\n" \
+ )
+
+#define clgi() \
+ __asm__ __volatile__( \
+ "clgi\n" \
+ )
+
+struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva);
+void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp);
+void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
+
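+/*
+ * Sketch of the typical L1 flow (l2_code and l2_stack_top are assumptions
+ * provided by the caller): enter L2 and expect it to exit via VMMCALL.
+ */
+static inline void example_l1_run_l2(struct svm_test_data *svm,
+				     void *l2_code, void *l2_stack_top)
+{
+	generic_svm_setup(svm, l2_code, l2_stack_top);
+	run_guest(svm->vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+}
+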
+int open_sev_dev_path_or_exit(void);
+
+#endif /* SELFTEST_KVM_SVM_UTILS_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_IO
+
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+#endif
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_VMX_H
+#define SELFTEST_KVM_VMX_H
+
+#include <asm/vmx.h>
+
+#include <stdint.h>
+#include "processor.h"
+#include "apic.h"
+
+/*
+ * Definitions of Primary Processor-Based VM-Execution Controls.
+ */
+#define CPU_BASED_INTR_WINDOW_EXITING 0x00000004
+#define CPU_BASED_USE_TSC_OFFSETTING 0x00000008
+#define CPU_BASED_HLT_EXITING 0x00000080
+#define CPU_BASED_INVLPG_EXITING 0x00000200
+#define CPU_BASED_MWAIT_EXITING 0x00000400
+#define CPU_BASED_RDPMC_EXITING 0x00000800
+#define CPU_BASED_RDTSC_EXITING 0x00001000
+#define CPU_BASED_CR3_LOAD_EXITING 0x00008000
+#define CPU_BASED_CR3_STORE_EXITING 0x00010000
+#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
+#define CPU_BASED_CR8_STORE_EXITING 0x00100000
+#define CPU_BASED_TPR_SHADOW 0x00200000
+#define CPU_BASED_NMI_WINDOW_EXITING 0x00400000
+#define CPU_BASED_MOV_DR_EXITING 0x00800000
+#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
+#define CPU_BASED_USE_IO_BITMAPS 0x02000000
+#define CPU_BASED_MONITOR_TRAP 0x08000000
+#define CPU_BASED_USE_MSR_BITMAPS 0x10000000
+#define CPU_BASED_MONITOR_EXITING 0x20000000
+#define CPU_BASED_PAUSE_EXITING 0x40000000
+#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000
+
+#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172
+
+/*
+ * Definitions of Secondary Processor-Based VM-Execution Controls.
+ */
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
+#define SECONDARY_EXEC_DESC 0x00000004
+#define SECONDARY_EXEC_ENABLE_RDTSCP 0x00000008
+#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
+#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
+#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
+#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
+#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
+#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
+#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
+#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800
+#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
+#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000
+#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
+#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000
+#define SECONDARY_EXEC_ENABLE_PML 0x00020000
+#define SECONDARY_EPT_VE 0x00040000
+#define SECONDARY_ENABLE_XSAV_RESTORE 0x00100000
+#define SECONDARY_EXEC_TSC_SCALING 0x02000000
+
+#define PIN_BASED_EXT_INTR_MASK 0x00000001
+#define PIN_BASED_NMI_EXITING 0x00000008
+#define PIN_BASED_VIRTUAL_NMIS 0x00000020
+#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040
+#define PIN_BASED_POSTED_INTR 0x00000080
+
+#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016
+
+#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004
+#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
+#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000
+#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
+#define VM_EXIT_SAVE_IA32_PAT 0x00040000
+#define VM_EXIT_LOAD_IA32_PAT 0x00080000
+#define VM_EXIT_SAVE_IA32_EFER 0x00100000
+#define VM_EXIT_LOAD_IA32_EFER 0x00200000
+#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000
+
+#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
+
+#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004
+#define VM_ENTRY_IA32E_MODE 0x00000200
+#define VM_ENTRY_SMM 0x00000400
+#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
+#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000
+#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
+#define VM_ENTRY_LOAD_IA32_EFER 0x00008000
+
+#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
+
+#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
+#define VMX_MISC_SAVE_EFER_LMA 0x00000020
+
+#define VMX_EPT_VPID_CAP_1G_PAGES 0x00020000
+#define VMX_EPT_VPID_CAP_AD_BITS 0x00200000
+
+#define EXIT_REASON_FAILED_VMENTRY 0x80000000
+
+enum vmcs_field {
+ VIRTUAL_PROCESSOR_ID = 0x00000000,
+ POSTED_INTR_NV = 0x00000002,
+ GUEST_ES_SELECTOR = 0x00000800,
+ GUEST_CS_SELECTOR = 0x00000802,
+ GUEST_SS_SELECTOR = 0x00000804,
+ GUEST_DS_SELECTOR = 0x00000806,
+ GUEST_FS_SELECTOR = 0x00000808,
+ GUEST_GS_SELECTOR = 0x0000080a,
+ GUEST_LDTR_SELECTOR = 0x0000080c,
+ GUEST_TR_SELECTOR = 0x0000080e,
+ GUEST_INTR_STATUS = 0x00000810,
+ GUEST_PML_INDEX = 0x00000812,
+ HOST_ES_SELECTOR = 0x00000c00,
+ HOST_CS_SELECTOR = 0x00000c02,
+ HOST_SS_SELECTOR = 0x00000c04,
+ HOST_DS_SELECTOR = 0x00000c06,
+ HOST_FS_SELECTOR = 0x00000c08,
+ HOST_GS_SELECTOR = 0x00000c0a,
+ HOST_TR_SELECTOR = 0x00000c0c,
+ IO_BITMAP_A = 0x00002000,
+ IO_BITMAP_A_HIGH = 0x00002001,
+ IO_BITMAP_B = 0x00002002,
+ IO_BITMAP_B_HIGH = 0x00002003,
+ MSR_BITMAP = 0x00002004,
+ MSR_BITMAP_HIGH = 0x00002005,
+ VM_EXIT_MSR_STORE_ADDR = 0x00002006,
+ VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007,
+ VM_EXIT_MSR_LOAD_ADDR = 0x00002008,
+ VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009,
+ VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a,
+ VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b,
+ PML_ADDRESS = 0x0000200e,
+ PML_ADDRESS_HIGH = 0x0000200f,
+ TSC_OFFSET = 0x00002010,
+ TSC_OFFSET_HIGH = 0x00002011,
+ VIRTUAL_APIC_PAGE_ADDR = 0x00002012,
+ VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
+ APIC_ACCESS_ADDR = 0x00002014,
+ APIC_ACCESS_ADDR_HIGH = 0x00002015,
+ POSTED_INTR_DESC_ADDR = 0x00002016,
+ POSTED_INTR_DESC_ADDR_HIGH = 0x00002017,
+ EPT_POINTER = 0x0000201a,
+ EPT_POINTER_HIGH = 0x0000201b,
+ EOI_EXIT_BITMAP0 = 0x0000201c,
+ EOI_EXIT_BITMAP0_HIGH = 0x0000201d,
+ EOI_EXIT_BITMAP1 = 0x0000201e,
+ EOI_EXIT_BITMAP1_HIGH = 0x0000201f,
+ EOI_EXIT_BITMAP2 = 0x00002020,
+ EOI_EXIT_BITMAP2_HIGH = 0x00002021,
+ EOI_EXIT_BITMAP3 = 0x00002022,
+ EOI_EXIT_BITMAP3_HIGH = 0x00002023,
+ VMREAD_BITMAP = 0x00002026,
+ VMREAD_BITMAP_HIGH = 0x00002027,
+ VMWRITE_BITMAP = 0x00002028,
+ VMWRITE_BITMAP_HIGH = 0x00002029,
+ XSS_EXIT_BITMAP = 0x0000202C,
+ XSS_EXIT_BITMAP_HIGH = 0x0000202D,
+ ENCLS_EXITING_BITMAP = 0x0000202E,
+ ENCLS_EXITING_BITMAP_HIGH = 0x0000202F,
+ TSC_MULTIPLIER = 0x00002032,
+ TSC_MULTIPLIER_HIGH = 0x00002033,
+ GUEST_PHYSICAL_ADDRESS = 0x00002400,
+ GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
+ VMCS_LINK_POINTER = 0x00002800,
+ VMCS_LINK_POINTER_HIGH = 0x00002801,
+ GUEST_IA32_DEBUGCTL = 0x00002802,
+ GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
+ GUEST_IA32_PAT = 0x00002804,
+ GUEST_IA32_PAT_HIGH = 0x00002805,
+ GUEST_IA32_EFER = 0x00002806,
+ GUEST_IA32_EFER_HIGH = 0x00002807,
+ GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
+ GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809,
+ GUEST_PDPTR0 = 0x0000280a,
+ GUEST_PDPTR0_HIGH = 0x0000280b,
+ GUEST_PDPTR1 = 0x0000280c,
+ GUEST_PDPTR1_HIGH = 0x0000280d,
+ GUEST_PDPTR2 = 0x0000280e,
+ GUEST_PDPTR2_HIGH = 0x0000280f,
+ GUEST_PDPTR3 = 0x00002810,
+ GUEST_PDPTR3_HIGH = 0x00002811,
+ GUEST_BNDCFGS = 0x00002812,
+ GUEST_BNDCFGS_HIGH = 0x00002813,
+ HOST_IA32_PAT = 0x00002c00,
+ HOST_IA32_PAT_HIGH = 0x00002c01,
+ HOST_IA32_EFER = 0x00002c02,
+ HOST_IA32_EFER_HIGH = 0x00002c03,
+ HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
+ HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05,
+ PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
+ CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
+ EXCEPTION_BITMAP = 0x00004004,
+ PAGE_FAULT_ERROR_CODE_MASK = 0x00004006,
+ PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008,
+ CR3_TARGET_COUNT = 0x0000400a,
+ VM_EXIT_CONTROLS = 0x0000400c,
+ VM_EXIT_MSR_STORE_COUNT = 0x0000400e,
+ VM_EXIT_MSR_LOAD_COUNT = 0x00004010,
+ VM_ENTRY_CONTROLS = 0x00004012,
+ VM_ENTRY_MSR_LOAD_COUNT = 0x00004014,
+ VM_ENTRY_INTR_INFO_FIELD = 0x00004016,
+ VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018,
+ VM_ENTRY_INSTRUCTION_LEN = 0x0000401a,
+ TPR_THRESHOLD = 0x0000401c,
+ SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
+ PLE_GAP = 0x00004020,
+ PLE_WINDOW = 0x00004022,
+ VM_INSTRUCTION_ERROR = 0x00004400,
+ VM_EXIT_REASON = 0x00004402,
+ VM_EXIT_INTR_INFO = 0x00004404,
+ VM_EXIT_INTR_ERROR_CODE = 0x00004406,
+ IDT_VECTORING_INFO_FIELD = 0x00004408,
+ IDT_VECTORING_ERROR_CODE = 0x0000440a,
+ VM_EXIT_INSTRUCTION_LEN = 0x0000440c,
+ VMX_INSTRUCTION_INFO = 0x0000440e,
+ GUEST_ES_LIMIT = 0x00004800,
+ GUEST_CS_LIMIT = 0x00004802,
+ GUEST_SS_LIMIT = 0x00004804,
+ GUEST_DS_LIMIT = 0x00004806,
+ GUEST_FS_LIMIT = 0x00004808,
+ GUEST_GS_LIMIT = 0x0000480a,
+ GUEST_LDTR_LIMIT = 0x0000480c,
+ GUEST_TR_LIMIT = 0x0000480e,
+ GUEST_GDTR_LIMIT = 0x00004810,
+ GUEST_IDTR_LIMIT = 0x00004812,
+ GUEST_ES_AR_BYTES = 0x00004814,
+ GUEST_CS_AR_BYTES = 0x00004816,
+ GUEST_SS_AR_BYTES = 0x00004818,
+ GUEST_DS_AR_BYTES = 0x0000481a,
+ GUEST_FS_AR_BYTES = 0x0000481c,
+ GUEST_GS_AR_BYTES = 0x0000481e,
+ GUEST_LDTR_AR_BYTES = 0x00004820,
+ GUEST_TR_AR_BYTES = 0x00004822,
+ GUEST_INTERRUPTIBILITY_INFO = 0x00004824,
+ GUEST_ACTIVITY_STATE = 0X00004826,
+ GUEST_SYSENTER_CS = 0x0000482A,
+ VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
+ HOST_IA32_SYSENTER_CS = 0x00004c00,
+ CR0_GUEST_HOST_MASK = 0x00006000,
+ CR4_GUEST_HOST_MASK = 0x00006002,
+ CR0_READ_SHADOW = 0x00006004,
+ CR4_READ_SHADOW = 0x00006006,
+ CR3_TARGET_VALUE0 = 0x00006008,
+ CR3_TARGET_VALUE1 = 0x0000600a,
+ CR3_TARGET_VALUE2 = 0x0000600c,
+ CR3_TARGET_VALUE3 = 0x0000600e,
+ EXIT_QUALIFICATION = 0x00006400,
+ GUEST_LINEAR_ADDRESS = 0x0000640a,
+ GUEST_CR0 = 0x00006800,
+ GUEST_CR3 = 0x00006802,
+ GUEST_CR4 = 0x00006804,
+ GUEST_ES_BASE = 0x00006806,
+ GUEST_CS_BASE = 0x00006808,
+ GUEST_SS_BASE = 0x0000680a,
+ GUEST_DS_BASE = 0x0000680c,
+ GUEST_FS_BASE = 0x0000680e,
+ GUEST_GS_BASE = 0x00006810,
+ GUEST_LDTR_BASE = 0x00006812,
+ GUEST_TR_BASE = 0x00006814,
+ GUEST_GDTR_BASE = 0x00006816,
+ GUEST_IDTR_BASE = 0x00006818,
+ GUEST_DR7 = 0x0000681a,
+ GUEST_RSP = 0x0000681c,
+ GUEST_RIP = 0x0000681e,
+ GUEST_RFLAGS = 0x00006820,
+ GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822,
+ GUEST_SYSENTER_ESP = 0x00006824,
+ GUEST_SYSENTER_EIP = 0x00006826,
+ HOST_CR0 = 0x00006c00,
+ HOST_CR3 = 0x00006c02,
+ HOST_CR4 = 0x00006c04,
+ HOST_FS_BASE = 0x00006c06,
+ HOST_GS_BASE = 0x00006c08,
+ HOST_TR_BASE = 0x00006c0a,
+ HOST_GDTR_BASE = 0x00006c0c,
+ HOST_IDTR_BASE = 0x00006c0e,
+ HOST_IA32_SYSENTER_ESP = 0x00006c10,
+ HOST_IA32_SYSENTER_EIP = 0x00006c12,
+ HOST_RSP = 0x00006c14,
+ HOST_RIP = 0x00006c16,
+};
+
+struct vmx_msr_entry {
+ uint32_t index;
+ uint32_t reserved;
+ uint64_t value;
+} __attribute__ ((aligned(16)));
+
+#include "evmcs.h"
+
+static inline int vmxon(uint64_t phys)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [pa]"m"(phys)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline void vmxoff(void)
+{
+ __asm__ __volatile__("vmxoff");
+}
+
+static inline int vmclear(uint64_t vmcs_pa)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [pa]"m"(vmcs_pa)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline int vmptrld(uint64_t vmcs_pa)
+{
+ uint8_t ret;
+
+ if (enable_evmcs)
+ return -1;
+
+ __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [pa]"m"(vmcs_pa)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline int vmptrst(uint64_t *value)
+{
+ uint64_t tmp;
+ uint8_t ret;
+
+ if (enable_evmcs)
+ return evmcs_vmptrst(value);
+
+ __asm__ __volatile__("vmptrst %[value]; setna %[ret]"
+ : [value]"=m"(tmp), [ret]"=rm"(ret)
+ : : "cc", "memory");
+
+ *value = tmp;
+ return ret;
+}
+
+/*
+ * A wrapper around vmptrst that ignores errors and returns zero if the
+ * vmptrst instruction fails.
+ */
+static inline uint64_t vmptrstz(void)
+{
+ uint64_t value = 0;
+ vmptrst(&value);
+ return value;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmlaunch.
+ */
+static inline int vmlaunch(void)
+{
+ int ret;
+
+ if (enable_evmcs)
+ return evmcs_vmlaunch();
+
+ __asm__ __volatile__("push %%rbp;"
+ "push %%rcx;"
+ "push %%rdx;"
+ "push %%rsi;"
+ "push %%rdi;"
+ "push $0;"
+ "vmwrite %%rsp, %[host_rsp];"
+ "lea 1f(%%rip), %%rax;"
+ "vmwrite %%rax, %[host_rip];"
+ "vmlaunch;"
+ "incq (%%rsp);"
+ "1: pop %%rax;"
+ "pop %%rdi;"
+ "pop %%rsi;"
+ "pop %%rdx;"
+ "pop %%rcx;"
+ "pop %%rbp;"
+ : [ret]"=&a"(ret)
+ : [host_rsp]"r"((uint64_t)HOST_RSP),
+ [host_rip]"r"((uint64_t)HOST_RIP)
+ : "memory", "cc", "rbx", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15");
+ return ret;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmresume.
+ */
+static inline int vmresume(void)
+{
+ int ret;
+
+ if (enable_evmcs)
+ return evmcs_vmresume();
+
+ __asm__ __volatile__("push %%rbp;"
+ "push %%rcx;"
+ "push %%rdx;"
+ "push %%rsi;"
+ "push %%rdi;"
+ "push $0;"
+ "vmwrite %%rsp, %[host_rsp];"
+ "lea 1f(%%rip), %%rax;"
+ "vmwrite %%rax, %[host_rip];"
+ "vmresume;"
+ "incq (%%rsp);"
+ "1: pop %%rax;"
+ "pop %%rdi;"
+ "pop %%rsi;"
+ "pop %%rdx;"
+ "pop %%rcx;"
+ "pop %%rbp;"
+ : [ret]"=&a"(ret)
+ : [host_rsp]"r"((uint64_t)HOST_RSP),
+ [host_rip]"r"((uint64_t)HOST_RIP)
+ : "memory", "cc", "rbx", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15");
+ return ret;
+}
+
+static inline void vmcall(void)
+{
+ /*
+ * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle
+ * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended
+ * use of this function is to exit to L1 from L2. Clobber all other
+ * GPRs as L1 doesn't correctly preserve them during vmexits.
+ */
+ __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp"
+ : : "a"(0xdeadbeef), "c"(0xbeefdead)
+ : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+ "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
+static inline int vmread(uint64_t encoding, uint64_t *value)
+{
+ uint64_t tmp;
+ uint8_t ret;
+
+ if (enable_evmcs)
+ return evmcs_vmread(encoding, value);
+
+ __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]"
+ : [value]"=rm"(tmp), [ret]"=rm"(ret)
+ : [encoding]"r"(encoding)
+ : "cc", "memory");
+
+ *value = tmp;
+ return ret;
+}
+
+/*
+ * A wrapper around vmread that ignores errors and returns zero if the
+ * vmread instruction fails.
+ */
+static inline uint64_t vmreadz(uint64_t encoding)
+{
+ uint64_t value = 0;
+ vmread(encoding, &value);
+ return value;
+}
+
+static inline int vmwrite(uint64_t encoding, uint64_t value)
+{
+ uint8_t ret;
+
+ if (enable_evmcs)
+ return evmcs_vmwrite(encoding, value);
+
+ __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [value]"rm"(value), [encoding]"r"(encoding)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline uint32_t vmcs_revision(void)
+{
+ return rdmsr(MSR_IA32_VMX_BASIC);
+}
+
+struct vmx_pages {
+ void *vmxon_hva;
+ uint64_t vmxon_gpa;
+ void *vmxon;
+
+ void *vmcs_hva;
+ uint64_t vmcs_gpa;
+ void *vmcs;
+
+ void *msr_hva;
+ uint64_t msr_gpa;
+ void *msr;
+
+ void *shadow_vmcs_hva;
+ uint64_t shadow_vmcs_gpa;
+ void *shadow_vmcs;
+
+ void *vmread_hva;
+ uint64_t vmread_gpa;
+ void *vmread;
+
+ void *vmwrite_hva;
+ uint64_t vmwrite_gpa;
+ void *vmwrite;
+
+ void *eptp_hva;
+ uint64_t eptp_gpa;
+ void *eptp;
+
+ void *apic_access_hva;
+ uint64_t apic_access_gpa;
+ void *apic_access;
+};
+
+union vmx_basic {
+ u64 val;
+ struct {
+ u32 revision;
+ u32 size:13,
+ reserved1:3,
+ width:1,
+ dual:1,
+ type:4,
+ insouts:1,
+ ctrl:1,
+ vm_entry_exception_ctrl:1,
+ reserved2:7;
+ };
+};
+
+union vmx_ctrl_msr {
+ u64 val;
+ struct {
+ u32 set, clr;
+ };
+};
+
+struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
+bool prepare_for_vmx_operation(struct vmx_pages *vmx);
+void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
+bool load_vmcs(struct vmx_pages *vmx);
+
+bool ept_1g_pages_supported(void);
+
+void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr);
+void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size);
+void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint32_t memslot);
+void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t addr, uint64_t size);
+bool kvm_cpu_has_ept(void);
+void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint32_t eptp_memslot);
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
+
+#endif /* SELFTEST_KVM_VMX_H */
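For orientation, the helpers declared above are driven by the nested-VMX selftests roughly as in the following sketch. This is illustrative only and not part of the patch: l2_guest_code, the stack size, and the EXIT_REASON_VMCALL check are assumptions modelled on how the declared entry points (vcpu_alloc_vmx(), prepare_for_vmx_operation(), load_vmcs(), prepare_vmcs(), vmlaunch(), vmreadz()) are typically used, and it presumes the usual selftest harness headers.

#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"

#define L2_GUEST_STACK_SIZE 64

static void l2_guest_code(void)
{
	/* Exit back to L1; vmcall() deliberately clobbers most GPRs. */
	vmcall();
}

static void l1_guest_code(struct vmx_pages *vmx)
{
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];

	GUEST_ASSERT(prepare_for_vmx_operation(vmx));	/* VMXON, CR fixups */
	GUEST_ASSERT(load_vmcs(vmx));			/* VMCLEAR + VMPTRLD */
	prepare_vmcs(vmx, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);

	GUEST_ASSERT(!vmlaunch());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
	GUEST_DONE();
}

/*
 * Host side (also illustrative): allocate the VMX pages and hand the guest
 * virtual address to L1.
 *
 *	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
 *	vcpu_alloc_vmx(vm, &vmx_gva);
 *	vcpu_args_set(vcpu, 1, vmx_gva);
 *	vcpu_run(vcpu);
 */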
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/x86_64/apic.h
- *
- * Copyright (C) 2021, Google LLC.
- */
-
-#ifndef SELFTEST_KVM_APIC_H
-#define SELFTEST_KVM_APIC_H
-
-#include <stdint.h>
-
-#include "processor.h"
-#include "ucall_common.h"
-
-#define APIC_DEFAULT_GPA 0xfee00000ULL
-
-/* APIC base address MSR and fields */
-#define MSR_IA32_APICBASE 0x0000001b
-#define MSR_IA32_APICBASE_BSP (1<<8)
-#define MSR_IA32_APICBASE_EXTD (1<<10)
-#define MSR_IA32_APICBASE_ENABLE (1<<11)
-#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
-#define GET_APIC_BASE(x) (((x) >> 12) << 12)
-
-#define APIC_BASE_MSR 0x800
-#define X2APIC_ENABLE (1UL << 10)
-#define APIC_ID 0x20
-#define APIC_LVR 0x30
-#define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF)
-#define APIC_TASKPRI 0x80
-#define APIC_PROCPRI 0xA0
-#define APIC_EOI 0xB0
-#define APIC_SPIV 0xF0
-#define APIC_SPIV_FOCUS_DISABLED (1 << 9)
-#define APIC_SPIV_APIC_ENABLED (1 << 8)
-#define APIC_IRR 0x200
-#define APIC_ICR 0x300
-#define APIC_LVTCMCI 0x2f0
-#define APIC_DEST_SELF 0x40000
-#define APIC_DEST_ALLINC 0x80000
-#define APIC_DEST_ALLBUT 0xC0000
-#define APIC_ICR_RR_MASK 0x30000
-#define APIC_ICR_RR_INVALID 0x00000
-#define APIC_ICR_RR_INPROG 0x10000
-#define APIC_ICR_RR_VALID 0x20000
-#define APIC_INT_LEVELTRIG 0x08000
-#define APIC_INT_ASSERT 0x04000
-#define APIC_ICR_BUSY 0x01000
-#define APIC_DEST_LOGICAL 0x00800
-#define APIC_DEST_PHYSICAL 0x00000
-#define APIC_DM_FIXED 0x00000
-#define APIC_DM_FIXED_MASK 0x00700
-#define APIC_DM_LOWEST 0x00100
-#define APIC_DM_SMI 0x00200
-#define APIC_DM_REMRD 0x00300
-#define APIC_DM_NMI 0x00400
-#define APIC_DM_INIT 0x00500
-#define APIC_DM_STARTUP 0x00600
-#define APIC_DM_EXTINT 0x00700
-#define APIC_VECTOR_MASK 0x000FF
-#define APIC_ICR2 0x310
-#define SET_APIC_DEST_FIELD(x) ((x) << 24)
-#define APIC_LVTT 0x320
-#define APIC_LVT_TIMER_ONESHOT (0 << 17)
-#define APIC_LVT_TIMER_PERIODIC (1 << 17)
-#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17)
-#define APIC_LVT_MASKED (1 << 16)
-#define APIC_TMICT 0x380
-#define APIC_TMCCT 0x390
-#define APIC_TDCR 0x3E0
-
-void apic_disable(void);
-void xapic_enable(void);
-void x2apic_enable(void);
-
-static inline uint32_t get_bsp_flag(void)
-{
- return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
-}
-
-static inline uint32_t xapic_read_reg(unsigned int reg)
-{
- return ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2];
-}
-
-static inline void xapic_write_reg(unsigned int reg, uint32_t val)
-{
- ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2] = val;
-}
-
-static inline uint64_t x2apic_read_reg(unsigned int reg)
-{
- return rdmsr(APIC_BASE_MSR + (reg >> 4));
-}
-
-static inline uint8_t x2apic_write_reg_safe(unsigned int reg, uint64_t value)
-{
- return wrmsr_safe(APIC_BASE_MSR + (reg >> 4), value);
-}
-
-static inline void x2apic_write_reg(unsigned int reg, uint64_t value)
-{
- uint8_t fault = x2apic_write_reg_safe(reg, value);
-
- __GUEST_ASSERT(!fault, "Unexpected fault 0x%x on WRMSR(%x) = %lx\n",
- fault, APIC_BASE_MSR + (reg >> 4), value);
-}
-
-static inline void x2apic_write_reg_fault(unsigned int reg, uint64_t value)
-{
- uint8_t fault = x2apic_write_reg_safe(reg, value);
-
- __GUEST_ASSERT(fault == GP_VECTOR,
- "Wanted #GP on WRMSR(%x) = %lx, got 0x%x\n",
- APIC_BASE_MSR + (reg >> 4), value, fault);
-}
-
-
-#endif /* SELFTEST_KVM_APIC_H */
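Aside, not part of the patch: the x2apic_*_reg() accessors above rely on the architectural mapping from an xAPIC MMIO offset to MSR 0x800 + (offset >> 4). A guest-side self-IPI using only the definitions in this header might look like the sketch below; the vector handling is an assumption for illustration.

/* Illustrative sketch: fixed-vector self-IPI through the x2APIC MSRs. */
static void guest_self_ipi(uint8_t vector)
{
	x2apic_enable();
	/* xAPIC offset 0x300 (APIC_ICR) becomes MSR 0x830 in x2APIC mode. */
	x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | vector);
}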
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * tools/testing/selftests/kvm/include/x86_64/evmcs.h
- *
- * Copyright (C) 2018, Red Hat, Inc.
- *
- */
-
-#ifndef SELFTEST_KVM_EVMCS_H
-#define SELFTEST_KVM_EVMCS_H
-
-#include <stdint.h>
-#include "hyperv.h"
-#include "vmx.h"
-
-#define u16 uint16_t
-#define u32 uint32_t
-#define u64 uint64_t
-
-#define EVMCS_VERSION 1
-
-extern bool enable_evmcs;
-
-struct hv_enlightened_vmcs {
- u32 revision_id;
- u32 abort;
-
- u16 host_es_selector;
- u16 host_cs_selector;
- u16 host_ss_selector;
- u16 host_ds_selector;
- u16 host_fs_selector;
- u16 host_gs_selector;
- u16 host_tr_selector;
-
- u16 padding16_1;
-
- u64 host_ia32_pat;
- u64 host_ia32_efer;
-
- u64 host_cr0;
- u64 host_cr3;
- u64 host_cr4;
-
- u64 host_ia32_sysenter_esp;
- u64 host_ia32_sysenter_eip;
- u64 host_rip;
- u32 host_ia32_sysenter_cs;
-
- u32 pin_based_vm_exec_control;
- u32 vm_exit_controls;
- u32 secondary_vm_exec_control;
-
- u64 io_bitmap_a;
- u64 io_bitmap_b;
- u64 msr_bitmap;
-
- u16 guest_es_selector;
- u16 guest_cs_selector;
- u16 guest_ss_selector;
- u16 guest_ds_selector;
- u16 guest_fs_selector;
- u16 guest_gs_selector;
- u16 guest_ldtr_selector;
- u16 guest_tr_selector;
-
- u32 guest_es_limit;
- u32 guest_cs_limit;
- u32 guest_ss_limit;
- u32 guest_ds_limit;
- u32 guest_fs_limit;
- u32 guest_gs_limit;
- u32 guest_ldtr_limit;
- u32 guest_tr_limit;
- u32 guest_gdtr_limit;
- u32 guest_idtr_limit;
-
- u32 guest_es_ar_bytes;
- u32 guest_cs_ar_bytes;
- u32 guest_ss_ar_bytes;
- u32 guest_ds_ar_bytes;
- u32 guest_fs_ar_bytes;
- u32 guest_gs_ar_bytes;
- u32 guest_ldtr_ar_bytes;
- u32 guest_tr_ar_bytes;
-
- u64 guest_es_base;
- u64 guest_cs_base;
- u64 guest_ss_base;
- u64 guest_ds_base;
- u64 guest_fs_base;
- u64 guest_gs_base;
- u64 guest_ldtr_base;
- u64 guest_tr_base;
- u64 guest_gdtr_base;
- u64 guest_idtr_base;
-
- u64 padding64_1[3];
-
- u64 vm_exit_msr_store_addr;
- u64 vm_exit_msr_load_addr;
- u64 vm_entry_msr_load_addr;
-
- u64 cr3_target_value0;
- u64 cr3_target_value1;
- u64 cr3_target_value2;
- u64 cr3_target_value3;
-
- u32 page_fault_error_code_mask;
- u32 page_fault_error_code_match;
-
- u32 cr3_target_count;
- u32 vm_exit_msr_store_count;
- u32 vm_exit_msr_load_count;
- u32 vm_entry_msr_load_count;
-
- u64 tsc_offset;
- u64 virtual_apic_page_addr;
- u64 vmcs_link_pointer;
-
- u64 guest_ia32_debugctl;
- u64 guest_ia32_pat;
- u64 guest_ia32_efer;
-
- u64 guest_pdptr0;
- u64 guest_pdptr1;
- u64 guest_pdptr2;
- u64 guest_pdptr3;
-
- u64 guest_pending_dbg_exceptions;
- u64 guest_sysenter_esp;
- u64 guest_sysenter_eip;
-
- u32 guest_activity_state;
- u32 guest_sysenter_cs;
-
- u64 cr0_guest_host_mask;
- u64 cr4_guest_host_mask;
- u64 cr0_read_shadow;
- u64 cr4_read_shadow;
- u64 guest_cr0;
- u64 guest_cr3;
- u64 guest_cr4;
- u64 guest_dr7;
-
- u64 host_fs_base;
- u64 host_gs_base;
- u64 host_tr_base;
- u64 host_gdtr_base;
- u64 host_idtr_base;
- u64 host_rsp;
-
- u64 ept_pointer;
-
- u16 virtual_processor_id;
- u16 padding16_2[3];
-
- u64 padding64_2[5];
- u64 guest_physical_address;
-
- u32 vm_instruction_error;
- u32 vm_exit_reason;
- u32 vm_exit_intr_info;
- u32 vm_exit_intr_error_code;
- u32 idt_vectoring_info_field;
- u32 idt_vectoring_error_code;
- u32 vm_exit_instruction_len;
- u32 vmx_instruction_info;
-
- u64 exit_qualification;
- u64 exit_io_instruction_ecx;
- u64 exit_io_instruction_esi;
- u64 exit_io_instruction_edi;
- u64 exit_io_instruction_eip;
-
- u64 guest_linear_address;
- u64 guest_rsp;
- u64 guest_rflags;
-
- u32 guest_interruptibility_info;
- u32 cpu_based_vm_exec_control;
- u32 exception_bitmap;
- u32 vm_entry_controls;
- u32 vm_entry_intr_info_field;
- u32 vm_entry_exception_error_code;
- u32 vm_entry_instruction_len;
- u32 tpr_threshold;
-
- u64 guest_rip;
-
- u32 hv_clean_fields;
- u32 padding32_1;
- u32 hv_synthetic_controls;
- struct {
- u32 nested_flush_hypercall:1;
- u32 msr_bitmap:1;
- u32 reserved:30;
- } __packed hv_enlightenments_control;
- u32 hv_vp_id;
- u32 padding32_2;
- u64 hv_vm_id;
- u64 partition_assist_page;
- u64 padding64_4[4];
- u64 guest_bndcfgs;
- u64 guest_ia32_perf_global_ctrl;
- u64 guest_ia32_s_cet;
- u64 guest_ssp;
- u64 guest_ia32_int_ssp_table_addr;
- u64 guest_ia32_lbr_ctl;
- u64 padding64_5[2];
- u64 xss_exit_bitmap;
- u64 encls_exiting_bitmap;
- u64 host_ia32_perf_global_ctrl;
- u64 tsc_multiplier;
- u64 host_ia32_s_cet;
- u64 host_ssp;
- u64 host_ia32_int_ssp_table_addr;
- u64 padding64_6;
-} __packed;
-
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP BIT(1)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF
-
-#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031
-
-extern struct hv_enlightened_vmcs *current_evmcs;
-
-int vcpu_enable_evmcs(struct kvm_vcpu *vcpu);
-
-static inline void evmcs_enable(void)
-{
- enable_evmcs = true;
-}
-
-static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs)
-{
- current_vp_assist->current_nested_vmcs = vmcs_pa;
- current_vp_assist->enlighten_vmentry = 1;
-
- current_evmcs = vmcs;
-
- return 0;
-}
-
-static inline bool load_evmcs(struct hyperv_test_pages *hv)
-{
- if (evmcs_vmptrld(hv->enlightened_vmcs_gpa, hv->enlightened_vmcs))
- return false;
-
- current_evmcs->revision_id = EVMCS_VERSION;
-
- return true;
-}
-
-static inline int evmcs_vmptrst(uint64_t *value)
-{
- *value = current_vp_assist->current_nested_vmcs &
- ~HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
-
- return 0;
-}
-
-static inline int evmcs_vmread(uint64_t encoding, uint64_t *value)
-{
- switch (encoding) {
- case GUEST_RIP:
- *value = current_evmcs->guest_rip;
- break;
- case GUEST_RSP:
- *value = current_evmcs->guest_rsp;
- break;
- case GUEST_RFLAGS:
- *value = current_evmcs->guest_rflags;
- break;
- case HOST_IA32_PAT:
- *value = current_evmcs->host_ia32_pat;
- break;
- case HOST_IA32_EFER:
- *value = current_evmcs->host_ia32_efer;
- break;
- case HOST_CR0:
- *value = current_evmcs->host_cr0;
- break;
- case HOST_CR3:
- *value = current_evmcs->host_cr3;
- break;
- case HOST_CR4:
- *value = current_evmcs->host_cr4;
- break;
- case HOST_IA32_SYSENTER_ESP:
- *value = current_evmcs->host_ia32_sysenter_esp;
- break;
- case HOST_IA32_SYSENTER_EIP:
- *value = current_evmcs->host_ia32_sysenter_eip;
- break;
- case HOST_RIP:
- *value = current_evmcs->host_rip;
- break;
- case IO_BITMAP_A:
- *value = current_evmcs->io_bitmap_a;
- break;
- case IO_BITMAP_B:
- *value = current_evmcs->io_bitmap_b;
- break;
- case MSR_BITMAP:
- *value = current_evmcs->msr_bitmap;
- break;
- case GUEST_ES_BASE:
- *value = current_evmcs->guest_es_base;
- break;
- case GUEST_CS_BASE:
- *value = current_evmcs->guest_cs_base;
- break;
- case GUEST_SS_BASE:
- *value = current_evmcs->guest_ss_base;
- break;
- case GUEST_DS_BASE:
- *value = current_evmcs->guest_ds_base;
- break;
- case GUEST_FS_BASE:
- *value = current_evmcs->guest_fs_base;
- break;
- case GUEST_GS_BASE:
- *value = current_evmcs->guest_gs_base;
- break;
- case GUEST_LDTR_BASE:
- *value = current_evmcs->guest_ldtr_base;
- break;
- case GUEST_TR_BASE:
- *value = current_evmcs->guest_tr_base;
- break;
- case GUEST_GDTR_BASE:
- *value = current_evmcs->guest_gdtr_base;
- break;
- case GUEST_IDTR_BASE:
- *value = current_evmcs->guest_idtr_base;
- break;
- case TSC_OFFSET:
- *value = current_evmcs->tsc_offset;
- break;
- case VIRTUAL_APIC_PAGE_ADDR:
- *value = current_evmcs->virtual_apic_page_addr;
- break;
- case VMCS_LINK_POINTER:
- *value = current_evmcs->vmcs_link_pointer;
- break;
- case GUEST_IA32_DEBUGCTL:
- *value = current_evmcs->guest_ia32_debugctl;
- break;
- case GUEST_IA32_PAT:
- *value = current_evmcs->guest_ia32_pat;
- break;
- case GUEST_IA32_EFER:
- *value = current_evmcs->guest_ia32_efer;
- break;
- case GUEST_PDPTR0:
- *value = current_evmcs->guest_pdptr0;
- break;
- case GUEST_PDPTR1:
- *value = current_evmcs->guest_pdptr1;
- break;
- case GUEST_PDPTR2:
- *value = current_evmcs->guest_pdptr2;
- break;
- case GUEST_PDPTR3:
- *value = current_evmcs->guest_pdptr3;
- break;
- case GUEST_PENDING_DBG_EXCEPTIONS:
- *value = current_evmcs->guest_pending_dbg_exceptions;
- break;
- case GUEST_SYSENTER_ESP:
- *value = current_evmcs->guest_sysenter_esp;
- break;
- case GUEST_SYSENTER_EIP:
- *value = current_evmcs->guest_sysenter_eip;
- break;
- case CR0_GUEST_HOST_MASK:
- *value = current_evmcs->cr0_guest_host_mask;
- break;
- case CR4_GUEST_HOST_MASK:
- *value = current_evmcs->cr4_guest_host_mask;
- break;
- case CR0_READ_SHADOW:
- *value = current_evmcs->cr0_read_shadow;
- break;
- case CR4_READ_SHADOW:
- *value = current_evmcs->cr4_read_shadow;
- break;
- case GUEST_CR0:
- *value = current_evmcs->guest_cr0;
- break;
- case GUEST_CR3:
- *value = current_evmcs->guest_cr3;
- break;
- case GUEST_CR4:
- *value = current_evmcs->guest_cr4;
- break;
- case GUEST_DR7:
- *value = current_evmcs->guest_dr7;
- break;
- case HOST_FS_BASE:
- *value = current_evmcs->host_fs_base;
- break;
- case HOST_GS_BASE:
- *value = current_evmcs->host_gs_base;
- break;
- case HOST_TR_BASE:
- *value = current_evmcs->host_tr_base;
- break;
- case HOST_GDTR_BASE:
- *value = current_evmcs->host_gdtr_base;
- break;
- case HOST_IDTR_BASE:
- *value = current_evmcs->host_idtr_base;
- break;
- case HOST_RSP:
- *value = current_evmcs->host_rsp;
- break;
- case EPT_POINTER:
- *value = current_evmcs->ept_pointer;
- break;
- case GUEST_BNDCFGS:
- *value = current_evmcs->guest_bndcfgs;
- break;
- case XSS_EXIT_BITMAP:
- *value = current_evmcs->xss_exit_bitmap;
- break;
- case GUEST_PHYSICAL_ADDRESS:
- *value = current_evmcs->guest_physical_address;
- break;
- case EXIT_QUALIFICATION:
- *value = current_evmcs->exit_qualification;
- break;
- case GUEST_LINEAR_ADDRESS:
- *value = current_evmcs->guest_linear_address;
- break;
- case VM_EXIT_MSR_STORE_ADDR:
- *value = current_evmcs->vm_exit_msr_store_addr;
- break;
- case VM_EXIT_MSR_LOAD_ADDR:
- *value = current_evmcs->vm_exit_msr_load_addr;
- break;
- case VM_ENTRY_MSR_LOAD_ADDR:
- *value = current_evmcs->vm_entry_msr_load_addr;
- break;
- case CR3_TARGET_VALUE0:
- *value = current_evmcs->cr3_target_value0;
- break;
- case CR3_TARGET_VALUE1:
- *value = current_evmcs->cr3_target_value1;
- break;
- case CR3_TARGET_VALUE2:
- *value = current_evmcs->cr3_target_value2;
- break;
- case CR3_TARGET_VALUE3:
- *value = current_evmcs->cr3_target_value3;
- break;
- case TPR_THRESHOLD:
- *value = current_evmcs->tpr_threshold;
- break;
- case GUEST_INTERRUPTIBILITY_INFO:
- *value = current_evmcs->guest_interruptibility_info;
- break;
- case CPU_BASED_VM_EXEC_CONTROL:
- *value = current_evmcs->cpu_based_vm_exec_control;
- break;
- case EXCEPTION_BITMAP:
- *value = current_evmcs->exception_bitmap;
- break;
- case VM_ENTRY_CONTROLS:
- *value = current_evmcs->vm_entry_controls;
- break;
- case VM_ENTRY_INTR_INFO_FIELD:
- *value = current_evmcs->vm_entry_intr_info_field;
- break;
- case VM_ENTRY_EXCEPTION_ERROR_CODE:
- *value = current_evmcs->vm_entry_exception_error_code;
- break;
- case VM_ENTRY_INSTRUCTION_LEN:
- *value = current_evmcs->vm_entry_instruction_len;
- break;
- case HOST_IA32_SYSENTER_CS:
- *value = current_evmcs->host_ia32_sysenter_cs;
- break;
- case PIN_BASED_VM_EXEC_CONTROL:
- *value = current_evmcs->pin_based_vm_exec_control;
- break;
- case VM_EXIT_CONTROLS:
- *value = current_evmcs->vm_exit_controls;
- break;
- case SECONDARY_VM_EXEC_CONTROL:
- *value = current_evmcs->secondary_vm_exec_control;
- break;
- case GUEST_ES_LIMIT:
- *value = current_evmcs->guest_es_limit;
- break;
- case GUEST_CS_LIMIT:
- *value = current_evmcs->guest_cs_limit;
- break;
- case GUEST_SS_LIMIT:
- *value = current_evmcs->guest_ss_limit;
- break;
- case GUEST_DS_LIMIT:
- *value = current_evmcs->guest_ds_limit;
- break;
- case GUEST_FS_LIMIT:
- *value = current_evmcs->guest_fs_limit;
- break;
- case GUEST_GS_LIMIT:
- *value = current_evmcs->guest_gs_limit;
- break;
- case GUEST_LDTR_LIMIT:
- *value = current_evmcs->guest_ldtr_limit;
- break;
- case GUEST_TR_LIMIT:
- *value = current_evmcs->guest_tr_limit;
- break;
- case GUEST_GDTR_LIMIT:
- *value = current_evmcs->guest_gdtr_limit;
- break;
- case GUEST_IDTR_LIMIT:
- *value = current_evmcs->guest_idtr_limit;
- break;
- case GUEST_ES_AR_BYTES:
- *value = current_evmcs->guest_es_ar_bytes;
- break;
- case GUEST_CS_AR_BYTES:
- *value = current_evmcs->guest_cs_ar_bytes;
- break;
- case GUEST_SS_AR_BYTES:
- *value = current_evmcs->guest_ss_ar_bytes;
- break;
- case GUEST_DS_AR_BYTES:
- *value = current_evmcs->guest_ds_ar_bytes;
- break;
- case GUEST_FS_AR_BYTES:
- *value = current_evmcs->guest_fs_ar_bytes;
- break;
- case GUEST_GS_AR_BYTES:
- *value = current_evmcs->guest_gs_ar_bytes;
- break;
- case GUEST_LDTR_AR_BYTES:
- *value = current_evmcs->guest_ldtr_ar_bytes;
- break;
- case GUEST_TR_AR_BYTES:
- *value = current_evmcs->guest_tr_ar_bytes;
- break;
- case GUEST_ACTIVITY_STATE:
- *value = current_evmcs->guest_activity_state;
- break;
- case GUEST_SYSENTER_CS:
- *value = current_evmcs->guest_sysenter_cs;
- break;
- case VM_INSTRUCTION_ERROR:
- *value = current_evmcs->vm_instruction_error;
- break;
- case VM_EXIT_REASON:
- *value = current_evmcs->vm_exit_reason;
- break;
- case VM_EXIT_INTR_INFO:
- *value = current_evmcs->vm_exit_intr_info;
- break;
- case VM_EXIT_INTR_ERROR_CODE:
- *value = current_evmcs->vm_exit_intr_error_code;
- break;
- case IDT_VECTORING_INFO_FIELD:
- *value = current_evmcs->idt_vectoring_info_field;
- break;
- case IDT_VECTORING_ERROR_CODE:
- *value = current_evmcs->idt_vectoring_error_code;
- break;
- case VM_EXIT_INSTRUCTION_LEN:
- *value = current_evmcs->vm_exit_instruction_len;
- break;
- case VMX_INSTRUCTION_INFO:
- *value = current_evmcs->vmx_instruction_info;
- break;
- case PAGE_FAULT_ERROR_CODE_MASK:
- *value = current_evmcs->page_fault_error_code_mask;
- break;
- case PAGE_FAULT_ERROR_CODE_MATCH:
- *value = current_evmcs->page_fault_error_code_match;
- break;
- case CR3_TARGET_COUNT:
- *value = current_evmcs->cr3_target_count;
- break;
- case VM_EXIT_MSR_STORE_COUNT:
- *value = current_evmcs->vm_exit_msr_store_count;
- break;
- case VM_EXIT_MSR_LOAD_COUNT:
- *value = current_evmcs->vm_exit_msr_load_count;
- break;
- case VM_ENTRY_MSR_LOAD_COUNT:
- *value = current_evmcs->vm_entry_msr_load_count;
- break;
- case HOST_ES_SELECTOR:
- *value = current_evmcs->host_es_selector;
- break;
- case HOST_CS_SELECTOR:
- *value = current_evmcs->host_cs_selector;
- break;
- case HOST_SS_SELECTOR:
- *value = current_evmcs->host_ss_selector;
- break;
- case HOST_DS_SELECTOR:
- *value = current_evmcs->host_ds_selector;
- break;
- case HOST_FS_SELECTOR:
- *value = current_evmcs->host_fs_selector;
- break;
- case HOST_GS_SELECTOR:
- *value = current_evmcs->host_gs_selector;
- break;
- case HOST_TR_SELECTOR:
- *value = current_evmcs->host_tr_selector;
- break;
- case GUEST_ES_SELECTOR:
- *value = current_evmcs->guest_es_selector;
- break;
- case GUEST_CS_SELECTOR:
- *value = current_evmcs->guest_cs_selector;
- break;
- case GUEST_SS_SELECTOR:
- *value = current_evmcs->guest_ss_selector;
- break;
- case GUEST_DS_SELECTOR:
- *value = current_evmcs->guest_ds_selector;
- break;
- case GUEST_FS_SELECTOR:
- *value = current_evmcs->guest_fs_selector;
- break;
- case GUEST_GS_SELECTOR:
- *value = current_evmcs->guest_gs_selector;
- break;
- case GUEST_LDTR_SELECTOR:
- *value = current_evmcs->guest_ldtr_selector;
- break;
- case GUEST_TR_SELECTOR:
- *value = current_evmcs->guest_tr_selector;
- break;
- case VIRTUAL_PROCESSOR_ID:
- *value = current_evmcs->virtual_processor_id;
- break;
- case HOST_IA32_PERF_GLOBAL_CTRL:
- *value = current_evmcs->host_ia32_perf_global_ctrl;
- break;
- case GUEST_IA32_PERF_GLOBAL_CTRL:
- *value = current_evmcs->guest_ia32_perf_global_ctrl;
- break;
- case ENCLS_EXITING_BITMAP:
- *value = current_evmcs->encls_exiting_bitmap;
- break;
- case TSC_MULTIPLIER:
- *value = current_evmcs->tsc_multiplier;
- break;
- default: return 1;
- }
-
- return 0;
-}
-
-static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value)
-{
- switch (encoding) {
- case GUEST_RIP:
- current_evmcs->guest_rip = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
- break;
- case GUEST_RSP:
- current_evmcs->guest_rsp = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
- break;
- case GUEST_RFLAGS:
- current_evmcs->guest_rflags = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
- break;
- case HOST_IA32_PAT:
- current_evmcs->host_ia32_pat = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
- break;
- case HOST_IA32_EFER:
- current_evmcs->host_ia32_efer = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
- break;
- case HOST_CR0:
- current_evmcs->host_cr0 = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
- break;
- case HOST_CR3:
- current_evmcs->host_cr3 = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
- break;
- case HOST_CR4:
- current_evmcs->host_cr4 = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
- break;
- case HOST_IA32_SYSENTER_ESP:
- current_evmcs->host_ia32_sysenter_esp = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
- break;
- case HOST_IA32_SYSENTER_EIP:
- current_evmcs->host_ia32_sysenter_eip = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
- break;
- case HOST_RIP:
- current_evmcs->host_rip = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
- break;
- case IO_BITMAP_A:
- current_evmcs->io_bitmap_a = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP;
- break;
- case IO_BITMAP_B:
- current_evmcs->io_bitmap_b = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP;
- break;
- case MSR_BITMAP:
- current_evmcs->msr_bitmap = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
- break;
- case GUEST_ES_BASE:
- current_evmcs->guest_es_base = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_CS_BASE:
- current_evmcs->guest_cs_base = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_SS_BASE:
- current_evmcs->guest_ss_base = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_DS_BASE:
- current_evmcs->guest_ds_base = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_FS_BASE:
- current_evmcs->guest_fs_base = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_GS_BASE:
- current_evmcs->guest_gs_base = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_LDTR_BASE:
- current_evmcs->guest_ldtr_base = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_TR_BASE:
- current_evmcs->guest_tr_base = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_GDTR_BASE:
- current_evmcs->guest_gdtr_base = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_IDTR_BASE:
- current_evmcs->guest_idtr_base = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case TSC_OFFSET:
- current_evmcs->tsc_offset = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
- break;
- case VIRTUAL_APIC_PAGE_ADDR:
- current_evmcs->virtual_apic_page_addr = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
- break;
- case VMCS_LINK_POINTER:
- current_evmcs->vmcs_link_pointer = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
- break;
- case GUEST_IA32_DEBUGCTL:
- current_evmcs->guest_ia32_debugctl = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
- break;
- case GUEST_IA32_PAT:
- current_evmcs->guest_ia32_pat = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
- break;
- case GUEST_IA32_EFER:
- current_evmcs->guest_ia32_efer = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
- break;
- case GUEST_PDPTR0:
- current_evmcs->guest_pdptr0 = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
- break;
- case GUEST_PDPTR1:
- current_evmcs->guest_pdptr1 = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
- break;
- case GUEST_PDPTR2:
- current_evmcs->guest_pdptr2 = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
- break;
- case GUEST_PDPTR3:
- current_evmcs->guest_pdptr3 = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
- break;
- case GUEST_PENDING_DBG_EXCEPTIONS:
- current_evmcs->guest_pending_dbg_exceptions = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
- break;
- case GUEST_SYSENTER_ESP:
- current_evmcs->guest_sysenter_esp = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
- break;
- case GUEST_SYSENTER_EIP:
- current_evmcs->guest_sysenter_eip = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
- break;
- case CR0_GUEST_HOST_MASK:
- current_evmcs->cr0_guest_host_mask = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
- break;
- case CR4_GUEST_HOST_MASK:
- current_evmcs->cr4_guest_host_mask = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
- break;
- case CR0_READ_SHADOW:
- current_evmcs->cr0_read_shadow = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
- break;
- case CR4_READ_SHADOW:
- current_evmcs->cr4_read_shadow = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
- break;
- case GUEST_CR0:
- current_evmcs->guest_cr0 = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
- break;
- case GUEST_CR3:
- current_evmcs->guest_cr3 = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
- break;
- case GUEST_CR4:
- current_evmcs->guest_cr4 = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
- break;
- case GUEST_DR7:
- current_evmcs->guest_dr7 = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
- break;
- case HOST_FS_BASE:
- current_evmcs->host_fs_base = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
- break;
- case HOST_GS_BASE:
- current_evmcs->host_gs_base = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
- break;
- case HOST_TR_BASE:
- current_evmcs->host_tr_base = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
- break;
- case HOST_GDTR_BASE:
- current_evmcs->host_gdtr_base = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
- break;
- case HOST_IDTR_BASE:
- current_evmcs->host_idtr_base = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
- break;
- case HOST_RSP:
- current_evmcs->host_rsp = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
- break;
- case EPT_POINTER:
- current_evmcs->ept_pointer = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT;
- break;
- case GUEST_BNDCFGS:
- current_evmcs->guest_bndcfgs = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
- break;
- case XSS_EXIT_BITMAP:
- current_evmcs->xss_exit_bitmap = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
- break;
- case GUEST_PHYSICAL_ADDRESS:
- current_evmcs->guest_physical_address = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
- break;
- case EXIT_QUALIFICATION:
- current_evmcs->exit_qualification = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
- break;
- case GUEST_LINEAR_ADDRESS:
- current_evmcs->guest_linear_address = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
- break;
- case VM_EXIT_MSR_STORE_ADDR:
- current_evmcs->vm_exit_msr_store_addr = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
- break;
- case VM_EXIT_MSR_LOAD_ADDR:
- current_evmcs->vm_exit_msr_load_addr = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
- break;
- case VM_ENTRY_MSR_LOAD_ADDR:
- current_evmcs->vm_entry_msr_load_addr = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
- break;
- case CR3_TARGET_VALUE0:
- current_evmcs->cr3_target_value0 = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
- break;
- case CR3_TARGET_VALUE1:
- current_evmcs->cr3_target_value1 = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
- break;
- case CR3_TARGET_VALUE2:
- current_evmcs->cr3_target_value2 = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
- break;
- case CR3_TARGET_VALUE3:
- current_evmcs->cr3_target_value3 = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
- break;
- case TPR_THRESHOLD:
- current_evmcs->tpr_threshold = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
- break;
- case GUEST_INTERRUPTIBILITY_INFO:
- current_evmcs->guest_interruptibility_info = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
- break;
- case CPU_BASED_VM_EXEC_CONTROL:
- current_evmcs->cpu_based_vm_exec_control = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC;
- break;
- case EXCEPTION_BITMAP:
- current_evmcs->exception_bitmap = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN;
- break;
- case VM_ENTRY_CONTROLS:
- current_evmcs->vm_entry_controls = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY;
- break;
- case VM_ENTRY_INTR_INFO_FIELD:
- current_evmcs->vm_entry_intr_info_field = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
- break;
- case VM_ENTRY_EXCEPTION_ERROR_CODE:
- current_evmcs->vm_entry_exception_error_code = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
- break;
- case VM_ENTRY_INSTRUCTION_LEN:
- current_evmcs->vm_entry_instruction_len = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
- break;
- case HOST_IA32_SYSENTER_CS:
- current_evmcs->host_ia32_sysenter_cs = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
- break;
- case PIN_BASED_VM_EXEC_CONTROL:
- current_evmcs->pin_based_vm_exec_control = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
- break;
- case VM_EXIT_CONTROLS:
- current_evmcs->vm_exit_controls = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
- break;
- case SECONDARY_VM_EXEC_CONTROL:
- current_evmcs->secondary_vm_exec_control = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
- break;
- case GUEST_ES_LIMIT:
- current_evmcs->guest_es_limit = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_CS_LIMIT:
- current_evmcs->guest_cs_limit = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_SS_LIMIT:
- current_evmcs->guest_ss_limit = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_DS_LIMIT:
- current_evmcs->guest_ds_limit = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_FS_LIMIT:
- current_evmcs->guest_fs_limit = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_GS_LIMIT:
- current_evmcs->guest_gs_limit = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_LDTR_LIMIT:
- current_evmcs->guest_ldtr_limit = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_TR_LIMIT:
- current_evmcs->guest_tr_limit = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_GDTR_LIMIT:
- current_evmcs->guest_gdtr_limit = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_IDTR_LIMIT:
- current_evmcs->guest_idtr_limit = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_ES_AR_BYTES:
- current_evmcs->guest_es_ar_bytes = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_CS_AR_BYTES:
- current_evmcs->guest_cs_ar_bytes = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_SS_AR_BYTES:
- current_evmcs->guest_ss_ar_bytes = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_DS_AR_BYTES:
- current_evmcs->guest_ds_ar_bytes = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_FS_AR_BYTES:
- current_evmcs->guest_fs_ar_bytes = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_GS_AR_BYTES:
- current_evmcs->guest_gs_ar_bytes = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_LDTR_AR_BYTES:
- current_evmcs->guest_ldtr_ar_bytes = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_TR_AR_BYTES:
- current_evmcs->guest_tr_ar_bytes = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_ACTIVITY_STATE:
- current_evmcs->guest_activity_state = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
- break;
- case GUEST_SYSENTER_CS:
- current_evmcs->guest_sysenter_cs = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
- break;
- case VM_INSTRUCTION_ERROR:
- current_evmcs->vm_instruction_error = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
- break;
- case VM_EXIT_REASON:
- current_evmcs->vm_exit_reason = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
- break;
- case VM_EXIT_INTR_INFO:
- current_evmcs->vm_exit_intr_info = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
- break;
- case VM_EXIT_INTR_ERROR_CODE:
- current_evmcs->vm_exit_intr_error_code = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
- break;
- case IDT_VECTORING_INFO_FIELD:
- current_evmcs->idt_vectoring_info_field = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
- break;
- case IDT_VECTORING_ERROR_CODE:
- current_evmcs->idt_vectoring_error_code = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
- break;
- case VM_EXIT_INSTRUCTION_LEN:
- current_evmcs->vm_exit_instruction_len = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
- break;
- case VMX_INSTRUCTION_INFO:
- current_evmcs->vmx_instruction_info = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
- break;
- case PAGE_FAULT_ERROR_CODE_MASK:
- current_evmcs->page_fault_error_code_mask = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
- break;
- case PAGE_FAULT_ERROR_CODE_MATCH:
- current_evmcs->page_fault_error_code_match = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
- break;
- case CR3_TARGET_COUNT:
- current_evmcs->cr3_target_count = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
- break;
- case VM_EXIT_MSR_STORE_COUNT:
- current_evmcs->vm_exit_msr_store_count = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
- break;
- case VM_EXIT_MSR_LOAD_COUNT:
- current_evmcs->vm_exit_msr_load_count = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
- break;
- case VM_ENTRY_MSR_LOAD_COUNT:
- current_evmcs->vm_entry_msr_load_count = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
- break;
- case HOST_ES_SELECTOR:
- current_evmcs->host_es_selector = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
- break;
- case HOST_CS_SELECTOR:
- current_evmcs->host_cs_selector = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
- break;
- case HOST_SS_SELECTOR:
- current_evmcs->host_ss_selector = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
- break;
- case HOST_DS_SELECTOR:
- current_evmcs->host_ds_selector = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
- break;
- case HOST_FS_SELECTOR:
- current_evmcs->host_fs_selector = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
- break;
- case HOST_GS_SELECTOR:
- current_evmcs->host_gs_selector = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
- break;
- case HOST_TR_SELECTOR:
- current_evmcs->host_tr_selector = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
- break;
- case GUEST_ES_SELECTOR:
- current_evmcs->guest_es_selector = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_CS_SELECTOR:
- current_evmcs->guest_cs_selector = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_SS_SELECTOR:
- current_evmcs->guest_ss_selector = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_DS_SELECTOR:
- current_evmcs->guest_ds_selector = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_FS_SELECTOR:
- current_evmcs->guest_fs_selector = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_GS_SELECTOR:
- current_evmcs->guest_gs_selector = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_LDTR_SELECTOR:
- current_evmcs->guest_ldtr_selector = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case GUEST_TR_SELECTOR:
- current_evmcs->guest_tr_selector = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
- break;
- case VIRTUAL_PROCESSOR_ID:
- current_evmcs->virtual_processor_id = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT;
- break;
- case HOST_IA32_PERF_GLOBAL_CTRL:
- current_evmcs->host_ia32_perf_global_ctrl = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
- break;
- case GUEST_IA32_PERF_GLOBAL_CTRL:
- current_evmcs->guest_ia32_perf_global_ctrl = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
- break;
- case ENCLS_EXITING_BITMAP:
- current_evmcs->encls_exiting_bitmap = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
- break;
- case TSC_MULTIPLIER:
- current_evmcs->tsc_multiplier = value;
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
- break;
- default: return 1;
- }
-
- return 0;
-}
-
-static inline int evmcs_vmlaunch(void)
-{
- int ret;
-
- current_evmcs->hv_clean_fields = 0;
-
- __asm__ __volatile__("push %%rbp;"
- "push %%rcx;"
- "push %%rdx;"
- "push %%rsi;"
- "push %%rdi;"
- "push $0;"
- "mov %%rsp, (%[host_rsp]);"
- "lea 1f(%%rip), %%rax;"
- "mov %%rax, (%[host_rip]);"
- "vmlaunch;"
- "incq (%%rsp);"
- "1: pop %%rax;"
- "pop %%rdi;"
- "pop %%rsi;"
- "pop %%rdx;"
- "pop %%rcx;"
- "pop %%rbp;"
- : [ret]"=&a"(ret)
- : [host_rsp]"r"
- ((uint64_t)&current_evmcs->host_rsp),
- [host_rip]"r"
- ((uint64_t)&current_evmcs->host_rip)
- : "memory", "cc", "rbx", "r8", "r9", "r10",
- "r11", "r12", "r13", "r14", "r15");
- return ret;
-}
-
-/*
- * No guest state (e.g. GPRs) is established by this vmresume.
- */
-static inline int evmcs_vmresume(void)
-{
- int ret;
-
- /* HOST_RIP */
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
- /* HOST_RSP */
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
-
- __asm__ __volatile__("push %%rbp;"
- "push %%rcx;"
- "push %%rdx;"
- "push %%rsi;"
- "push %%rdi;"
- "push $0;"
- "mov %%rsp, (%[host_rsp]);"
- "lea 1f(%%rip), %%rax;"
- "mov %%rax, (%[host_rip]);"
- "vmresume;"
- "incq (%%rsp);"
- "1: pop %%rax;"
- "pop %%rdi;"
- "pop %%rsi;"
- "pop %%rdx;"
- "pop %%rcx;"
- "pop %%rbp;"
- : [ret]"=&a"(ret)
- : [host_rsp]"r"
- ((uint64_t)&current_evmcs->host_rsp),
- [host_rip]"r"
- ((uint64_t)&current_evmcs->host_rip)
- : "memory", "cc", "rbx", "r8", "r9", "r10",
- "r11", "r12", "r13", "r14", "r15");
- return ret;
-}
-
-#endif /* !SELFTEST_KVM_EVMCS_H */
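For context (again not part of the patch): once enable_evmcs is set, the vmptrld()/vmread()/vmwrite()/vmlaunch()/vmresume() wrappers in vmx.h are redirected to the in-memory enlightened VMCS, and every evmcs_vmwrite() clears the corresponding hv_clean_fields bit so the hypervisor reloads that group on the next entry. A hedged guest-side sketch, assuming the usual Hyper-V selftest setup (VP assist page enablement elided) and reusing the l1/l2 shape from the vmx.h sketch above:

static void l1_guest_code(struct vmx_pages *vmx, struct hyperv_test_pages *hv)
{
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];

	/* VP assist page setup via the hyperv.h helpers is assumed/elided. */
	evmcs_enable();
	GUEST_ASSERT(prepare_for_vmx_operation(vmx));
	GUEST_ASSERT(load_evmcs(hv));			/* eVMCS "vmptrld" */
	prepare_vmcs(vmx, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);

	/* Dispatches to evmcs_vmlaunch(), which zeroes hv_clean_fields. */
	GUEST_ASSERT(!vmlaunch());
}

On the host side, vcpu_enable_evmcs(vcpu), declared above, negotiates the eVMCS version with KVM before the vCPU is run.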
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * tools/testing/selftests/kvm/include/x86_64/hyperv.h
- *
- * Copyright (C) 2021, Red Hat, Inc.
- *
- */
-
-#ifndef SELFTEST_KVM_HYPERV_H
-#define SELFTEST_KVM_HYPERV_H
-
-#include "processor.h"
-
-#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000
-#define HYPERV_CPUID_INTERFACE 0x40000001
-#define HYPERV_CPUID_VERSION 0x40000002
-#define HYPERV_CPUID_FEATURES 0x40000003
-#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004
-#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005
-#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES 0x40000007
-#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A
-#define HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS 0x40000080
-#define HYPERV_CPUID_SYNDBG_INTERFACE 0x40000081
-#define HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES 0x40000082
-
-#define HV_X64_MSR_GUEST_OS_ID 0x40000000
-#define HV_X64_MSR_HYPERCALL 0x40000001
-#define HV_X64_MSR_VP_INDEX 0x40000002
-#define HV_X64_MSR_RESET 0x40000003
-#define HV_X64_MSR_VP_RUNTIME 0x40000010
-#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
-#define HV_X64_MSR_REFERENCE_TSC 0x40000021
-#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
-#define HV_X64_MSR_APIC_FREQUENCY 0x40000023
-#define HV_X64_MSR_EOI 0x40000070
-#define HV_X64_MSR_ICR 0x40000071
-#define HV_X64_MSR_TPR 0x40000072
-#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
-#define HV_X64_MSR_SCONTROL 0x40000080
-#define HV_X64_MSR_SVERSION 0x40000081
-#define HV_X64_MSR_SIEFP 0x40000082
-#define HV_X64_MSR_SIMP 0x40000083
-#define HV_X64_MSR_EOM 0x40000084
-#define HV_X64_MSR_SINT0 0x40000090
-#define HV_X64_MSR_SINT1 0x40000091
-#define HV_X64_MSR_SINT2 0x40000092
-#define HV_X64_MSR_SINT3 0x40000093
-#define HV_X64_MSR_SINT4 0x40000094
-#define HV_X64_MSR_SINT5 0x40000095
-#define HV_X64_MSR_SINT6 0x40000096
-#define HV_X64_MSR_SINT7 0x40000097
-#define HV_X64_MSR_SINT8 0x40000098
-#define HV_X64_MSR_SINT9 0x40000099
-#define HV_X64_MSR_SINT10 0x4000009A
-#define HV_X64_MSR_SINT11 0x4000009B
-#define HV_X64_MSR_SINT12 0x4000009C
-#define HV_X64_MSR_SINT13 0x4000009D
-#define HV_X64_MSR_SINT14 0x4000009E
-#define HV_X64_MSR_SINT15 0x4000009F
-#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0
-#define HV_X64_MSR_STIMER0_COUNT 0x400000B1
-#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2
-#define HV_X64_MSR_STIMER1_COUNT 0x400000B3
-#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4
-#define HV_X64_MSR_STIMER2_COUNT 0x400000B5
-#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6
-#define HV_X64_MSR_STIMER3_COUNT 0x400000B7
-#define HV_X64_MSR_GUEST_IDLE 0x400000F0
-#define HV_X64_MSR_CRASH_P0 0x40000100
-#define HV_X64_MSR_CRASH_P1 0x40000101
-#define HV_X64_MSR_CRASH_P2 0x40000102
-#define HV_X64_MSR_CRASH_P3 0x40000103
-#define HV_X64_MSR_CRASH_P4 0x40000104
-#define HV_X64_MSR_CRASH_CTL 0x40000105
-#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
-#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
-#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
-#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118
-
-#define HV_X64_MSR_SYNDBG_CONTROL 0x400000F1
-#define HV_X64_MSR_SYNDBG_STATUS 0x400000F2
-#define HV_X64_MSR_SYNDBG_SEND_BUFFER 0x400000F3
-#define HV_X64_MSR_SYNDBG_RECV_BUFFER 0x400000F4
-#define HV_X64_MSR_SYNDBG_PENDING_BUFFER 0x400000F5
-#define HV_X64_MSR_SYNDBG_OPTIONS 0x400000FF
-
-/* HYPERV_CPUID_FEATURES.EAX */
-#define HV_MSR_VP_RUNTIME_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 0)
-#define HV_MSR_TIME_REF_COUNT_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 1)
-#define HV_MSR_SYNIC_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 2)
-#define HV_MSR_SYNTIMER_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 3)
-#define HV_MSR_APIC_ACCESS_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 4)
-#define HV_MSR_HYPERCALL_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 5)
-#define HV_MSR_VP_INDEX_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 6)
-#define HV_MSR_RESET_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 7)
-#define HV_MSR_STAT_PAGES_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 8)
-#define HV_MSR_REFERENCE_TSC_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 9)
-#define HV_MSR_GUEST_IDLE_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 10)
-#define HV_ACCESS_FREQUENCY_MSRS \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 11)
-#define HV_ACCESS_REENLIGHTENMENT \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 13)
-#define HV_ACCESS_TSC_INVARIANT \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 15)
-
-/* HYPERV_CPUID_FEATURES.EBX */
-#define HV_CREATE_PARTITIONS \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 0)
-#define HV_ACCESS_PARTITION_ID \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 1)
-#define HV_ACCESS_MEMORY_POOL \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 2)
-#define HV_ADJUST_MESSAGE_BUFFERS \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 3)
-#define HV_POST_MESSAGES \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 4)
-#define HV_SIGNAL_EVENTS \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 5)
-#define HV_CREATE_PORT \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 6)
-#define HV_CONNECT_PORT \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 7)
-#define HV_ACCESS_STATS \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 8)
-#define HV_DEBUGGING \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 11)
-#define HV_CPU_MANAGEMENT \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 12)
-#define HV_ENABLE_EXTENDED_HYPERCALLS \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 20)
-#define HV_ISOLATION \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 22)
-
-/* HYPERV_CPUID_FEATURES.EDX */
-#define HV_X64_MWAIT_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 0)
-#define HV_X64_GUEST_DEBUGGING_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 1)
-#define HV_X64_PERF_MONITOR_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 2)
-#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 3)
-#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 4)
-#define HV_X64_GUEST_IDLE_STATE_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 5)
-#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 8)
-#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 10)
-#define HV_FEATURE_DEBUG_MSRS_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 11)
-#define HV_STIMER_DIRECT_MODE_AVAILABLE \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 19)
-
-/* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
-#define HV_X64_AS_SWITCH_RECOMMENDED \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 0)
-#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 1)
-#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 2)
-#define HV_X64_APIC_ACCESS_RECOMMENDED \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 3)
-#define HV_X64_SYSTEM_RESET_RECOMMENDED \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 4)
-#define HV_X64_RELAXED_TIMING_RECOMMENDED \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 5)
-#define HV_DEPRECATING_AEOI_RECOMMENDED \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 9)
-#define HV_X64_CLUSTER_IPI_RECOMMENDED \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 10)
-#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 11)
-#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 14)
-
-/* HYPERV_CPUID_NESTED_FEATURES.EAX */
-#define HV_X64_NESTED_DIRECT_FLUSH \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 17)
-#define HV_X64_NESTED_GUEST_MAPPING_FLUSH \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 18)
-#define HV_X64_NESTED_MSR_BITMAP \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 19)
-
-/* HYPERV_CPUID_NESTED_FEATURES.EBX */
-#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EBX, 0)
-
-/* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
-#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0, EAX, 1)
-
-/* Hypercalls */
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
-#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
-#define HVCALL_SEND_IPI 0x000b
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
-#define HVCALL_SEND_IPI_EX 0x0015
-#define HVCALL_GET_PARTITION_ID 0x0046
-#define HVCALL_DEPOSIT_MEMORY 0x0048
-#define HVCALL_CREATE_VP 0x004e
-#define HVCALL_GET_VP_REGISTERS 0x0050
-#define HVCALL_SET_VP_REGISTERS 0x0051
-#define HVCALL_POST_MESSAGE 0x005c
-#define HVCALL_SIGNAL_EVENT 0x005d
-#define HVCALL_POST_DEBUG_DATA 0x0069
-#define HVCALL_RETRIEVE_DEBUG_DATA 0x006a
-#define HVCALL_RESET_DEBUG_SESSION 0x006b
-#define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076
-#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c
-#define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d
-#define HVCALL_RETARGET_INTERRUPT 0x007e
-#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
-#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
-
-/* Extended hypercalls */
-#define HV_EXT_CALL_QUERY_CAPABILITIES 0x8001
-
-#define HV_FLUSH_ALL_PROCESSORS BIT(0)
-#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
-#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
-#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
-
-/* hypercall status code */
-#define HV_STATUS_SUCCESS 0
-#define HV_STATUS_INVALID_HYPERCALL_CODE 2
-#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
-#define HV_STATUS_INVALID_ALIGNMENT 4
-#define HV_STATUS_INVALID_PARAMETER 5
-#define HV_STATUS_ACCESS_DENIED 6
-#define HV_STATUS_OPERATION_DENIED 8
-#define HV_STATUS_INSUFFICIENT_MEMORY 11
-#define HV_STATUS_INVALID_PORT_ID 17
-#define HV_STATUS_INVALID_CONNECTION_ID 18
-#define HV_STATUS_INSUFFICIENT_BUFFERS 19
-
-/* hypercall options */
-#define HV_HYPERCALL_FAST_BIT BIT(16)
-#define HV_HYPERCALL_VARHEAD_OFFSET 17
-#define HV_HYPERCALL_REP_COMP_OFFSET 32
-
-/*
- * Issue a Hyper-V hypercall. Returns exception vector raised or 0, 'hv_status'
- * is set to the hypercall status (if no exception occurred).
- */
-static inline uint8_t __hyperv_hypercall(u64 control, vm_vaddr_t input_address,
- vm_vaddr_t output_address,
- uint64_t *hv_status)
-{
- uint64_t error_code;
- uint8_t vector;
-
- /* Note both the hypercall and the "asm safe" clobber r9-r11. */
- asm volatile("mov %[output_address], %%r8\n\t"
- KVM_ASM_SAFE("vmcall")
- : "=a" (*hv_status),
- "+c" (control), "+d" (input_address),
- KVM_ASM_SAFE_OUTPUTS(vector, error_code)
- : [output_address] "r"(output_address),
- "a" (-EFAULT)
- : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS);
- return vector;
-}
-
-/* Issue a Hyper-V hypercall and assert that it succeeded. */
-static inline void hyperv_hypercall(u64 control, vm_vaddr_t input_address,
- vm_vaddr_t output_address)
-{
- uint64_t hv_status;
- uint8_t vector;
-
- vector = __hyperv_hypercall(control, input_address, output_address, &hv_status);
-
- GUEST_ASSERT(!vector);
- GUEST_ASSERT((hv_status & 0xffff) == 0);
-}
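Illustrative sketch only, not part of the patch: typical guest-side usage of the helpers above, assuming the selftests lib headers and a hypothetical 'hcall_gpa' page that the host side of the test reserved for the Hyper-V hypercall page.

static void guest_hv_hypercall_example(uint64_t hcall_gpa)
{
	uint64_t status;
	uint8_t vector;

	/* Hypercalls require a guest OS ID and a hypercall page (GPA). */
	wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
	wrmsr(HV_X64_MSR_HYPERCALL, hcall_gpa);

	/* Fast hypercall with no memory operands; expect no fault, status 0. */
	vector = __hyperv_hypercall(HVCALL_NOTIFY_LONG_SPIN_WAIT | HV_HYPERCALL_FAST_BIT,
				    0, 0, &status);
	GUEST_ASSERT(!vector);
	GUEST_ASSERT((status & 0xffff) == HV_STATUS_SUCCESS);
}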
-
-/* Write 'Fast' hypercall input 'data' to the first 'n_sse_regs' SSE regs */
-static inline void hyperv_write_xmm_input(void *data, int n_sse_regs)
-{
- int i;
-
- for (i = 0; i < n_sse_regs; i++)
- write_sse_reg(i, (sse128_t *)(data + sizeof(sse128_t) * i));
-}
-
-/* Proper HV_X64_MSR_GUEST_OS_ID value */
-#define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48)
-
-#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
-#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
- (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
-
-struct hv_nested_enlightenments_control {
- struct {
- __u32 directhypercall:1;
- __u32 reserved:31;
- } features;
- struct {
- __u32 reserved;
- } hypercallControls;
-} __packed;
-
-/* Define virtual processor assist page structure. */
-struct hv_vp_assist_page {
- __u32 apic_assist;
- __u32 reserved1;
- __u64 vtl_control[3];
- struct hv_nested_enlightenments_control nested_control;
- __u8 enlighten_vmentry;
- __u8 reserved2[7];
- __u64 current_nested_vmcs;
-} __packed;
-
-extern struct hv_vp_assist_page *current_vp_assist;
-
-int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist);
-
-struct hyperv_test_pages {
- /* VP assist page */
- void *vp_assist_hva;
- uint64_t vp_assist_gpa;
- void *vp_assist;
-
- /* Partition assist page */
- void *partition_assist_hva;
- uint64_t partition_assist_gpa;
- void *partition_assist;
-
- /* Enlightened VMCS */
- void *enlightened_vmcs_hva;
- uint64_t enlightened_vmcs_gpa;
- void *enlightened_vmcs;
-};
-
-struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
- vm_vaddr_t *p_hv_pages_gva);
-
-/* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */
-#define HV_INVARIANT_TSC_EXPOSED BIT_ULL(0)
-
-const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
-const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu);
-void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu);
-
-bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature);
-
-#endif /* !SELFTEST_KVM_HYPERV_H */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_UTIL_ARCH_H
-#define SELFTEST_KVM_UTIL_ARCH_H
-
-#include <stdbool.h>
-#include <stdint.h>
-
-#include "kvm_util_types.h"
-#include "test_util.h"
-
-extern bool is_forced_emulation_enabled;
-
-struct kvm_vm_arch {
- vm_vaddr_t gdt;
- vm_vaddr_t tss;
- vm_vaddr_t idt;
-
- uint64_t c_bit;
- uint64_t s_bit;
- int sev_fd;
- bool is_pt_protected;
-};
-
-static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch)
-{
- return arch->c_bit || arch->s_bit;
-}
-
-#define vm_arch_has_protected_memory(vm) \
- __vm_arch_has_protected_memory(&(vm)->arch)
-
-#define vcpu_arch_put_guest(mem, __val) \
-do { \
- const typeof(mem) val = (__val); \
- \
- if (!is_forced_emulation_enabled || guest_random_bool(&guest_rng)) { \
- (mem) = val; \
- } else if (guest_random_bool(&guest_rng)) { \
- __asm__ __volatile__(KVM_FEP "mov %1, %0" \
- : "+m" (mem) \
- : "r" (val) : "memory"); \
- } else { \
- uint64_t __old = READ_ONCE(mem); \
- \
- __asm__ __volatile__(KVM_FEP LOCK_PREFIX "cmpxchg %[new], %[ptr]" \
- : [ptr] "+m" (mem), [old] "+a" (__old) \
- : [new]"r" (val) : "memory", "cc"); \
- } \
-} while (0)
-
-#endif // SELFTEST_KVM_UTIL_ARCH_H
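Illustrative sketch, not part of the patch: guest code might wrap its stores with vcpu_arch_put_guest() so that, when KVM's force_emulation_prefix is enabled, each write is pseudo-randomly issued as a plain store, a forced-emulation MOV, or a forced-emulation locked CMPXCHG.

static void guest_store_loop(uint64_t *buf, int nr_entries)
{
	int i;

	/* The store flavor is re-randomized on every invocation. */
	for (i = 0; i < nr_entries; i++)
		vcpu_arch_put_guest(buf[i], (uint64_t)i);
}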
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/x86_64/mce.h
- *
- * Copyright (C) 2022, Google LLC.
- */
-
-#ifndef SELFTEST_KVM_MCE_H
-#define SELFTEST_KVM_MCE_H
-
-#define MCG_CTL_P BIT_ULL(8) /* MCG_CTL register available */
-#define MCG_SER_P BIT_ULL(24) /* MCA recovery/new status bits */
-#define MCG_LMCE_P BIT_ULL(27) /* Local machine check supported */
-#define MCG_CMCI_P BIT_ULL(10) /* CMCI supported */
-#define KVM_MAX_MCE_BANKS 32
-#define MCG_CAP_BANKS_MASK 0xff /* Bit 0-7 of the MCG_CAP register are #banks */
-#define MCI_STATUS_VAL (1ULL << 63) /* valid error */
-#define MCI_STATUS_UC (1ULL << 61) /* uncorrected error */
-#define MCI_STATUS_EN (1ULL << 60) /* error enabled */
-#define MCI_STATUS_MISCV (1ULL << 59) /* misc error reg. valid */
-#define MCI_STATUS_ADDRV (1ULL << 58) /* addr reg. valid */
-#define MCM_ADDR_PHYS 2 /* physical address */
-#define MCI_CTL2_CMCI_EN BIT_ULL(30)
-
-#endif /* SELFTEST_KVM_MCE_H */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2023, Tencent, Inc.
- */
-#ifndef SELFTEST_KVM_PMU_H
-#define SELFTEST_KVM_PMU_H
-
-#include <stdint.h>
-
-#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
-
-/*
- * Encode an eventsel+umask pair into event-select MSR format. Note, this is
- * technically AMD's format, as Intel's format only supports 8 bits for the
- * event selector, i.e. doesn't use bits 24:16 for the selector. But, OR-ing
- * in '0' is a nop and won't clobber the CMASK.
- */
-#define RAW_EVENT(eventsel, umask) (((eventsel & 0xf00UL) << 24) | \
- ((eventsel) & 0xff) | \
- ((umask) & 0xff) << 8)
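Worked examples of the encoding (illustrative only, assuming kvm_static_assert() from the selftests lib is visible): the low event-select byte lands in bits 7:0, the umask in bits 15:8, and event-select bits 11:8 in bits 35:32.

/* LLC references: eventsel 0x2e, umask 0x4f encodes to 0x4f2e. */
kvm_static_assert(RAW_EVENT(0x2e, 0x4f) == 0x4f2e);
/* Hypothetical 12-bit eventsel 0x1c0: bit 8 of the selector moves to bit 32. */
kvm_static_assert(RAW_EVENT(0x1c0, 0x00) == ((1ULL << 32) | 0xc0));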
-
-/*
- * These are technically Intel's definitions, but except for CMASK (see above),
- * AMD's layout is compatible with Intel's.
- */
-#define ARCH_PERFMON_EVENTSEL_EVENT GENMASK_ULL(7, 0)
-#define ARCH_PERFMON_EVENTSEL_UMASK GENMASK_ULL(15, 8)
-#define ARCH_PERFMON_EVENTSEL_USR BIT_ULL(16)
-#define ARCH_PERFMON_EVENTSEL_OS BIT_ULL(17)
-#define ARCH_PERFMON_EVENTSEL_EDGE BIT_ULL(18)
-#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL BIT_ULL(19)
-#define ARCH_PERFMON_EVENTSEL_INT BIT_ULL(20)
-#define ARCH_PERFMON_EVENTSEL_ANY BIT_ULL(21)
-#define ARCH_PERFMON_EVENTSEL_ENABLE BIT_ULL(22)
-#define ARCH_PERFMON_EVENTSEL_INV BIT_ULL(23)
-#define ARCH_PERFMON_EVENTSEL_CMASK GENMASK_ULL(31, 24)
-
-/* RDPMC control flags, Intel only. */
-#define INTEL_RDPMC_METRICS BIT_ULL(29)
-#define INTEL_RDPMC_FIXED BIT_ULL(30)
-#define INTEL_RDPMC_FAST BIT_ULL(31)
-
-/* Fixed PMC controls, Intel only. */
-#define FIXED_PMC_GLOBAL_CTRL_ENABLE(_idx) BIT_ULL((32 + (_idx)))
-
-#define FIXED_PMC_KERNEL BIT_ULL(0)
-#define FIXED_PMC_USER BIT_ULL(1)
-#define FIXED_PMC_ANYTHREAD BIT_ULL(2)
-#define FIXED_PMC_ENABLE_PMI BIT_ULL(3)
-#define FIXED_PMC_NR_BITS 4
-#define FIXED_PMC_CTRL(_idx, _val) ((_val) << ((_idx) * FIXED_PMC_NR_BITS))
-
-#define PMU_CAP_FW_WRITES BIT_ULL(13)
-#define PMU_CAP_LBR_FMT 0x3f
-
-#define INTEL_ARCH_CPU_CYCLES RAW_EVENT(0x3c, 0x00)
-#define INTEL_ARCH_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00)
-#define INTEL_ARCH_REFERENCE_CYCLES RAW_EVENT(0x3c, 0x01)
-#define INTEL_ARCH_LLC_REFERENCES RAW_EVENT(0x2e, 0x4f)
-#define INTEL_ARCH_LLC_MISSES RAW_EVENT(0x2e, 0x41)
-#define INTEL_ARCH_BRANCHES_RETIRED RAW_EVENT(0xc4, 0x00)
-#define INTEL_ARCH_BRANCHES_MISPREDICTED RAW_EVENT(0xc5, 0x00)
-#define INTEL_ARCH_TOPDOWN_SLOTS RAW_EVENT(0xa4, 0x01)
-
-#define AMD_ZEN_CORE_CYCLES RAW_EVENT(0x76, 0x00)
-#define AMD_ZEN_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00)
-#define AMD_ZEN_BRANCHES_RETIRED RAW_EVENT(0xc2, 0x00)
-#define AMD_ZEN_BRANCHES_MISPREDICTED RAW_EVENT(0xc3, 0x00)
-
-/*
- * Note! The order and thus the index of the architectural events matters as
- * support for each event is enumerated via CPUID using the index of the event.
- */
-enum intel_pmu_architectural_events {
- INTEL_ARCH_CPU_CYCLES_INDEX,
- INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX,
- INTEL_ARCH_REFERENCE_CYCLES_INDEX,
- INTEL_ARCH_LLC_REFERENCES_INDEX,
- INTEL_ARCH_LLC_MISSES_INDEX,
- INTEL_ARCH_BRANCHES_RETIRED_INDEX,
- INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX,
- INTEL_ARCH_TOPDOWN_SLOTS_INDEX,
- NR_INTEL_ARCH_EVENTS,
-};
-
-enum amd_pmu_zen_events {
- AMD_ZEN_CORE_CYCLES_INDEX,
- AMD_ZEN_INSTRUCTIONS_INDEX,
- AMD_ZEN_BRANCHES_INDEX,
- AMD_ZEN_BRANCH_MISSES_INDEX,
- NR_AMD_ZEN_EVENTS,
-};
-
-extern const uint64_t intel_pmu_arch_events[];
-extern const uint64_t amd_pmu_zen_events[];
-
-#endif /* SELFTEST_KVM_PMU_H */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/x86_64/processor.h
- *
- * Copyright (C) 2018, Google LLC.
- */
-
-#ifndef SELFTEST_KVM_PROCESSOR_H
-#define SELFTEST_KVM_PROCESSOR_H
-
-#include <assert.h>
-#include <stdint.h>
-#include <syscall.h>
-
-#include <asm/msr-index.h>
-#include <asm/prctl.h>
-
-#include <linux/kvm_para.h>
-#include <linux/stringify.h>
-
-#include "kvm_util.h"
-#include "ucall_common.h"
-
-extern bool host_cpu_is_intel;
-extern bool host_cpu_is_amd;
-extern uint64_t guest_tsc_khz;
-
-#ifndef MAX_NR_CPUID_ENTRIES
-#define MAX_NR_CPUID_ENTRIES 100
-#endif
-
-/* Forced emulation prefix, used to invoke the emulator unconditionally. */
-#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
-
-#define NMI_VECTOR 0x02
-
-#define X86_EFLAGS_FIXED (1u << 1)
-
-#define X86_CR4_VME (1ul << 0)
-#define X86_CR4_PVI (1ul << 1)
-#define X86_CR4_TSD (1ul << 2)
-#define X86_CR4_DE (1ul << 3)
-#define X86_CR4_PSE (1ul << 4)
-#define X86_CR4_PAE (1ul << 5)
-#define X86_CR4_MCE (1ul << 6)
-#define X86_CR4_PGE (1ul << 7)
-#define X86_CR4_PCE (1ul << 8)
-#define X86_CR4_OSFXSR (1ul << 9)
-#define X86_CR4_OSXMMEXCPT (1ul << 10)
-#define X86_CR4_UMIP (1ul << 11)
-#define X86_CR4_LA57 (1ul << 12)
-#define X86_CR4_VMXE (1ul << 13)
-#define X86_CR4_SMXE (1ul << 14)
-#define X86_CR4_FSGSBASE (1ul << 16)
-#define X86_CR4_PCIDE (1ul << 17)
-#define X86_CR4_OSXSAVE (1ul << 18)
-#define X86_CR4_SMEP (1ul << 20)
-#define X86_CR4_SMAP (1ul << 21)
-#define X86_CR4_PKE (1ul << 22)
-
-struct xstate_header {
- u64 xstate_bv;
- u64 xcomp_bv;
- u64 reserved[6];
-} __attribute__((packed));
-
-struct xstate {
- u8 i387[512];
- struct xstate_header header;
- u8 extended_state_area[0];
-} __attribute__ ((packed, aligned (64)));
-
-#define XFEATURE_MASK_FP BIT_ULL(0)
-#define XFEATURE_MASK_SSE BIT_ULL(1)
-#define XFEATURE_MASK_YMM BIT_ULL(2)
-#define XFEATURE_MASK_BNDREGS BIT_ULL(3)
-#define XFEATURE_MASK_BNDCSR BIT_ULL(4)
-#define XFEATURE_MASK_OPMASK BIT_ULL(5)
-#define XFEATURE_MASK_ZMM_Hi256 BIT_ULL(6)
-#define XFEATURE_MASK_Hi16_ZMM BIT_ULL(7)
-#define XFEATURE_MASK_PT BIT_ULL(8)
-#define XFEATURE_MASK_PKRU BIT_ULL(9)
-#define XFEATURE_MASK_PASID BIT_ULL(10)
-#define XFEATURE_MASK_CET_USER BIT_ULL(11)
-#define XFEATURE_MASK_CET_KERNEL BIT_ULL(12)
-#define XFEATURE_MASK_LBR BIT_ULL(15)
-#define XFEATURE_MASK_XTILE_CFG BIT_ULL(17)
-#define XFEATURE_MASK_XTILE_DATA BIT_ULL(18)
-
-#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK | \
- XFEATURE_MASK_ZMM_Hi256 | \
- XFEATURE_MASK_Hi16_ZMM)
-#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILE_DATA | \
- XFEATURE_MASK_XTILE_CFG)
-
-/* Note, these are ordered alphabetically to match kvm_cpuid_entry2. Eww. */
-enum cpuid_output_regs {
- KVM_CPUID_EAX,
- KVM_CPUID_EBX,
- KVM_CPUID_ECX,
- KVM_CPUID_EDX
-};
-
-/*
- * Pack the information into a 64-bit value so that each X86_FEATURE_XXX can be
- * passed by value with no overhead.
- */
-struct kvm_x86_cpu_feature {
- u32 function;
- u16 index;
- u8 reg;
- u8 bit;
-};
-#define KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit) \
-({ \
- struct kvm_x86_cpu_feature feature = { \
- .function = fn, \
- .index = idx, \
- .reg = KVM_CPUID_##gpr, \
- .bit = __bit, \
- }; \
- \
- kvm_static_assert((fn & 0xc0000000) == 0 || \
- (fn & 0xc0000000) == 0x40000000 || \
- (fn & 0xc0000000) == 0x80000000 || \
- (fn & 0xc0000000) == 0xc0000000); \
- kvm_static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE)); \
- feature; \
-})
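Purely illustrative, not part of the patch: a new single-bit feature is defined by packing its CPUID location, e.g. AVX-VNNI, which is enumerated in CPUID.(EAX=7,ECX=1):EAX[4].

/* Hypothetical addition, shown only to illustrate the encoding. */
#define X86_FEATURE_EXAMPLE_AVX_VNNI	KVM_X86_CPU_FEATURE(0x7, 1, EAX, 4)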
-
-/*
- * Basic Leafs, a.k.a. Intel defined
- */
-#define X86_FEATURE_MWAIT KVM_X86_CPU_FEATURE(0x1, 0, ECX, 3)
-#define X86_FEATURE_VMX KVM_X86_CPU_FEATURE(0x1, 0, ECX, 5)
-#define X86_FEATURE_SMX KVM_X86_CPU_FEATURE(0x1, 0, ECX, 6)
-#define X86_FEATURE_PDCM KVM_X86_CPU_FEATURE(0x1, 0, ECX, 15)
-#define X86_FEATURE_PCID KVM_X86_CPU_FEATURE(0x1, 0, ECX, 17)
-#define X86_FEATURE_X2APIC KVM_X86_CPU_FEATURE(0x1, 0, ECX, 21)
-#define X86_FEATURE_MOVBE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 22)
-#define X86_FEATURE_TSC_DEADLINE_TIMER KVM_X86_CPU_FEATURE(0x1, 0, ECX, 24)
-#define X86_FEATURE_XSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26)
-#define X86_FEATURE_OSXSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27)
-#define X86_FEATURE_RDRAND KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30)
-#define X86_FEATURE_HYPERVISOR KVM_X86_CPU_FEATURE(0x1, 0, ECX, 31)
-#define X86_FEATURE_PAE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6)
-#define X86_FEATURE_MCE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7)
-#define X86_FEATURE_APIC KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9)
-#define X86_FEATURE_CLFLUSH KVM_X86_CPU_FEATURE(0x1, 0, EDX, 19)
-#define X86_FEATURE_XMM KVM_X86_CPU_FEATURE(0x1, 0, EDX, 25)
-#define X86_FEATURE_XMM2 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 26)
-#define X86_FEATURE_FSGSBASE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 0)
-#define X86_FEATURE_TSC_ADJUST KVM_X86_CPU_FEATURE(0x7, 0, EBX, 1)
-#define X86_FEATURE_SGX KVM_X86_CPU_FEATURE(0x7, 0, EBX, 2)
-#define X86_FEATURE_HLE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 4)
-#define X86_FEATURE_SMEP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 7)
-#define X86_FEATURE_INVPCID KVM_X86_CPU_FEATURE(0x7, 0, EBX, 10)
-#define X86_FEATURE_RTM KVM_X86_CPU_FEATURE(0x7, 0, EBX, 11)
-#define X86_FEATURE_MPX KVM_X86_CPU_FEATURE(0x7, 0, EBX, 14)
-#define X86_FEATURE_SMAP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 20)
-#define X86_FEATURE_PCOMMIT KVM_X86_CPU_FEATURE(0x7, 0, EBX, 22)
-#define X86_FEATURE_CLFLUSHOPT KVM_X86_CPU_FEATURE(0x7, 0, EBX, 23)
-#define X86_FEATURE_CLWB KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24)
-#define X86_FEATURE_UMIP KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2)
-#define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3)
-#define X86_FEATURE_OSPKE KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4)
-#define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16)
-#define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22)
-#define X86_FEATURE_SGX_LC KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30)
-#define X86_FEATURE_SHSTK KVM_X86_CPU_FEATURE(0x7, 0, ECX, 7)
-#define X86_FEATURE_IBT KVM_X86_CPU_FEATURE(0x7, 0, EDX, 20)
-#define X86_FEATURE_AMX_TILE KVM_X86_CPU_FEATURE(0x7, 0, EDX, 24)
-#define X86_FEATURE_SPEC_CTRL KVM_X86_CPU_FEATURE(0x7, 0, EDX, 26)
-#define X86_FEATURE_ARCH_CAPABILITIES KVM_X86_CPU_FEATURE(0x7, 0, EDX, 29)
-#define X86_FEATURE_PKS KVM_X86_CPU_FEATURE(0x7, 0, ECX, 31)
-#define X86_FEATURE_XTILECFG KVM_X86_CPU_FEATURE(0xD, 0, EAX, 17)
-#define X86_FEATURE_XTILEDATA KVM_X86_CPU_FEATURE(0xD, 0, EAX, 18)
-#define X86_FEATURE_XSAVES KVM_X86_CPU_FEATURE(0xD, 1, EAX, 3)
-#define X86_FEATURE_XFD KVM_X86_CPU_FEATURE(0xD, 1, EAX, 4)
-#define X86_FEATURE_XTILEDATA_XFD KVM_X86_CPU_FEATURE(0xD, 18, ECX, 2)
-
-/*
- * Extended Leafs, a.k.a. AMD defined
- */
-#define X86_FEATURE_SVM KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2)
-#define X86_FEATURE_NX KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20)
-#define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26)
-#define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27)
-#define X86_FEATURE_LM KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 29)
-#define X86_FEATURE_INVTSC KVM_X86_CPU_FEATURE(0x80000007, 0, EDX, 8)
-#define X86_FEATURE_RDPRU KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 4)
-#define X86_FEATURE_AMD_IBPB KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 12)
-#define X86_FEATURE_NPT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 0)
-#define X86_FEATURE_LBRV KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 1)
-#define X86_FEATURE_NRIPS KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 3)
-#define X86_FEATURE_TSCRATEMSR KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4)
-#define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
-#define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
-#define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
-#define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
-#define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
-
-/*
- * KVM defined paravirt features.
- */
-#define X86_FEATURE_KVM_CLOCKSOURCE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 0)
-#define X86_FEATURE_KVM_NOP_IO_DELAY KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 1)
-#define X86_FEATURE_KVM_MMU_OP KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 2)
-#define X86_FEATURE_KVM_CLOCKSOURCE2 KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 3)
-#define X86_FEATURE_KVM_ASYNC_PF KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 4)
-#define X86_FEATURE_KVM_STEAL_TIME KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 5)
-#define X86_FEATURE_KVM_PV_EOI KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 6)
-#define X86_FEATURE_KVM_PV_UNHALT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 7)
-/* Bit 8 apparently isn't used?!?! */
-#define X86_FEATURE_KVM_PV_TLB_FLUSH KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 9)
-#define X86_FEATURE_KVM_ASYNC_PF_VMEXIT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 10)
-#define X86_FEATURE_KVM_PV_SEND_IPI KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 11)
-#define X86_FEATURE_KVM_POLL_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 12)
-#define X86_FEATURE_KVM_PV_SCHED_YIELD KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 13)
-#define X86_FEATURE_KVM_ASYNC_PF_INT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 14)
-#define X86_FEATURE_KVM_MSI_EXT_DEST_ID KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 15)
-#define X86_FEATURE_KVM_HC_MAP_GPA_RANGE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 16)
-#define X86_FEATURE_KVM_MIGRATION_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 17)
-
-/*
- * Same idea as X86_FEATURE_XXX, but X86_PROPERTY_XXX retrieves a multi-bit
- * value/property as opposed to a single-bit feature. Again, pack the info
- * into a 64-bit value to pass by value with no overhead.
- */
-struct kvm_x86_cpu_property {
- u32 function;
- u8 index;
- u8 reg;
- u8 lo_bit;
- u8 hi_bit;
-};
-#define KVM_X86_CPU_PROPERTY(fn, idx, gpr, low_bit, high_bit) \
-({ \
- struct kvm_x86_cpu_property property = { \
- .function = fn, \
- .index = idx, \
- .reg = KVM_CPUID_##gpr, \
- .lo_bit = low_bit, \
- .hi_bit = high_bit, \
- }; \
- \
- kvm_static_assert(low_bit < high_bit); \
- kvm_static_assert((fn & 0xc0000000) == 0 || \
- (fn & 0xc0000000) == 0x40000000 || \
- (fn & 0xc0000000) == 0x80000000 || \
- (fn & 0xc0000000) == 0xc0000000); \
- kvm_static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE)); \
- property; \
-})
-
-#define X86_PROPERTY_MAX_BASIC_LEAF KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31)
-#define X86_PROPERTY_PMU_VERSION KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7)
-#define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
-#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23)
-#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
-#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7)
-#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31)
-#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4)
-#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12)
-
-#define X86_PROPERTY_SUPPORTED_XCR0_LO KVM_X86_CPU_PROPERTY(0xd, 0, EAX, 0, 31)
-#define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0 KVM_X86_CPU_PROPERTY(0xd, 0, EBX, 0, 31)
-#define X86_PROPERTY_XSTATE_MAX_SIZE KVM_X86_CPU_PROPERTY(0xd, 0, ECX, 0, 31)
-#define X86_PROPERTY_SUPPORTED_XCR0_HI KVM_X86_CPU_PROPERTY(0xd, 0, EDX, 0, 31)
-
-#define X86_PROPERTY_XSTATE_TILE_SIZE KVM_X86_CPU_PROPERTY(0xd, 18, EAX, 0, 31)
-#define X86_PROPERTY_XSTATE_TILE_OFFSET KVM_X86_CPU_PROPERTY(0xd, 18, EBX, 0, 31)
-#define X86_PROPERTY_AMX_MAX_PALETTE_TABLES KVM_X86_CPU_PROPERTY(0x1d, 0, EAX, 0, 31)
-#define X86_PROPERTY_AMX_TOTAL_TILE_BYTES KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 0, 15)
-#define X86_PROPERTY_AMX_BYTES_PER_TILE KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 16, 31)
-#define X86_PROPERTY_AMX_BYTES_PER_ROW KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 0, 15)
-#define X86_PROPERTY_AMX_NR_TILE_REGS KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 16, 31)
-#define X86_PROPERTY_AMX_MAX_ROWS KVM_X86_CPU_PROPERTY(0x1d, 1, ECX, 0, 15)
-
-#define X86_PROPERTY_MAX_KVM_LEAF KVM_X86_CPU_PROPERTY(0x40000000, 0, EAX, 0, 31)
-
-#define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31)
-#define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7)
-#define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15)
-#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23)
-#define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
-#define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
-
-#define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)
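Illustrative only, not part of the patch: unlike a feature, a property reads back an entire bit field. A hypothetical helper using this_cpu_property() (defined later in this header) to extract MAXPHYADDR from CPUID.0x80000008:EAX[7:0]:

static inline bool example_cpu_has_52bit_maxphyaddr(void)
{
	/* The property machinery extracts bits 7:0, e.g. 0x28 = 40 bits. */
	return this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR) >= 52;
}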
-
-/*
- * Intel's architectural PMU events are bizarre. They have a "feature" bit
- * that indicates the feature is _not_ supported, and a property that states
- * the length of the bit mask of unsupported features. A feature is supported
- * if the size of the bit mask is larger than the "unavailable" bit, and said
- * bit is not set. Fixed counters also have bizarre enumeration, but inverted from
- * arch events for general purpose counters. Fixed counters are supported if a
- * feature flag is set **OR** the total number of fixed counters is greater
- * than the index of the counter.
- *
- * Wrap the events for general purpose and fixed counters to simplify checking
- * whether or not a given architectural event is supported.
- */
-struct kvm_x86_pmu_feature {
- struct kvm_x86_cpu_feature f;
-};
-#define KVM_X86_PMU_FEATURE(__reg, __bit) \
-({ \
- struct kvm_x86_pmu_feature feature = { \
- .f = KVM_X86_CPU_FEATURE(0xa, 0, __reg, __bit), \
- }; \
- \
- kvm_static_assert(KVM_CPUID_##__reg == KVM_CPUID_EBX || \
- KVM_CPUID_##__reg == KVM_CPUID_ECX); \
- feature; \
-})
-
-#define X86_PMU_FEATURE_CPU_CYCLES KVM_X86_PMU_FEATURE(EBX, 0)
-#define X86_PMU_FEATURE_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 1)
-#define X86_PMU_FEATURE_REFERENCE_CYCLES KVM_X86_PMU_FEATURE(EBX, 2)
-#define X86_PMU_FEATURE_LLC_REFERENCES KVM_X86_PMU_FEATURE(EBX, 3)
-#define X86_PMU_FEATURE_LLC_MISSES KVM_X86_PMU_FEATURE(EBX, 4)
-#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 5)
-#define X86_PMU_FEATURE_BRANCHES_MISPREDICTED KVM_X86_PMU_FEATURE(EBX, 6)
-#define X86_PMU_FEATURE_TOPDOWN_SLOTS KVM_X86_PMU_FEATURE(EBX, 7)
-
-#define X86_PMU_FEATURE_INSNS_RETIRED_FIXED KVM_X86_PMU_FEATURE(ECX, 0)
-#define X86_PMU_FEATURE_CPU_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 1)
-#define X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 2)
-#define X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED KVM_X86_PMU_FEATURE(ECX, 3)
-
-static inline unsigned int x86_family(unsigned int eax)
-{
- unsigned int x86;
-
- x86 = (eax >> 8) & 0xf;
-
- if (x86 == 0xf)
- x86 += (eax >> 20) & 0xff;
-
- return x86;
-}
-
-static inline unsigned int x86_model(unsigned int eax)
-{
- return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f);
-}
-
-/* Page table bitfield declarations */
-#define PTE_PRESENT_MASK BIT_ULL(0)
-#define PTE_WRITABLE_MASK BIT_ULL(1)
-#define PTE_USER_MASK BIT_ULL(2)
-#define PTE_ACCESSED_MASK BIT_ULL(5)
-#define PTE_DIRTY_MASK BIT_ULL(6)
-#define PTE_LARGE_MASK BIT_ULL(7)
-#define PTE_GLOBAL_MASK BIT_ULL(8)
-#define PTE_NX_MASK BIT_ULL(63)
-
-#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12)
-
-#define PAGE_SHIFT 12
-#define PAGE_SIZE (1ULL << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1) & PHYSICAL_PAGE_MASK)
-
-#define HUGEPAGE_SHIFT(x) (PAGE_SHIFT + (((x) - 1) * 9))
-#define HUGEPAGE_SIZE(x) (1UL << HUGEPAGE_SHIFT(x))
-#define HUGEPAGE_MASK(x) (~(HUGEPAGE_SIZE(x) - 1) & PHYSICAL_PAGE_MASK)
-
-#define PTE_GET_PA(pte) ((pte) & PHYSICAL_PAGE_MASK)
-#define PTE_GET_PFN(pte) (PTE_GET_PA(pte) >> PAGE_SHIFT)
-
-/* General Registers in 64-Bit Mode */
-struct gpr64_regs {
- u64 rax;
- u64 rcx;
- u64 rdx;
- u64 rbx;
- u64 rsp;
- u64 rbp;
- u64 rsi;
- u64 rdi;
- u64 r8;
- u64 r9;
- u64 r10;
- u64 r11;
- u64 r12;
- u64 r13;
- u64 r14;
- u64 r15;
-};
-
-struct desc64 {
- uint16_t limit0;
- uint16_t base0;
- unsigned base1:8, type:4, s:1, dpl:2, p:1;
- unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
- uint32_t base3;
- uint32_t zero1;
-} __attribute__((packed));
-
-struct desc_ptr {
- uint16_t size;
- uint64_t address;
-} __attribute__((packed));
-
-struct kvm_x86_state {
- struct kvm_xsave *xsave;
- struct kvm_vcpu_events events;
- struct kvm_mp_state mp_state;
- struct kvm_regs regs;
- struct kvm_xcrs xcrs;
- struct kvm_sregs sregs;
- struct kvm_debugregs debugregs;
- union {
- struct kvm_nested_state nested;
- char nested_[16384];
- };
- struct kvm_msrs msrs;
-};
-
-static inline uint64_t get_desc64_base(const struct desc64 *desc)
-{
- return ((uint64_t)desc->base3 << 32) |
- (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
-}
-
-static inline uint64_t rdtsc(void)
-{
- uint32_t eax, edx;
- uint64_t tsc_val;
- /*
- * The lfence is to wait (on Intel CPUs) until all previous
- * instructions have been executed. If software requires RDTSC to be
- * executed prior to execution of any subsequent instruction, it can
- * execute LFENCE immediately after RDTSC
- */
- __asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
- tsc_val = ((uint64_t)edx) << 32 | eax;
- return tsc_val;
-}
-
-static inline uint64_t rdtscp(uint32_t *aux)
-{
- uint32_t eax, edx;
-
- __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
- return ((uint64_t)edx) << 32 | eax;
-}
-
-static inline uint64_t rdmsr(uint32_t msr)
-{
- uint32_t a, d;
-
- __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");
-
- return a | ((uint64_t) d << 32);
-}
-
-static inline void wrmsr(uint32_t msr, uint64_t value)
-{
- uint32_t a = value;
- uint32_t d = value >> 32;
-
- __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
-}
-
-
-static inline uint16_t inw(uint16_t port)
-{
- uint16_t tmp;
-
- __asm__ __volatile__("in %%dx, %%ax"
- : /* output */ "=a" (tmp)
- : /* input */ "d" (port));
-
- return tmp;
-}
-
-static inline uint16_t get_es(void)
-{
- uint16_t es;
-
- __asm__ __volatile__("mov %%es, %[es]"
- : /* output */ [es]"=rm"(es));
- return es;
-}
-
-static inline uint16_t get_cs(void)
-{
- uint16_t cs;
-
- __asm__ __volatile__("mov %%cs, %[cs]"
- : /* output */ [cs]"=rm"(cs));
- return cs;
-}
-
-static inline uint16_t get_ss(void)
-{
- uint16_t ss;
-
- __asm__ __volatile__("mov %%ss, %[ss]"
- : /* output */ [ss]"=rm"(ss));
- return ss;
-}
-
-static inline uint16_t get_ds(void)
-{
- uint16_t ds;
-
- __asm__ __volatile__("mov %%ds, %[ds]"
- : /* output */ [ds]"=rm"(ds));
- return ds;
-}
-
-static inline uint16_t get_fs(void)
-{
- uint16_t fs;
-
- __asm__ __volatile__("mov %%fs, %[fs]"
- : /* output */ [fs]"=rm"(fs));
- return fs;
-}
-
-static inline uint16_t get_gs(void)
-{
- uint16_t gs;
-
- __asm__ __volatile__("mov %%gs, %[gs]"
- : /* output */ [gs]"=rm"(gs));
- return gs;
-}
-
-static inline uint16_t get_tr(void)
-{
- uint16_t tr;
-
- __asm__ __volatile__("str %[tr]"
- : /* output */ [tr]"=rm"(tr));
- return tr;
-}
-
-static inline uint64_t get_cr0(void)
-{
- uint64_t cr0;
-
- __asm__ __volatile__("mov %%cr0, %[cr0]"
- : /* output */ [cr0]"=r"(cr0));
- return cr0;
-}
-
-static inline uint64_t get_cr3(void)
-{
- uint64_t cr3;
-
- __asm__ __volatile__("mov %%cr3, %[cr3]"
- : /* output */ [cr3]"=r"(cr3));
- return cr3;
-}
-
-static inline uint64_t get_cr4(void)
-{
- uint64_t cr4;
-
- __asm__ __volatile__("mov %%cr4, %[cr4]"
- : /* output */ [cr4]"=r"(cr4));
- return cr4;
-}
-
-static inline void set_cr4(uint64_t val)
-{
- __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
-}
-
-static inline u64 xgetbv(u32 index)
-{
- u32 eax, edx;
-
- __asm__ __volatile__("xgetbv;"
- : "=a" (eax), "=d" (edx)
- : "c" (index));
- return eax | ((u64)edx << 32);
-}
-
-static inline void xsetbv(u32 index, u64 value)
-{
- u32 eax = value;
- u32 edx = value >> 32;
-
- __asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
-}
-
-static inline void wrpkru(u32 pkru)
-{
- /* Note, ECX and EDX are architecturally required to be '0'. */
- asm volatile(".byte 0x0f,0x01,0xef\n\t"
- : : "a" (pkru), "c"(0), "d"(0));
-}
-
-static inline struct desc_ptr get_gdt(void)
-{
- struct desc_ptr gdt;
- __asm__ __volatile__("sgdt %[gdt]"
- : /* output */ [gdt]"=m"(gdt));
- return gdt;
-}
-
-static inline struct desc_ptr get_idt(void)
-{
- struct desc_ptr idt;
- __asm__ __volatile__("sidt %[idt]"
- : /* output */ [idt]"=m"(idt));
- return idt;
-}
-
-static inline void outl(uint16_t port, uint32_t value)
-{
- __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
-}
-
-static inline void __cpuid(uint32_t function, uint32_t index,
- uint32_t *eax, uint32_t *ebx,
- uint32_t *ecx, uint32_t *edx)
-{
- *eax = function;
- *ecx = index;
-
- asm volatile("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx)
- : "memory");
-}
-
-static inline void cpuid(uint32_t function,
- uint32_t *eax, uint32_t *ebx,
- uint32_t *ecx, uint32_t *edx)
-{
- return __cpuid(function, 0, eax, ebx, ecx, edx);
-}
-
-static inline uint32_t this_cpu_fms(void)
-{
- uint32_t eax, ebx, ecx, edx;
-
- cpuid(1, &eax, &ebx, &ecx, &edx);
- return eax;
-}
-
-static inline uint32_t this_cpu_family(void)
-{
- return x86_family(this_cpu_fms());
-}
-
-static inline uint32_t this_cpu_model(void)
-{
- return x86_model(this_cpu_fms());
-}
-
-static inline bool this_cpu_vendor_string_is(const char *vendor)
-{
- const uint32_t *chunk = (const uint32_t *)vendor;
- uint32_t eax, ebx, ecx, edx;
-
- cpuid(0, &eax, &ebx, &ecx, &edx);
- return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
-}
-
-static inline bool this_cpu_is_intel(void)
-{
- return this_cpu_vendor_string_is("GenuineIntel");
-}
-
-/*
- * Exclude early K5 samples with a vendor string of "AMDisbetter!"
- */
-static inline bool this_cpu_is_amd(void)
-{
- return this_cpu_vendor_string_is("AuthenticAMD");
-}
-
-static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index,
- uint8_t reg, uint8_t lo, uint8_t hi)
-{
- uint32_t gprs[4];
-
- __cpuid(function, index,
- &gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX],
- &gprs[KVM_CPUID_ECX], &gprs[KVM_CPUID_EDX]);
-
- return (gprs[reg] & GENMASK(hi, lo)) >> lo;
-}
-
-static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature)
-{
- return __this_cpu_has(feature.function, feature.index,
- feature.reg, feature.bit, feature.bit);
-}
-
-static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property)
-{
- return __this_cpu_has(property.function, property.index,
- property.reg, property.lo_bit, property.hi_bit);
-}
-
-static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property)
-{
- uint32_t max_leaf;
-
- switch (property.function & 0xc0000000) {
- case 0:
- max_leaf = this_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
- break;
- case 0x40000000:
- max_leaf = this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
- break;
- case 0x80000000:
- max_leaf = this_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
- break;
- case 0xc0000000:
- max_leaf = this_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
- }
- return max_leaf >= property.function;
-}
-
-static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature)
-{
- uint32_t nr_bits;
-
- if (feature.f.reg == KVM_CPUID_EBX) {
- nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
- return nr_bits > feature.f.bit && !this_cpu_has(feature.f);
- }
-
- GUEST_ASSERT(feature.f.reg == KVM_CPUID_ECX);
- nr_bits = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
- return nr_bits > feature.f.bit || this_cpu_has(feature.f);
-}
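Illustrative only: the inverted enumeration described above is hidden behind this_pmu_has(), so arch events and fixed counters are checked the same way by guest code.

static void guest_check_pmu_example(void)
{
	/* Arch event: covered by the bit vector and not flagged unavailable. */
	if (this_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED))
		GUEST_SYNC(1);

	/* Fixed counter: enumerated by flag OR by the fixed counter count. */
	if (this_pmu_has(X86_PMU_FEATURE_INSNS_RETIRED_FIXED))
		GUEST_SYNC(2);
}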
-
-static __always_inline uint64_t this_cpu_supported_xcr0(void)
-{
- if (!this_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
- return 0;
-
- return this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
- ((uint64_t)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
-}
-
-typedef u32 __attribute__((vector_size(16))) sse128_t;
-#define __sse128_u union { sse128_t vec; u64 as_u64[2]; u32 as_u32[4]; }
-#define sse128_lo(x) ({ __sse128_u t; t.vec = x; t.as_u64[0]; })
-#define sse128_hi(x) ({ __sse128_u t; t.vec = x; t.as_u64[1]; })
-
-static inline void read_sse_reg(int reg, sse128_t *data)
-{
- switch (reg) {
- case 0:
- asm("movdqa %%xmm0, %0" : "=m"(*data));
- break;
- case 1:
- asm("movdqa %%xmm1, %0" : "=m"(*data));
- break;
- case 2:
- asm("movdqa %%xmm2, %0" : "=m"(*data));
- break;
- case 3:
- asm("movdqa %%xmm3, %0" : "=m"(*data));
- break;
- case 4:
- asm("movdqa %%xmm4, %0" : "=m"(*data));
- break;
- case 5:
- asm("movdqa %%xmm5, %0" : "=m"(*data));
- break;
- case 6:
- asm("movdqa %%xmm6, %0" : "=m"(*data));
- break;
- case 7:
- asm("movdqa %%xmm7, %0" : "=m"(*data));
- break;
- default:
- BUG();
- }
-}
-
-static inline void write_sse_reg(int reg, const sse128_t *data)
-{
- switch (reg) {
- case 0:
- asm("movdqa %0, %%xmm0" : : "m"(*data));
- break;
- case 1:
- asm("movdqa %0, %%xmm1" : : "m"(*data));
- break;
- case 2:
- asm("movdqa %0, %%xmm2" : : "m"(*data));
- break;
- case 3:
- asm("movdqa %0, %%xmm3" : : "m"(*data));
- break;
- case 4:
- asm("movdqa %0, %%xmm4" : : "m"(*data));
- break;
- case 5:
- asm("movdqa %0, %%xmm5" : : "m"(*data));
- break;
- case 6:
- asm("movdqa %0, %%xmm6" : : "m"(*data));
- break;
- case 7:
- asm("movdqa %0, %%xmm7" : : "m"(*data));
- break;
- default:
- BUG();
- }
-}
-
-static inline void cpu_relax(void)
-{
- asm volatile("rep; nop" ::: "memory");
-}
-
-static inline void udelay(unsigned long usec)
-{
- uint64_t start, now, cycles;
-
- GUEST_ASSERT(guest_tsc_khz);
- cycles = guest_tsc_khz / 1000 * usec;
-
- /*
- * Deliberately don't PAUSE, a.k.a. cpu_relax(), so that the delay is
- * as accurate as possible, e.g. doesn't trigger PAUSE-Loop VM-Exits.
- */
- start = rdtsc();
- do {
- now = rdtsc();
- } while (now - start < cycles);
-}
-
-#define ud2() \
- __asm__ __volatile__( \
- "ud2\n" \
- )
-
-#define hlt() \
- __asm__ __volatile__( \
- "hlt\n" \
- )
-
-struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu);
-void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state);
-void kvm_x86_state_cleanup(struct kvm_x86_state *state);
-
-const struct kvm_msr_list *kvm_get_msr_index_list(void);
-const struct kvm_msr_list *kvm_get_feature_msr_index_list(void);
-bool kvm_msr_is_in_save_restore_list(uint32_t msr_index);
-uint64_t kvm_get_feature_msr(uint64_t msr_index);
-
-static inline void vcpu_msrs_get(struct kvm_vcpu *vcpu,
- struct kvm_msrs *msrs)
-{
- int r = __vcpu_ioctl(vcpu, KVM_GET_MSRS, msrs);
-
- TEST_ASSERT(r == msrs->nmsrs,
- "KVM_GET_MSRS failed, r: %i (failed on MSR %x)",
- r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
-}
-static inline void vcpu_msrs_set(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs)
-{
- int r = __vcpu_ioctl(vcpu, KVM_SET_MSRS, msrs);
-
- TEST_ASSERT(r == msrs->nmsrs,
- "KVM_SET_MSRS failed, r: %i (failed on MSR %x)",
- r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
-}
-static inline void vcpu_debugregs_get(struct kvm_vcpu *vcpu,
- struct kvm_debugregs *debugregs)
-{
- vcpu_ioctl(vcpu, KVM_GET_DEBUGREGS, debugregs);
-}
-static inline void vcpu_debugregs_set(struct kvm_vcpu *vcpu,
- struct kvm_debugregs *debugregs)
-{
- vcpu_ioctl(vcpu, KVM_SET_DEBUGREGS, debugregs);
-}
-static inline void vcpu_xsave_get(struct kvm_vcpu *vcpu,
- struct kvm_xsave *xsave)
-{
- vcpu_ioctl(vcpu, KVM_GET_XSAVE, xsave);
-}
-static inline void vcpu_xsave2_get(struct kvm_vcpu *vcpu,
- struct kvm_xsave *xsave)
-{
- vcpu_ioctl(vcpu, KVM_GET_XSAVE2, xsave);
-}
-static inline void vcpu_xsave_set(struct kvm_vcpu *vcpu,
- struct kvm_xsave *xsave)
-{
- vcpu_ioctl(vcpu, KVM_SET_XSAVE, xsave);
-}
-static inline void vcpu_xcrs_get(struct kvm_vcpu *vcpu,
- struct kvm_xcrs *xcrs)
-{
- vcpu_ioctl(vcpu, KVM_GET_XCRS, xcrs);
-}
-static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs)
-{
- vcpu_ioctl(vcpu, KVM_SET_XCRS, xcrs);
-}
-
-const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
- uint32_t function, uint32_t index);
-const struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
-
-static inline uint32_t kvm_cpu_fms(void)
-{
- return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax;
-}
-
-static inline uint32_t kvm_cpu_family(void)
-{
- return x86_family(kvm_cpu_fms());
-}
-
-static inline uint32_t kvm_cpu_model(void)
-{
- return x86_model(kvm_cpu_fms());
-}
-
-bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
- struct kvm_x86_cpu_feature feature);
-
-static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature)
-{
- return kvm_cpuid_has(kvm_get_supported_cpuid(), feature);
-}
-
-uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
- struct kvm_x86_cpu_property property);
-
-static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property)
-{
- return kvm_cpuid_property(kvm_get_supported_cpuid(), property);
-}
-
-static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property)
-{
- uint32_t max_leaf;
-
- switch (property.function & 0xc0000000) {
- case 0:
- max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
- break;
- case 0x40000000:
- max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
- break;
- case 0x80000000:
- max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
- break;
- case 0xc0000000:
- max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
- }
- return max_leaf >= property.function;
-}
-
-static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature)
-{
- uint32_t nr_bits;
-
- if (feature.f.reg == KVM_CPUID_EBX) {
- nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
- return nr_bits > feature.f.bit && !kvm_cpu_has(feature.f);
- }
-
- TEST_ASSERT_EQ(feature.f.reg, KVM_CPUID_ECX);
- nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
- return nr_bits > feature.f.bit || kvm_cpu_has(feature.f);
-}
-
-static __always_inline uint64_t kvm_cpu_supported_xcr0(void)
-{
- if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
- return 0;
-
- return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
- ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
-}
-
-static inline size_t kvm_cpuid2_size(int nr_entries)
-{
- return sizeof(struct kvm_cpuid2) +
- sizeof(struct kvm_cpuid_entry2) * nr_entries;
-}
-
-/*
- * Allocate a "struct kvm_cpuid2* instance, with the 0-length arrary of
- * entries sized to hold @nr_entries. The caller is responsible for freeing
- * the struct.
- */
-static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries)
-{
- struct kvm_cpuid2 *cpuid;
-
- cpuid = malloc(kvm_cpuid2_size(nr_entries));
- TEST_ASSERT(cpuid, "-ENOMEM when allocating kvm_cpuid2");
-
- cpuid->nent = nr_entries;
-
- return cpuid;
-}
-
-void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid);
-
-static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
- uint32_t function,
- uint32_t index)
-{
- return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid,
- function, index);
-}
-
-static inline struct kvm_cpuid_entry2 *vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
- uint32_t function)
-{
- return __vcpu_get_cpuid_entry(vcpu, function, 0);
-}
-
-static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu)
-{
- int r;
-
- TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
- r = __vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
- if (r)
- return r;
-
- /* On success, refresh the cache to pick up adjustments made by KVM. */
- vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
- return 0;
-}
-
-static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
-{
- TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
- vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
-
- /* Refresh the cache to pick up adjustments made by KVM. */
- vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
-}
-
-static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu)
-{
- vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
-}
-
-void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
- struct kvm_x86_cpu_property property,
- uint32_t value);
-void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr);
-
-void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);
-
-static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu,
- struct kvm_x86_cpu_feature feature)
-{
- struct kvm_cpuid_entry2 *entry;
-
- entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
- return *((&entry->eax) + feature.reg) & BIT(feature.bit);
-}
-
-void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
- struct kvm_x86_cpu_feature feature,
- bool set);
-
-static inline void vcpu_set_cpuid_feature(struct kvm_vcpu *vcpu,
- struct kvm_x86_cpu_feature feature)
-{
- vcpu_set_or_clear_cpuid_feature(vcpu, feature, true);
-
-}
-
-static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu,
- struct kvm_x86_cpu_feature feature)
-{
- vcpu_set_or_clear_cpuid_feature(vcpu, feature, false);
-}
-
-uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index);
-int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value);
-
-/*
- * Assert on an MSR access(es) and pretty print the MSR name when possible.
- * Note, the caller provides the stringified name so that the name of macro is
- * printed, not the value the macro resolves to (due to macro expansion).
- */
-#define TEST_ASSERT_MSR(cond, fmt, msr, str, args...) \
-do { \
- if (__builtin_constant_p(msr)) { \
- TEST_ASSERT(cond, fmt, str, args); \
- } else if (!(cond)) { \
- char buf[16]; \
- \
- snprintf(buf, sizeof(buf), "MSR 0x%x", msr); \
- TEST_ASSERT(cond, fmt, buf, args); \
- } \
-} while (0)
-
-/*
- * Returns true if KVM should return the last written value when reading an MSR
- * from userspace, e.g. the MSR isn't a command MSR, doesn't emulate state that
- * is changing, etc. This is NOT an exhaustive list! The intent is to filter
- * out MSRs that are not durable _and_ that a selftest wants to write.
- */
-static inline bool is_durable_msr(uint32_t msr)
-{
- return msr != MSR_IA32_TSC;
-}
-
-#define vcpu_set_msr(vcpu, msr, val) \
-do { \
- uint64_t r, v = val; \
- \
- TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1, \
- "KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v); \
- if (!is_durable_msr(msr)) \
- break; \
- r = vcpu_get_msr(vcpu, msr); \
- TEST_ASSERT_MSR(r == v, "Set %s to '0x%lx', got back '0x%lx'", msr, #msr, v, r);\
-} while (0)
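Illustrative usage, not part of the patch: vcpu_set_msr() asserts that KVM_SET_MSRS wrote exactly one MSR and, for durable MSRs, that the value reads back unchanged; non-durable MSRs such as MSR_IA32_TSC skip the read-back.

static void set_msrs_example(struct kvm_vcpu *vcpu)
{
	/* Durable MSR: the macro also verifies the round-trip. */
	vcpu_set_msr(vcpu, MSR_TSC_AUX, 0x1234);

	/* Non-durable MSR: only the KVM_SET_MSRS return value is checked. */
	vcpu_set_msr(vcpu, MSR_IA32_TSC, 0);
}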
-
-void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
-void kvm_init_vm_address_properties(struct kvm_vm *vm);
-bool vm_is_unrestricted_guest(struct kvm_vm *vm);
-
-struct ex_regs {
- uint64_t rax, rcx, rdx, rbx;
- uint64_t rbp, rsi, rdi;
- uint64_t r8, r9, r10, r11;
- uint64_t r12, r13, r14, r15;
- uint64_t vector;
- uint64_t error_code;
- uint64_t rip;
- uint64_t cs;
- uint64_t rflags;
-};
-
-struct idt_entry {
- uint16_t offset0;
- uint16_t selector;
- uint16_t ist : 3;
- uint16_t : 5;
- uint16_t type : 4;
- uint16_t : 1;
- uint16_t dpl : 2;
- uint16_t p : 1;
- uint16_t offset1;
- uint32_t offset2;
- uint32_t reserved;
-};
-
-void vm_install_exception_handler(struct kvm_vm *vm, int vector,
- void (*handler)(struct ex_regs *));
-
-/* If a toddler were to say "abracadabra". */
-#define KVM_EXCEPTION_MAGIC 0xabacadabaULL
-
-/*
- * KVM selftest exception fixup uses registers to coordinate with the exception
- * handler, versus the kernel's in-memory tables and KVM-Unit-Tests's in-memory
- * per-CPU data. Using only registers avoids having to map memory into the
- * guest, doesn't require a valid, stable GS.base, and reduces the risk of
- * recursive faults when accessing memory in the handler. The downside to
- * using registers is that it restricts what registers can be used by the actual
- * instruction. But, selftests are 64-bit only, making register pressure a
- * minor concern. Use r9-r11 as they are volatile, i.e. don't need to be saved
- * by the callee, and except for r11 are not implicit parameters to any
- * instructions. Ideally, fixup would use r8-r10 and thus avoid implicit
- * parameters entirely, but Hyper-V's hypercall ABI uses r8 and testing Hyper-V
- * is higher priority than testing non-faulting SYSCALL/SYSRET.
- *
- * Note, the fixup handler deliberately does not handle #DE, i.e. the vector
- * is guaranteed to be non-zero on fault.
- *
- * REGISTER INPUTS:
- * r9 = MAGIC
- * r10 = RIP
- * r11 = new RIP on fault
- *
- * REGISTER OUTPUTS:
- * r9 = exception vector (non-zero)
- * r10 = error code
- */
-#define __KVM_ASM_SAFE(insn, fep) \
- "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t" \
- "lea 1f(%%rip), %%r10\n\t" \
- "lea 2f(%%rip), %%r11\n\t" \
- fep "1: " insn "\n\t" \
- "xor %%r9, %%r9\n\t" \
- "2:\n\t" \
- "mov %%r9b, %[vector]\n\t" \
- "mov %%r10, %[error_code]\n\t"
-
-#define KVM_ASM_SAFE(insn) __KVM_ASM_SAFE(insn, "")
-#define KVM_ASM_SAFE_FEP(insn) __KVM_ASM_SAFE(insn, KVM_FEP)
-
-#define KVM_ASM_SAFE_OUTPUTS(v, ec) [vector] "=qm"(v), [error_code] "=rm"(ec)
-#define KVM_ASM_SAFE_CLOBBERS "r9", "r10", "r11"
-
-#define kvm_asm_safe(insn, inputs...) \
-({ \
- uint64_t ign_error_code; \
- uint8_t vector; \
- \
- asm volatile(KVM_ASM_SAFE(insn) \
- : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
- : inputs \
- : KVM_ASM_SAFE_CLOBBERS); \
- vector; \
-})
-
-#define kvm_asm_safe_ec(insn, error_code, inputs...) \
-({ \
- uint8_t vector; \
- \
- asm volatile(KVM_ASM_SAFE(insn) \
- : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \
- : inputs \
- : KVM_ASM_SAFE_CLOBBERS); \
- vector; \
-})
-
-#define kvm_asm_safe_fep(insn, inputs...) \
-({ \
- uint64_t ign_error_code; \
- uint8_t vector; \
- \
- asm volatile(KVM_ASM_SAFE(insn) \
- : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
- : inputs \
- : KVM_ASM_SAFE_CLOBBERS); \
- vector; \
-})
-
-#define kvm_asm_safe_ec_fep(insn, error_code, inputs...) \
-({ \
- uint8_t vector; \
- \
- asm volatile(KVM_ASM_SAFE_FEP(insn) \
- : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \
- : inputs \
- : KVM_ASM_SAFE_CLOBBERS); \
- vector; \
-})
-
-#define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \
-static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val) \
-{ \
- uint64_t error_code; \
- uint8_t vector; \
- uint32_t a, d; \
- \
- asm volatile(KVM_ASM_SAFE##_FEP(#insn) \
- : "=a"(a), "=d"(d), \
- KVM_ASM_SAFE_OUTPUTS(vector, error_code) \
- : "c"(idx) \
- : KVM_ASM_SAFE_CLOBBERS); \
- \
- *val = (uint64_t)a | ((uint64_t)d << 32); \
- return vector; \
-}
-
-/*
- * Generate {insn}_safe() and {insn}_safe_fep() helpers for instructions that
- * use ECX as an input index, and EDX:EAX as a 64-bit output.
- */
-#define BUILD_READ_U64_SAFE_HELPERS(insn) \
- BUILD_READ_U64_SAFE_HELPER(insn, , ) \
- BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \
-
-BUILD_READ_U64_SAFE_HELPERS(rdmsr)
-BUILD_READ_U64_SAFE_HELPERS(rdpmc)
-BUILD_READ_U64_SAFE_HELPERS(xgetbv)
-
-static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
-{
- return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr));
-}
-
-static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value)
-{
- u32 eax = value;
- u32 edx = value >> 32;
-
- return kvm_asm_safe("xsetbv", "a" (eax), "d" (edx), "c" (index));
-}
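Illustrative only: the *_safe() helpers return the fault vector (0 on success), which lets guest code probe for expected faults without installing a dedicated handler. The bogus MSR index below is hypothetical, and the #GP expectation assumes KVM's default handling of unknown MSRs.

static void guest_probe_msr_example(void)
{
	uint64_t val;

	/* Reading a non-existent MSR is expected to raise #GP (vector 13). */
	GUEST_ASSERT(rdmsr_safe(0xdeadbeef, &val) == 13);

	/* A well-known architectural MSR should not fault. */
	GUEST_ASSERT(!rdmsr_safe(MSR_IA32_APICBASE, &val));
}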
-
-bool kvm_is_tdp_enabled(void);
-
-static inline bool kvm_is_pmu_enabled(void)
-{
- return get_kvm_param_bool("enable_pmu");
-}
-
-static inline bool kvm_is_forced_emulation_enabled(void)
-{
- return !!get_kvm_param_integer("force_emulation_prefix");
-}
-
-uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
- int *level);
-uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);
-
-uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
- uint64_t a3);
-uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
-void xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
-
-static inline uint64_t __kvm_hypercall_map_gpa_range(uint64_t gpa,
- uint64_t size, uint64_t flags)
-{
- return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0);
-}
-
-static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size,
- uint64_t flags)
-{
- uint64_t ret = __kvm_hypercall_map_gpa_range(gpa, size, flags);
-
- GUEST_ASSERT(!ret);
-}
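Illustrative sketch, not part of the patch: a guest converting one page to shared with KVM_HC_MAP_GPA_RANGE, assuming the KVM_MAP_GPA_RANGE_* flags from the uapi <linux/kvm_para.h> already included above.

static void guest_set_page_shared_example(uint64_t gpa)
{
	/* Mark a single 4KiB page as decrypted, i.e. shared with the host. */
	kvm_hypercall_map_gpa_range(gpa, PAGE_SIZE,
				    KVM_MAP_GPA_RANGE_DECRYPTED |
				    KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
}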
-
-void __vm_xsave_require_permission(uint64_t xfeature, const char *name);
-
-#define vm_xsave_require_permission(xfeature) \
- __vm_xsave_require_permission(xfeature, #xfeature)
-
-enum pg_level {
- PG_LEVEL_NONE,
- PG_LEVEL_4K,
- PG_LEVEL_2M,
- PG_LEVEL_1G,
- PG_LEVEL_512G,
- PG_LEVEL_NUM
-};
-
-#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
-#define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level))
-
-#define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K)
-#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M)
-#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)
-
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
-void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint64_t nr_bytes, int level);
-
-/*
- * Basic CPU control in CR0
- */
-#define X86_CR0_PE (1UL<<0) /* Protection Enable */
-#define X86_CR0_MP (1UL<<1) /* Monitor Coprocessor */
-#define X86_CR0_EM (1UL<<2) /* Emulation */
-#define X86_CR0_TS (1UL<<3) /* Task Switched */
-#define X86_CR0_ET (1UL<<4) /* Extension Type */
-#define X86_CR0_NE (1UL<<5) /* Numeric Error */
-#define X86_CR0_WP (1UL<<16) /* Write Protect */
-#define X86_CR0_AM (1UL<<18) /* Alignment Mask */
-#define X86_CR0_NW (1UL<<29) /* Not Write-through */
-#define X86_CR0_CD (1UL<<30) /* Cache Disable */
-#define X86_CR0_PG (1UL<<31) /* Paging */
-
-#define PFERR_PRESENT_BIT 0
-#define PFERR_WRITE_BIT 1
-#define PFERR_USER_BIT 2
-#define PFERR_RSVD_BIT 3
-#define PFERR_FETCH_BIT 4
-#define PFERR_PK_BIT 5
-#define PFERR_SGX_BIT 15
-#define PFERR_GUEST_FINAL_BIT 32
-#define PFERR_GUEST_PAGE_BIT 33
-#define PFERR_IMPLICIT_ACCESS_BIT 48
-
-#define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT)
-#define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT)
-#define PFERR_USER_MASK BIT(PFERR_USER_BIT)
-#define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT)
-#define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT)
-#define PFERR_PK_MASK BIT(PFERR_PK_BIT)
-#define PFERR_SGX_MASK BIT(PFERR_SGX_BIT)
-#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT)
-#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT)
-#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
-
-bool sys_clocksource_is_based_on_tsc(void);
-
-#endif /* SELFTEST_KVM_PROCESSOR_H */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Helpers used for SEV guests
- *
- */
-#ifndef SELFTEST_KVM_SEV_H
-#define SELFTEST_KVM_SEV_H
-
-#include <stdint.h>
-#include <stdbool.h>
-
-#include "linux/psp-sev.h"
-
-#include "kvm_util.h"
-#include "svm_util.h"
-#include "processor.h"
-
-enum sev_guest_state {
- SEV_GUEST_STATE_UNINITIALIZED = 0,
- SEV_GUEST_STATE_LAUNCH_UPDATE,
- SEV_GUEST_STATE_LAUNCH_SECRET,
- SEV_GUEST_STATE_RUNNING,
-};
-
-#define SEV_POLICY_NO_DBG (1UL << 0)
-#define SEV_POLICY_ES (1UL << 2)
-
-#define GHCB_MSR_TERM_REQ 0x100
-
-void sev_vm_launch(struct kvm_vm *vm, uint32_t policy);
-void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement);
-void sev_vm_launch_finish(struct kvm_vm *vm);
-
-struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
- struct kvm_vcpu **cpu);
-void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement);
-
-kvm_static_assert(SEV_RET_SUCCESS == 0);
-
-/*
- * The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long"
- * instead of a proper struct. The size of the parameter is embedded in the
- * ioctl number, i.e. is ABI and thus immutable. Hack around the mess by
- * creating an overlay to pass in an "unsigned long" without a cast (casting
- * will make the compiler unhappy due to dereferencing an aliased pointer).
- */
-#define __vm_sev_ioctl(vm, cmd, arg) \
-({ \
- int r; \
- \
- union { \
- struct kvm_sev_cmd c; \
- unsigned long raw; \
- } sev_cmd = { .c = { \
- .id = (cmd), \
- .data = (uint64_t)(arg), \
- .sev_fd = (vm)->arch.sev_fd, \
- } }; \
- \
- r = __vm_ioctl(vm, KVM_MEMORY_ENCRYPT_OP, &sev_cmd.raw); \
- r ?: sev_cmd.c.error; \
-})
-
-#define vm_sev_ioctl(vm, cmd, arg) \
-({ \
- int ret = __vm_sev_ioctl(vm, cmd, arg); \
- \
- __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \
-})
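
A brief host-side sketch of how the asserting wrapper is meant to be used; struct kvm_sev_launch_measure and KVM_SEV_LAUNCH_MEASURE come from the KVM uAPI, while the buffer handling here is purely illustrative:

/* Sketch: fetch the SEV launch measurement via the asserting wrapper. */
static void sev_get_measurement(struct kvm_vm *vm, uint8_t *buf, uint32_t len)
{
	struct kvm_sev_launch_measure measure = {
		.uaddr = (uint64_t)(unsigned long)buf,
		.len = len,
	};

	/* Fails the test if either the ioctl or the SEV command reports an error. */
	vm_sev_ioctl(vm, KVM_SEV_LAUNCH_MEASURE, &measure);
}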
-
-void sev_vm_init(struct kvm_vm *vm);
-void sev_es_vm_init(struct kvm_vm *vm);
-
-static inline void sev_register_encrypted_memory(struct kvm_vm *vm,
- struct userspace_mem_region *region)
-{
- struct kvm_enc_region range = {
- .addr = region->region.userspace_addr,
- .size = region->region.memory_size,
- };
-
- vm_ioctl(vm, KVM_MEMORY_ENCRYPT_REG_REGION, &range);
-}
-
-static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
- uint64_t size)
-{
- struct kvm_sev_launch_update_data update_data = {
- .uaddr = (unsigned long)addr_gpa2hva(vm, gpa),
- .len = size,
- };
-
- vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data);
-}
-
-#endif /* SELFTEST_KVM_SEV_H */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * tools/testing/selftests/kvm/include/x86_64/svm.h
- * This is a copy of arch/x86/include/asm/svm.h
- *
- */
-
-#ifndef SELFTEST_KVM_SVM_H
-#define SELFTEST_KVM_SVM_H
-
-enum {
- INTERCEPT_INTR,
- INTERCEPT_NMI,
- INTERCEPT_SMI,
- INTERCEPT_INIT,
- INTERCEPT_VINTR,
- INTERCEPT_SELECTIVE_CR0,
- INTERCEPT_STORE_IDTR,
- INTERCEPT_STORE_GDTR,
- INTERCEPT_STORE_LDTR,
- INTERCEPT_STORE_TR,
- INTERCEPT_LOAD_IDTR,
- INTERCEPT_LOAD_GDTR,
- INTERCEPT_LOAD_LDTR,
- INTERCEPT_LOAD_TR,
- INTERCEPT_RDTSC,
- INTERCEPT_RDPMC,
- INTERCEPT_PUSHF,
- INTERCEPT_POPF,
- INTERCEPT_CPUID,
- INTERCEPT_RSM,
- INTERCEPT_IRET,
- INTERCEPT_INTn,
- INTERCEPT_INVD,
- INTERCEPT_PAUSE,
- INTERCEPT_HLT,
- INTERCEPT_INVLPG,
- INTERCEPT_INVLPGA,
- INTERCEPT_IOIO_PROT,
- INTERCEPT_MSR_PROT,
- INTERCEPT_TASK_SWITCH,
- INTERCEPT_FERR_FREEZE,
- INTERCEPT_SHUTDOWN,
- INTERCEPT_VMRUN,
- INTERCEPT_VMMCALL,
- INTERCEPT_VMLOAD,
- INTERCEPT_VMSAVE,
- INTERCEPT_STGI,
- INTERCEPT_CLGI,
- INTERCEPT_SKINIT,
- INTERCEPT_RDTSCP,
- INTERCEPT_ICEBP,
- INTERCEPT_WBINVD,
- INTERCEPT_MONITOR,
- INTERCEPT_MWAIT,
- INTERCEPT_MWAIT_COND,
- INTERCEPT_XSETBV,
- INTERCEPT_RDPRU,
-};
-
-struct hv_vmcb_enlightenments {
- struct __packed hv_enlightenments_control {
- u32 nested_flush_hypercall:1;
- u32 msr_bitmap:1;
- u32 enlightened_npt_tlb: 1;
- u32 reserved:29;
- } __packed hv_enlightenments_control;
- u32 hv_vp_id;
- u64 hv_vm_id;
- u64 partition_assist_page;
- u64 reserved;
-} __packed;
-
-/*
- * Hyper-V uses the software reserved clean bit in VMCB
- */
-#define HV_VMCB_NESTED_ENLIGHTENMENTS (1U << 31)
-
-/* Synthetic VM-Exit */
-#define HV_SVM_EXITCODE_ENL 0xf0000000
-#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH (1)
-
-struct __attribute__ ((__packed__)) vmcb_control_area {
- u32 intercept_cr;
- u32 intercept_dr;
- u32 intercept_exceptions;
- u64 intercept;
- u8 reserved_1[40];
- u16 pause_filter_thresh;
- u16 pause_filter_count;
- u64 iopm_base_pa;
- u64 msrpm_base_pa;
- u64 tsc_offset;
- u32 asid;
- u8 tlb_ctl;
- u8 reserved_2[3];
- u32 int_ctl;
- u32 int_vector;
- u32 int_state;
- u8 reserved_3[4];
- u32 exit_code;
- u32 exit_code_hi;
- u64 exit_info_1;
- u64 exit_info_2;
- u32 exit_int_info;
- u32 exit_int_info_err;
- u64 nested_ctl;
- u64 avic_vapic_bar;
- u8 reserved_4[8];
- u32 event_inj;
- u32 event_inj_err;
- u64 nested_cr3;
- u64 virt_ext;
- u32 clean;
- u32 reserved_5;
- u64 next_rip;
- u8 insn_len;
- u8 insn_bytes[15];
- u64 avic_backing_page; /* Offset 0xe0 */
- u8 reserved_6[8]; /* Offset 0xe8 */
- u64 avic_logical_id; /* Offset 0xf0 */
- u64 avic_physical_id; /* Offset 0xf8 */
- u8 reserved_7[8];
- u64 vmsa_pa; /* Used for an SEV-ES guest */
- u8 reserved_8[720];
- /*
- * Offset 0x3e0, 32 bytes reserved
- * for use by hypervisor/software.
- */
- union {
- struct hv_vmcb_enlightenments hv_enlightenments;
- u8 reserved_sw[32];
- };
-};
-
-
-#define TLB_CONTROL_DO_NOTHING 0
-#define TLB_CONTROL_FLUSH_ALL_ASID 1
-#define TLB_CONTROL_FLUSH_ASID 3
-#define TLB_CONTROL_FLUSH_ASID_LOCAL 7
-
-#define V_TPR_MASK 0x0f
-
-#define V_IRQ_SHIFT 8
-#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
-
-#define V_GIF_SHIFT 9
-#define V_GIF_MASK (1 << V_GIF_SHIFT)
-
-#define V_INTR_PRIO_SHIFT 16
-#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
-
-#define V_IGN_TPR_SHIFT 20
-#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
-
-#define V_INTR_MASKING_SHIFT 24
-#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
-
-#define V_GIF_ENABLE_SHIFT 25
-#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT)
-
-#define AVIC_ENABLE_SHIFT 31
-#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
-
-#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
-#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
-
-#define SVM_INTERRUPT_SHADOW_MASK 1
-
-#define SVM_IOIO_STR_SHIFT 2
-#define SVM_IOIO_REP_SHIFT 3
-#define SVM_IOIO_SIZE_SHIFT 4
-#define SVM_IOIO_ASIZE_SHIFT 7
-
-#define SVM_IOIO_TYPE_MASK 1
-#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
-#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
-#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
-#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
-
-#define SVM_VM_CR_VALID_MASK 0x001fULL
-#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
-#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL
-
-#define SVM_NESTED_CTL_NP_ENABLE BIT(0)
-#define SVM_NESTED_CTL_SEV_ENABLE BIT(1)
-
-struct __attribute__ ((__packed__)) vmcb_seg {
- u16 selector;
- u16 attrib;
- u32 limit;
- u64 base;
-};
-
-struct __attribute__ ((__packed__)) vmcb_save_area {
- struct vmcb_seg es;
- struct vmcb_seg cs;
- struct vmcb_seg ss;
- struct vmcb_seg ds;
- struct vmcb_seg fs;
- struct vmcb_seg gs;
- struct vmcb_seg gdtr;
- struct vmcb_seg ldtr;
- struct vmcb_seg idtr;
- struct vmcb_seg tr;
- u8 reserved_1[43];
- u8 cpl;
- u8 reserved_2[4];
- u64 efer;
- u8 reserved_3[112];
- u64 cr4;
- u64 cr3;
- u64 cr0;
- u64 dr7;
- u64 dr6;
- u64 rflags;
- u64 rip;
- u8 reserved_4[88];
- u64 rsp;
- u8 reserved_5[24];
- u64 rax;
- u64 star;
- u64 lstar;
- u64 cstar;
- u64 sfmask;
- u64 kernel_gs_base;
- u64 sysenter_cs;
- u64 sysenter_esp;
- u64 sysenter_eip;
- u64 cr2;
- u8 reserved_6[32];
- u64 g_pat;
- u64 dbgctl;
- u64 br_from;
- u64 br_to;
- u64 last_excp_from;
- u64 last_excp_to;
-};
-
-struct __attribute__ ((__packed__)) vmcb {
- struct vmcb_control_area control;
- struct vmcb_save_area save;
-};
-
-#define SVM_VM_CR_SVM_DISABLE 4
-
-#define SVM_SELECTOR_S_SHIFT 4
-#define SVM_SELECTOR_DPL_SHIFT 5
-#define SVM_SELECTOR_P_SHIFT 7
-#define SVM_SELECTOR_AVL_SHIFT 8
-#define SVM_SELECTOR_L_SHIFT 9
-#define SVM_SELECTOR_DB_SHIFT 10
-#define SVM_SELECTOR_G_SHIFT 11
-
-#define SVM_SELECTOR_TYPE_MASK (0xf)
-#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT)
-#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT)
-#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT)
-#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT)
-#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT)
-#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT)
-#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT)
-
-#define SVM_SELECTOR_WRITE_MASK (1 << 1)
-#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
-#define SVM_SELECTOR_CODE_MASK (1 << 3)
-
-#define INTERCEPT_CR0_READ 0
-#define INTERCEPT_CR3_READ 3
-#define INTERCEPT_CR4_READ 4
-#define INTERCEPT_CR8_READ 8
-#define INTERCEPT_CR0_WRITE (16 + 0)
-#define INTERCEPT_CR3_WRITE (16 + 3)
-#define INTERCEPT_CR4_WRITE (16 + 4)
-#define INTERCEPT_CR8_WRITE (16 + 8)
-
-#define INTERCEPT_DR0_READ 0
-#define INTERCEPT_DR1_READ 1
-#define INTERCEPT_DR2_READ 2
-#define INTERCEPT_DR3_READ 3
-#define INTERCEPT_DR4_READ 4
-#define INTERCEPT_DR5_READ 5
-#define INTERCEPT_DR6_READ 6
-#define INTERCEPT_DR7_READ 7
-#define INTERCEPT_DR0_WRITE (16 + 0)
-#define INTERCEPT_DR1_WRITE (16 + 1)
-#define INTERCEPT_DR2_WRITE (16 + 2)
-#define INTERCEPT_DR3_WRITE (16 + 3)
-#define INTERCEPT_DR4_WRITE (16 + 4)
-#define INTERCEPT_DR5_WRITE (16 + 5)
-#define INTERCEPT_DR6_WRITE (16 + 6)
-#define INTERCEPT_DR7_WRITE (16 + 7)
-
-#define SVM_EVTINJ_VEC_MASK 0xff
-
-#define SVM_EVTINJ_TYPE_SHIFT 8
-#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
-
-#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
-
-#define SVM_EVTINJ_VALID (1 << 31)
-#define SVM_EVTINJ_VALID_ERR (1 << 11)
-
-#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
-#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
-
-#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
-#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
-#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
-#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
-
-#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
-#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
-
-#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
-#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
-#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44
-
-#define SVM_EXITINFO_REG_MASK 0x0F
-
-#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
-
-#endif /* SELFTEST_KVM_SVM_H */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/x86_64/svm_util.h
- * Header for nested SVM testing
- *
- * Copyright (C) 2020, Red Hat, Inc.
- */
-
-#ifndef SELFTEST_KVM_SVM_UTILS_H
-#define SELFTEST_KVM_SVM_UTILS_H
-
-#include <asm/svm.h>
-
-#include <stdint.h>
-#include "svm.h"
-#include "processor.h"
-
-struct svm_test_data {
- /* VMCB */
- struct vmcb *vmcb; /* gva */
- void *vmcb_hva;
- uint64_t vmcb_gpa;
-
- /* host state-save area */
- struct vmcb_save_area *save_area; /* gva */
- void *save_area_hva;
- uint64_t save_area_gpa;
-
- /* MSR-Bitmap */
- void *msr; /* gva */
- void *msr_hva;
- uint64_t msr_gpa;
-};
-
-static inline void vmmcall(void)
-{
- /*
- * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle
- * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended
- * use of this function is to exit to L1 from L2. Clobber all other
- * GPRs as L1 doesn't correctly preserve them during vmexits.
- */
- __asm__ __volatile__("push %%rbp; vmmcall; pop %%rbp"
- : : "a"(0xdeadbeef), "c"(0xbeefdead)
- : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
- "r10", "r11", "r12", "r13", "r14", "r15");
-}
-
-#define stgi() \
- __asm__ __volatile__( \
- "stgi\n" \
- )
-
-#define clgi() \
- __asm__ __volatile__( \
- "clgi\n" \
- )
-
-struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva);
-void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp);
-void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
-
-int open_sev_dev_path_or_exit(void);
-
-#endif /* SELFTEST_KVM_SVM_UTILS_H */
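
For orientation, a condensed sketch of how L1 guest code is expected to drive this API; the L2 entry point and stack size are placeholders, and SVM_EXIT_VMMCALL comes from <asm/svm.h>:

/* Sketch: enter L2 once and verify it exited back to L1 via VMMCALL. */
static void l2_guest_code(void)
{
	vmmcall();	/* request an exit back to L1 */
}

static void l1_guest_code(struct svm_test_data *svm)
{
	unsigned long l2_guest_stack[64];

	generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[64]);
	run_guest(svm->vmcb, svm->vmcb_gpa);
	GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
}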
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_UCALL_H
-#define SELFTEST_KVM_UCALL_H
-
-#include "kvm_util.h"
-
-#define UCALL_EXIT_REASON KVM_EXIT_IO
-
-static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-}
-
-#endif
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/x86_64/vmx.h
- *
- * Copyright (C) 2018, Google LLC.
- */
-
-#ifndef SELFTEST_KVM_VMX_H
-#define SELFTEST_KVM_VMX_H
-
-#include <asm/vmx.h>
-
-#include <stdint.h>
-#include "processor.h"
-#include "apic.h"
-
-/*
- * Definitions of Primary Processor-Based VM-Execution Controls.
- */
-#define CPU_BASED_INTR_WINDOW_EXITING 0x00000004
-#define CPU_BASED_USE_TSC_OFFSETTING 0x00000008
-#define CPU_BASED_HLT_EXITING 0x00000080
-#define CPU_BASED_INVLPG_EXITING 0x00000200
-#define CPU_BASED_MWAIT_EXITING 0x00000400
-#define CPU_BASED_RDPMC_EXITING 0x00000800
-#define CPU_BASED_RDTSC_EXITING 0x00001000
-#define CPU_BASED_CR3_LOAD_EXITING 0x00008000
-#define CPU_BASED_CR3_STORE_EXITING 0x00010000
-#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
-#define CPU_BASED_CR8_STORE_EXITING 0x00100000
-#define CPU_BASED_TPR_SHADOW 0x00200000
-#define CPU_BASED_NMI_WINDOW_EXITING 0x00400000
-#define CPU_BASED_MOV_DR_EXITING 0x00800000
-#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
-#define CPU_BASED_USE_IO_BITMAPS 0x02000000
-#define CPU_BASED_MONITOR_TRAP 0x08000000
-#define CPU_BASED_USE_MSR_BITMAPS 0x10000000
-#define CPU_BASED_MONITOR_EXITING 0x20000000
-#define CPU_BASED_PAUSE_EXITING 0x40000000
-#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000
-
-#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172
-
-/*
- * Definitions of Secondary Processor-Based VM-Execution Controls.
- */
-#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
-#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
-#define SECONDARY_EXEC_DESC 0x00000004
-#define SECONDARY_EXEC_ENABLE_RDTSCP 0x00000008
-#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
-#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
-#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
-#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
-#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
-#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
-#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
-#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800
-#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
-#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000
-#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
-#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000
-#define SECONDARY_EXEC_ENABLE_PML 0x00020000
-#define SECONDARY_EPT_VE 0x00040000
-#define SECONDARY_ENABLE_XSAV_RESTORE 0x00100000
-#define SECONDARY_EXEC_TSC_SCALING 0x02000000
-
-#define PIN_BASED_EXT_INTR_MASK 0x00000001
-#define PIN_BASED_NMI_EXITING 0x00000008
-#define PIN_BASED_VIRTUAL_NMIS 0x00000020
-#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040
-#define PIN_BASED_POSTED_INTR 0x00000080
-
-#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016
-
-#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004
-#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
-#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000
-#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
-#define VM_EXIT_SAVE_IA32_PAT 0x00040000
-#define VM_EXIT_LOAD_IA32_PAT 0x00080000
-#define VM_EXIT_SAVE_IA32_EFER 0x00100000
-#define VM_EXIT_LOAD_IA32_EFER 0x00200000
-#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000
-
-#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
-
-#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004
-#define VM_ENTRY_IA32E_MODE 0x00000200
-#define VM_ENTRY_SMM 0x00000400
-#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
-#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000
-#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
-#define VM_ENTRY_LOAD_IA32_EFER 0x00008000
-
-#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
-
-#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
-#define VMX_MISC_SAVE_EFER_LMA 0x00000020
-
-#define VMX_EPT_VPID_CAP_1G_PAGES 0x00020000
-#define VMX_EPT_VPID_CAP_AD_BITS 0x00200000
-
-#define EXIT_REASON_FAILED_VMENTRY 0x80000000
-
-enum vmcs_field {
- VIRTUAL_PROCESSOR_ID = 0x00000000,
- POSTED_INTR_NV = 0x00000002,
- GUEST_ES_SELECTOR = 0x00000800,
- GUEST_CS_SELECTOR = 0x00000802,
- GUEST_SS_SELECTOR = 0x00000804,
- GUEST_DS_SELECTOR = 0x00000806,
- GUEST_FS_SELECTOR = 0x00000808,
- GUEST_GS_SELECTOR = 0x0000080a,
- GUEST_LDTR_SELECTOR = 0x0000080c,
- GUEST_TR_SELECTOR = 0x0000080e,
- GUEST_INTR_STATUS = 0x00000810,
- GUEST_PML_INDEX = 0x00000812,
- HOST_ES_SELECTOR = 0x00000c00,
- HOST_CS_SELECTOR = 0x00000c02,
- HOST_SS_SELECTOR = 0x00000c04,
- HOST_DS_SELECTOR = 0x00000c06,
- HOST_FS_SELECTOR = 0x00000c08,
- HOST_GS_SELECTOR = 0x00000c0a,
- HOST_TR_SELECTOR = 0x00000c0c,
- IO_BITMAP_A = 0x00002000,
- IO_BITMAP_A_HIGH = 0x00002001,
- IO_BITMAP_B = 0x00002002,
- IO_BITMAP_B_HIGH = 0x00002003,
- MSR_BITMAP = 0x00002004,
- MSR_BITMAP_HIGH = 0x00002005,
- VM_EXIT_MSR_STORE_ADDR = 0x00002006,
- VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007,
- VM_EXIT_MSR_LOAD_ADDR = 0x00002008,
- VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009,
- VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a,
- VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b,
- PML_ADDRESS = 0x0000200e,
- PML_ADDRESS_HIGH = 0x0000200f,
- TSC_OFFSET = 0x00002010,
- TSC_OFFSET_HIGH = 0x00002011,
- VIRTUAL_APIC_PAGE_ADDR = 0x00002012,
- VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
- APIC_ACCESS_ADDR = 0x00002014,
- APIC_ACCESS_ADDR_HIGH = 0x00002015,
- POSTED_INTR_DESC_ADDR = 0x00002016,
- POSTED_INTR_DESC_ADDR_HIGH = 0x00002017,
- EPT_POINTER = 0x0000201a,
- EPT_POINTER_HIGH = 0x0000201b,
- EOI_EXIT_BITMAP0 = 0x0000201c,
- EOI_EXIT_BITMAP0_HIGH = 0x0000201d,
- EOI_EXIT_BITMAP1 = 0x0000201e,
- EOI_EXIT_BITMAP1_HIGH = 0x0000201f,
- EOI_EXIT_BITMAP2 = 0x00002020,
- EOI_EXIT_BITMAP2_HIGH = 0x00002021,
- EOI_EXIT_BITMAP3 = 0x00002022,
- EOI_EXIT_BITMAP3_HIGH = 0x00002023,
- VMREAD_BITMAP = 0x00002026,
- VMREAD_BITMAP_HIGH = 0x00002027,
- VMWRITE_BITMAP = 0x00002028,
- VMWRITE_BITMAP_HIGH = 0x00002029,
- XSS_EXIT_BITMAP = 0x0000202C,
- XSS_EXIT_BITMAP_HIGH = 0x0000202D,
- ENCLS_EXITING_BITMAP = 0x0000202E,
- ENCLS_EXITING_BITMAP_HIGH = 0x0000202F,
- TSC_MULTIPLIER = 0x00002032,
- TSC_MULTIPLIER_HIGH = 0x00002033,
- GUEST_PHYSICAL_ADDRESS = 0x00002400,
- GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
- VMCS_LINK_POINTER = 0x00002800,
- VMCS_LINK_POINTER_HIGH = 0x00002801,
- GUEST_IA32_DEBUGCTL = 0x00002802,
- GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
- GUEST_IA32_PAT = 0x00002804,
- GUEST_IA32_PAT_HIGH = 0x00002805,
- GUEST_IA32_EFER = 0x00002806,
- GUEST_IA32_EFER_HIGH = 0x00002807,
- GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
- GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809,
- GUEST_PDPTR0 = 0x0000280a,
- GUEST_PDPTR0_HIGH = 0x0000280b,
- GUEST_PDPTR1 = 0x0000280c,
- GUEST_PDPTR1_HIGH = 0x0000280d,
- GUEST_PDPTR2 = 0x0000280e,
- GUEST_PDPTR2_HIGH = 0x0000280f,
- GUEST_PDPTR3 = 0x00002810,
- GUEST_PDPTR3_HIGH = 0x00002811,
- GUEST_BNDCFGS = 0x00002812,
- GUEST_BNDCFGS_HIGH = 0x00002813,
- HOST_IA32_PAT = 0x00002c00,
- HOST_IA32_PAT_HIGH = 0x00002c01,
- HOST_IA32_EFER = 0x00002c02,
- HOST_IA32_EFER_HIGH = 0x00002c03,
- HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
- HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05,
- PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
- CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
- EXCEPTION_BITMAP = 0x00004004,
- PAGE_FAULT_ERROR_CODE_MASK = 0x00004006,
- PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008,
- CR3_TARGET_COUNT = 0x0000400a,
- VM_EXIT_CONTROLS = 0x0000400c,
- VM_EXIT_MSR_STORE_COUNT = 0x0000400e,
- VM_EXIT_MSR_LOAD_COUNT = 0x00004010,
- VM_ENTRY_CONTROLS = 0x00004012,
- VM_ENTRY_MSR_LOAD_COUNT = 0x00004014,
- VM_ENTRY_INTR_INFO_FIELD = 0x00004016,
- VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018,
- VM_ENTRY_INSTRUCTION_LEN = 0x0000401a,
- TPR_THRESHOLD = 0x0000401c,
- SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
- PLE_GAP = 0x00004020,
- PLE_WINDOW = 0x00004022,
- VM_INSTRUCTION_ERROR = 0x00004400,
- VM_EXIT_REASON = 0x00004402,
- VM_EXIT_INTR_INFO = 0x00004404,
- VM_EXIT_INTR_ERROR_CODE = 0x00004406,
- IDT_VECTORING_INFO_FIELD = 0x00004408,
- IDT_VECTORING_ERROR_CODE = 0x0000440a,
- VM_EXIT_INSTRUCTION_LEN = 0x0000440c,
- VMX_INSTRUCTION_INFO = 0x0000440e,
- GUEST_ES_LIMIT = 0x00004800,
- GUEST_CS_LIMIT = 0x00004802,
- GUEST_SS_LIMIT = 0x00004804,
- GUEST_DS_LIMIT = 0x00004806,
- GUEST_FS_LIMIT = 0x00004808,
- GUEST_GS_LIMIT = 0x0000480a,
- GUEST_LDTR_LIMIT = 0x0000480c,
- GUEST_TR_LIMIT = 0x0000480e,
- GUEST_GDTR_LIMIT = 0x00004810,
- GUEST_IDTR_LIMIT = 0x00004812,
- GUEST_ES_AR_BYTES = 0x00004814,
- GUEST_CS_AR_BYTES = 0x00004816,
- GUEST_SS_AR_BYTES = 0x00004818,
- GUEST_DS_AR_BYTES = 0x0000481a,
- GUEST_FS_AR_BYTES = 0x0000481c,
- GUEST_GS_AR_BYTES = 0x0000481e,
- GUEST_LDTR_AR_BYTES = 0x00004820,
- GUEST_TR_AR_BYTES = 0x00004822,
- GUEST_INTERRUPTIBILITY_INFO = 0x00004824,
- GUEST_ACTIVITY_STATE = 0x00004826,
- GUEST_SYSENTER_CS = 0x0000482A,
- VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
- HOST_IA32_SYSENTER_CS = 0x00004c00,
- CR0_GUEST_HOST_MASK = 0x00006000,
- CR4_GUEST_HOST_MASK = 0x00006002,
- CR0_READ_SHADOW = 0x00006004,
- CR4_READ_SHADOW = 0x00006006,
- CR3_TARGET_VALUE0 = 0x00006008,
- CR3_TARGET_VALUE1 = 0x0000600a,
- CR3_TARGET_VALUE2 = 0x0000600c,
- CR3_TARGET_VALUE3 = 0x0000600e,
- EXIT_QUALIFICATION = 0x00006400,
- GUEST_LINEAR_ADDRESS = 0x0000640a,
- GUEST_CR0 = 0x00006800,
- GUEST_CR3 = 0x00006802,
- GUEST_CR4 = 0x00006804,
- GUEST_ES_BASE = 0x00006806,
- GUEST_CS_BASE = 0x00006808,
- GUEST_SS_BASE = 0x0000680a,
- GUEST_DS_BASE = 0x0000680c,
- GUEST_FS_BASE = 0x0000680e,
- GUEST_GS_BASE = 0x00006810,
- GUEST_LDTR_BASE = 0x00006812,
- GUEST_TR_BASE = 0x00006814,
- GUEST_GDTR_BASE = 0x00006816,
- GUEST_IDTR_BASE = 0x00006818,
- GUEST_DR7 = 0x0000681a,
- GUEST_RSP = 0x0000681c,
- GUEST_RIP = 0x0000681e,
- GUEST_RFLAGS = 0x00006820,
- GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822,
- GUEST_SYSENTER_ESP = 0x00006824,
- GUEST_SYSENTER_EIP = 0x00006826,
- HOST_CR0 = 0x00006c00,
- HOST_CR3 = 0x00006c02,
- HOST_CR4 = 0x00006c04,
- HOST_FS_BASE = 0x00006c06,
- HOST_GS_BASE = 0x00006c08,
- HOST_TR_BASE = 0x00006c0a,
- HOST_GDTR_BASE = 0x00006c0c,
- HOST_IDTR_BASE = 0x00006c0e,
- HOST_IA32_SYSENTER_ESP = 0x00006c10,
- HOST_IA32_SYSENTER_EIP = 0x00006c12,
- HOST_RSP = 0x00006c14,
- HOST_RIP = 0x00006c16,
-};
-
-struct vmx_msr_entry {
- uint32_t index;
- uint32_t reserved;
- uint64_t value;
-} __attribute__ ((aligned(16)));
-
-#include "evmcs.h"
-
-static inline int vmxon(uint64_t phys)
-{
- uint8_t ret;
-
- __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]"
- : [ret]"=rm"(ret)
- : [pa]"m"(phys)
- : "cc", "memory");
-
- return ret;
-}
-
-static inline void vmxoff(void)
-{
- __asm__ __volatile__("vmxoff");
-}
-
-static inline int vmclear(uint64_t vmcs_pa)
-{
- uint8_t ret;
-
- __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]"
- : [ret]"=rm"(ret)
- : [pa]"m"(vmcs_pa)
- : "cc", "memory");
-
- return ret;
-}
-
-static inline int vmptrld(uint64_t vmcs_pa)
-{
- uint8_t ret;
-
- if (enable_evmcs)
- return -1;
-
- __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]"
- : [ret]"=rm"(ret)
- : [pa]"m"(vmcs_pa)
- : "cc", "memory");
-
- return ret;
-}
-
-static inline int vmptrst(uint64_t *value)
-{
- uint64_t tmp;
- uint8_t ret;
-
- if (enable_evmcs)
- return evmcs_vmptrst(value);
-
- __asm__ __volatile__("vmptrst %[value]; setna %[ret]"
- : [value]"=m"(tmp), [ret]"=rm"(ret)
- : : "cc", "memory");
-
- *value = tmp;
- return ret;
-}
-
-/*
- * A wrapper around vmptrst that ignores errors and returns zero if the
- * vmptrst instruction fails.
- */
-static inline uint64_t vmptrstz(void)
-{
- uint64_t value = 0;
- vmptrst(&value);
- return value;
-}
-
-/*
- * No guest state (e.g. GPRs) is established by this vmlaunch.
- */
-static inline int vmlaunch(void)
-{
- int ret;
-
- if (enable_evmcs)
- return evmcs_vmlaunch();
-
- __asm__ __volatile__("push %%rbp;"
- "push %%rcx;"
- "push %%rdx;"
- "push %%rsi;"
- "push %%rdi;"
- "push $0;"
- "vmwrite %%rsp, %[host_rsp];"
- "lea 1f(%%rip), %%rax;"
- "vmwrite %%rax, %[host_rip];"
- "vmlaunch;"
- "incq (%%rsp);"
- "1: pop %%rax;"
- "pop %%rdi;"
- "pop %%rsi;"
- "pop %%rdx;"
- "pop %%rcx;"
- "pop %%rbp;"
- : [ret]"=&a"(ret)
- : [host_rsp]"r"((uint64_t)HOST_RSP),
- [host_rip]"r"((uint64_t)HOST_RIP)
- : "memory", "cc", "rbx", "r8", "r9", "r10",
- "r11", "r12", "r13", "r14", "r15");
- return ret;
-}
-
-/*
- * No guest state (e.g. GPRs) is established by this vmresume.
- */
-static inline int vmresume(void)
-{
- int ret;
-
- if (enable_evmcs)
- return evmcs_vmresume();
-
- __asm__ __volatile__("push %%rbp;"
- "push %%rcx;"
- "push %%rdx;"
- "push %%rsi;"
- "push %%rdi;"
- "push $0;"
- "vmwrite %%rsp, %[host_rsp];"
- "lea 1f(%%rip), %%rax;"
- "vmwrite %%rax, %[host_rip];"
- "vmresume;"
- "incq (%%rsp);"
- "1: pop %%rax;"
- "pop %%rdi;"
- "pop %%rsi;"
- "pop %%rdx;"
- "pop %%rcx;"
- "pop %%rbp;"
- : [ret]"=&a"(ret)
- : [host_rsp]"r"((uint64_t)HOST_RSP),
- [host_rip]"r"((uint64_t)HOST_RIP)
- : "memory", "cc", "rbx", "r8", "r9", "r10",
- "r11", "r12", "r13", "r14", "r15");
- return ret;
-}
-
-static inline void vmcall(void)
-{
- /*
- * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle
- * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended
- * use of this function is to exit to L1 from L2. Clobber all other
- * GPRs as L1 doesn't correctly preserve them during vmexits.
- */
- __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp"
- : : "a"(0xdeadbeef), "c"(0xbeefdead)
- : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
- "r10", "r11", "r12", "r13", "r14", "r15");
-}
-
-static inline int vmread(uint64_t encoding, uint64_t *value)
-{
- uint64_t tmp;
- uint8_t ret;
-
- if (enable_evmcs)
- return evmcs_vmread(encoding, value);
-
- __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]"
- : [value]"=rm"(tmp), [ret]"=rm"(ret)
- : [encoding]"r"(encoding)
- : "cc", "memory");
-
- *value = tmp;
- return ret;
-}
-
-/*
- * A wrapper around vmread that ignores errors and returns zero if the
- * vmread instruction fails.
- */
-static inline uint64_t vmreadz(uint64_t encoding)
-{
- uint64_t value = 0;
- vmread(encoding, &value);
- return value;
-}
-
-static inline int vmwrite(uint64_t encoding, uint64_t value)
-{
- uint8_t ret;
-
- if (enable_evmcs)
- return evmcs_vmwrite(encoding, value);
-
- __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]"
- : [ret]"=rm"(ret)
- : [value]"rm"(value), [encoding]"r"(encoding)
- : "cc", "memory");
-
- return ret;
-}
-
-static inline uint32_t vmcs_revision(void)
-{
- return rdmsr(MSR_IA32_VMX_BASIC);
-}
-
-struct vmx_pages {
- void *vmxon_hva;
- uint64_t vmxon_gpa;
- void *vmxon;
-
- void *vmcs_hva;
- uint64_t vmcs_gpa;
- void *vmcs;
-
- void *msr_hva;
- uint64_t msr_gpa;
- void *msr;
-
- void *shadow_vmcs_hva;
- uint64_t shadow_vmcs_gpa;
- void *shadow_vmcs;
-
- void *vmread_hva;
- uint64_t vmread_gpa;
- void *vmread;
-
- void *vmwrite_hva;
- uint64_t vmwrite_gpa;
- void *vmwrite;
-
- void *eptp_hva;
- uint64_t eptp_gpa;
- void *eptp;
-
- void *apic_access_hva;
- uint64_t apic_access_gpa;
- void *apic_access;
-};
-
-union vmx_basic {
- u64 val;
- struct {
- u32 revision;
- u32 size:13,
- reserved1:3,
- width:1,
- dual:1,
- type:4,
- insouts:1,
- ctrl:1,
- vm_entry_exception_ctrl:1,
- reserved2:7;
- };
-};
-
-union vmx_ctrl_msr {
- u64 val;
- struct {
- u32 set, clr;
- };
-};
-
-struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
-bool prepare_for_vmx_operation(struct vmx_pages *vmx);
-void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
-bool load_vmcs(struct vmx_pages *vmx);
-
-bool ept_1g_pages_supported(void);
-
-void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr);
-void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size);
-void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t memslot);
-void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t addr, uint64_t size);
-bool kvm_cpu_has_ept(void);
-void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot);
-void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
-
-#endif /* SELFTEST_KVM_VMX_H */
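
Similarly, a condensed sketch of the canonical L1 flow built on this header; the L2 entry point is a placeholder and EXIT_REASON_VMCALL comes from <asm/vmx.h>:

/* Sketch: enter L2 once and verify it exited back to L1 via VMCALL. */
static void l2_guest_code(void)
{
	vmcall();	/* request an exit back to L1 */
}

static void l1_guest_code(struct vmx_pages *vmx)
{
	unsigned long l2_guest_stack[64];

	GUEST_ASSERT(prepare_for_vmx_operation(vmx));
	GUEST_ASSERT(load_vmcs(vmx));
	prepare_vmcs(vmx, l2_guest_code, &l2_guest_stack[64]);

	GUEST_ASSERT(!vmlaunch());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
}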
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARM Generic Interrupt Controller (GIC) support
- */
-
-#include <errno.h>
-#include <linux/bits.h>
-#include <linux/sizes.h>
-
-#include "kvm_util.h"
-
-#include <gic.h>
-#include "gic_private.h"
-#include "processor.h"
-#include "spinlock.h"
-
-static const struct gic_common_ops *gic_common_ops;
-static struct spinlock gic_lock;
-
-static void gic_cpu_init(unsigned int cpu)
-{
- gic_common_ops->gic_cpu_init(cpu);
-}
-
-static void gic_dist_init(enum gic_type type, unsigned int nr_cpus)
-{
- const struct gic_common_ops *gic_ops = NULL;
-
- spin_lock(&gic_lock);
-
- /* Distributor initialization is needed only once per VM */
- if (gic_common_ops) {
- spin_unlock(&gic_lock);
- return;
- }
-
- if (type == GIC_V3)
- gic_ops = &gicv3_ops;
-
- GUEST_ASSERT(gic_ops);
-
- gic_ops->gic_init(nr_cpus);
- gic_common_ops = gic_ops;
-
- /* Make sure that the initialized data is visible to all the vCPUs */
- dsb(sy);
-
- spin_unlock(&gic_lock);
-}
-
-void gic_init(enum gic_type type, unsigned int nr_cpus)
-{
- uint32_t cpu = guest_get_vcpuid();
-
- GUEST_ASSERT(type < GIC_TYPE_MAX);
- GUEST_ASSERT(nr_cpus);
-
- gic_dist_init(type, nr_cpus);
- gic_cpu_init(cpu);
-}
-
-void gic_irq_enable(unsigned int intid)
-{
- GUEST_ASSERT(gic_common_ops);
- gic_common_ops->gic_irq_enable(intid);
-}
-
-void gic_irq_disable(unsigned int intid)
-{
- GUEST_ASSERT(gic_common_ops);
- gic_common_ops->gic_irq_disable(intid);
-}
-
-unsigned int gic_get_and_ack_irq(void)
-{
- uint64_t irqstat;
- unsigned int intid;
-
- GUEST_ASSERT(gic_common_ops);
-
- irqstat = gic_common_ops->gic_read_iar();
- intid = irqstat & GENMASK(23, 0);
-
- return intid;
-}
-
-void gic_set_eoi(unsigned int intid)
-{
- GUEST_ASSERT(gic_common_ops);
- gic_common_ops->gic_write_eoir(intid);
-}
-
-void gic_set_dir(unsigned int intid)
-{
- GUEST_ASSERT(gic_common_ops);
- gic_common_ops->gic_write_dir(intid);
-}
-
-void gic_set_eoi_split(bool split)
-{
- GUEST_ASSERT(gic_common_ops);
- gic_common_ops->gic_set_eoi_split(split);
-}
-
-void gic_set_priority_mask(uint64_t pmr)
-{
- GUEST_ASSERT(gic_common_ops);
- gic_common_ops->gic_set_priority_mask(pmr);
-}
-
-void gic_set_priority(unsigned int intid, unsigned int prio)
-{
- GUEST_ASSERT(gic_common_ops);
- gic_common_ops->gic_set_priority(intid, prio);
-}
-
-void gic_irq_set_active(unsigned int intid)
-{
- GUEST_ASSERT(gic_common_ops);
- gic_common_ops->gic_irq_set_active(intid);
-}
-
-void gic_irq_clear_active(unsigned int intid)
-{
- GUEST_ASSERT(gic_common_ops);
- gic_common_ops->gic_irq_clear_active(intid);
-}
-
-bool gic_irq_get_active(unsigned int intid)
-{
- GUEST_ASSERT(gic_common_ops);
- return gic_common_ops->gic_irq_get_active(intid);
-}
-
-void gic_irq_set_pending(unsigned int intid)
-{
- GUEST_ASSERT(gic_common_ops);
- gic_common_ops->gic_irq_set_pending(intid);
-}
-
-void gic_irq_clear_pending(unsigned int intid)
-{
- GUEST_ASSERT(gic_common_ops);
- gic_common_ops->gic_irq_clear_pending(intid);
-}
-
-bool gic_irq_get_pending(unsigned int intid)
-{
- GUEST_ASSERT(gic_common_ops);
- return gic_common_ops->gic_irq_get_pending(intid);
-}
-
-void gic_irq_set_config(unsigned int intid, bool is_edge)
-{
- GUEST_ASSERT(gic_common_ops);
- gic_common_ops->gic_irq_set_config(intid, is_edge);
-}
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * ARM Generic Interrupt Controller (GIC) private defines that are only
- * shared among the GIC library code.
- */
-
-#ifndef SELFTEST_KVM_GIC_PRIVATE_H
-#define SELFTEST_KVM_GIC_PRIVATE_H
-
-struct gic_common_ops {
- void (*gic_init)(unsigned int nr_cpus);
- void (*gic_cpu_init)(unsigned int cpu);
- void (*gic_irq_enable)(unsigned int intid);
- void (*gic_irq_disable)(unsigned int intid);
- uint64_t (*gic_read_iar)(void);
- void (*gic_write_eoir)(uint32_t irq);
- void (*gic_write_dir)(uint32_t irq);
- void (*gic_set_eoi_split)(bool split);
- void (*gic_set_priority_mask)(uint64_t mask);
- void (*gic_set_priority)(uint32_t intid, uint32_t prio);
- void (*gic_irq_set_active)(uint32_t intid);
- void (*gic_irq_clear_active)(uint32_t intid);
- bool (*gic_irq_get_active)(uint32_t intid);
- void (*gic_irq_set_pending)(uint32_t intid);
- void (*gic_irq_clear_pending)(uint32_t intid);
- bool (*gic_irq_get_pending)(uint32_t intid);
- void (*gic_irq_set_config)(uint32_t intid, bool is_edge);
-};
-
-extern const struct gic_common_ops gicv3_ops;
-
-#endif /* SELFTEST_KVM_GIC_PRIVATE_H */
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARM Generic Interrupt Controller (GIC) v3 support
- */
-
-#include <linux/sizes.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "delay.h"
-
-#include "gic.h"
-#include "gic_v3.h"
-#include "gic_private.h"
-
-#define GICV3_MAX_CPUS 512
-
-#define GICD_INT_DEF_PRI 0xa0
-#define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\
- (GICD_INT_DEF_PRI << 16) |\
- (GICD_INT_DEF_PRI << 8) |\
- GICD_INT_DEF_PRI)
-
-#define ICC_PMR_DEF_PRIO 0xf0
-
-struct gicv3_data {
- unsigned int nr_cpus;
- unsigned int nr_spis;
-};
-
-#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K)
-#define DIST_BIT (1U << 31)
-
-enum gicv3_intid_range {
- SGI_RANGE,
- PPI_RANGE,
- SPI_RANGE,
- INVALID_RANGE,
-};
-
-static struct gicv3_data gicv3_data;
-
-static void gicv3_gicd_wait_for_rwp(void)
-{
- unsigned int count = 100000; /* 1s */
-
- while (readl(GICD_BASE_GVA + GICD_CTLR) & GICD_CTLR_RWP) {
- GUEST_ASSERT(count--);
- udelay(10);
- }
-}
-
-static inline volatile void *gicr_base_cpu(uint32_t cpu)
-{
- /* Align all the redistributors sequentially */
- return GICR_BASE_GVA + cpu * SZ_64K * 2;
-}
-
-static void gicv3_gicr_wait_for_rwp(uint32_t cpu)
-{
- unsigned int count = 100000; /* 1s */
-
- while (readl(gicr_base_cpu(cpu) + GICR_CTLR) & GICR_CTLR_RWP) {
- GUEST_ASSERT(count--);
- udelay(10);
- }
-}
-
-static void gicv3_wait_for_rwp(uint32_t cpu_or_dist)
-{
- if (cpu_or_dist & DIST_BIT)
- gicv3_gicd_wait_for_rwp();
- else
- gicv3_gicr_wait_for_rwp(cpu_or_dist);
-}
-
-static enum gicv3_intid_range get_intid_range(unsigned int intid)
-{
- switch (intid) {
- case 0 ... 15:
- return SGI_RANGE;
- case 16 ... 31:
- return PPI_RANGE;
- case 32 ... 1019:
- return SPI_RANGE;
- }
-
- /* We should not be reaching here */
- GUEST_ASSERT(0);
-
- return INVALID_RANGE;
-}
-
-static uint64_t gicv3_read_iar(void)
-{
- uint64_t irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
-
- dsb(sy);
- return irqstat;
-}
-
-static void gicv3_write_eoir(uint32_t irq)
-{
- write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
- isb();
-}
-
-static void gicv3_write_dir(uint32_t irq)
-{
- write_sysreg_s(irq, SYS_ICC_DIR_EL1);
- isb();
-}
-
-static void gicv3_set_priority_mask(uint64_t mask)
-{
- write_sysreg_s(mask, SYS_ICC_PMR_EL1);
-}
-
-static void gicv3_set_eoi_split(bool split)
-{
- uint32_t val;
-
- /*
- * All other fields are read-only, so no need to read CTLR first. In
- * fact, the kernel does the same.
- */
- val = split ? (1U << 1) : 0;
- write_sysreg_s(val, SYS_ICC_CTLR_EL1);
- isb();
-}
-
-uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset)
-{
- volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA
- : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist));
- return readl(base + offset);
-}
-
-void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val)
-{
- volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA
- : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist));
- writel(reg_val, base + offset);
-}
-
-uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, uint64_t offset, uint32_t mask)
-{
- return gicv3_reg_readl(cpu_or_dist, offset) & mask;
-}
-
-void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset,
- uint32_t mask, uint32_t reg_val)
-{
- uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask;
-
- tmp |= (reg_val & mask);
- gicv3_reg_writel(cpu_or_dist, offset, tmp);
-}
-
-/*
- * We use a single offset for the distributor and redistributor maps as they
- * have the same value in both. The only exceptions are registers that only
- * exist in one and not the other, like GICR_WAKER that doesn't exist in the
- * distributor map. Such registers are conveniently marked as reserved in the
- * map that doesn't implement it; like GICR_WAKER's offset of 0x0014 being
- * marked as "Reserved" in the Distributor map.
- */
-static void gicv3_access_reg(uint32_t intid, uint64_t offset,
- uint32_t reg_bits, uint32_t bits_per_field,
- bool write, uint32_t *val)
-{
- uint32_t cpu = guest_get_vcpuid();
- enum gicv3_intid_range intid_range = get_intid_range(intid);
- uint32_t fields_per_reg, index, mask, shift;
- uint32_t cpu_or_dist;
-
- GUEST_ASSERT(bits_per_field <= reg_bits);
- GUEST_ASSERT(!write || *val < (1U << bits_per_field));
- /*
- * This function does not support 64 bit accesses. Just asserting here
- * until we implement readq/writeq.
- */
- GUEST_ASSERT(reg_bits == 32);
-
- fields_per_reg = reg_bits / bits_per_field;
- index = intid % fields_per_reg;
- shift = index * bits_per_field;
- mask = ((1U << bits_per_field) - 1) << shift;
-
- /* Set offset to the actual register holding intid's config. */
- offset += (intid / fields_per_reg) * (reg_bits / 8);
-
- cpu_or_dist = (intid_range == SPI_RANGE) ? DIST_BIT : cpu;
-
- if (write)
- gicv3_setl_fields(cpu_or_dist, offset, mask, *val << shift);
- *val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift;
-}
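
To make the field arithmetic concrete: setting the priority of INTID 42 goes through GICD_IPRIORITYR with reg_bits = 32 and bits_per_field = 8, so fields_per_reg = 4, index = 42 % 4 = 2, shift = 16 and mask = 0x00ff0000, while the offset advances by (42 / 4) * 4 = 40 bytes to reach the register holding INTID 42's field; and since INTID 42 is an SPI, the access is routed to the distributor (DIST_BIT) rather than to the calling CPU's redistributor.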
-
-static void gicv3_write_reg(uint32_t intid, uint64_t offset,
- uint32_t reg_bits, uint32_t bits_per_field, uint32_t val)
-{
- gicv3_access_reg(intid, offset, reg_bits,
- bits_per_field, true, &val);
-}
-
-static uint32_t gicv3_read_reg(uint32_t intid, uint64_t offset,
- uint32_t reg_bits, uint32_t bits_per_field)
-{
- uint32_t val;
-
- gicv3_access_reg(intid, offset, reg_bits,
- bits_per_field, false, &val);
- return val;
-}
-
-static void gicv3_set_priority(uint32_t intid, uint32_t prio)
-{
- gicv3_write_reg(intid, GICD_IPRIORITYR, 32, 8, prio);
-}
-
-/* Sets the intid to be level-sensitive or edge-triggered. */
-static void gicv3_irq_set_config(uint32_t intid, bool is_edge)
-{
- uint32_t val;
-
- /* N/A for private interrupts. */
- GUEST_ASSERT(get_intid_range(intid) == SPI_RANGE);
- val = is_edge ? 2 : 0;
- gicv3_write_reg(intid, GICD_ICFGR, 32, 2, val);
-}
-
-static void gicv3_irq_enable(uint32_t intid)
-{
- bool is_spi = get_intid_range(intid) == SPI_RANGE;
- uint32_t cpu = guest_get_vcpuid();
-
- gicv3_write_reg(intid, GICD_ISENABLER, 32, 1, 1);
- gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
-}
-
-static void gicv3_irq_disable(uint32_t intid)
-{
- bool is_spi = get_intid_range(intid) == SPI_RANGE;
- uint32_t cpu = guest_get_vcpuid();
-
- gicv3_write_reg(intid, GICD_ICENABLER, 32, 1, 1);
- gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
-}
-
-static void gicv3_irq_set_active(uint32_t intid)
-{
- gicv3_write_reg(intid, GICD_ISACTIVER, 32, 1, 1);
-}
-
-static void gicv3_irq_clear_active(uint32_t intid)
-{
- gicv3_write_reg(intid, GICD_ICACTIVER, 32, 1, 1);
-}
-
-static bool gicv3_irq_get_active(uint32_t intid)
-{
- return gicv3_read_reg(intid, GICD_ISACTIVER, 32, 1);
-}
-
-static void gicv3_irq_set_pending(uint32_t intid)
-{
- gicv3_write_reg(intid, GICD_ISPENDR, 32, 1, 1);
-}
-
-static void gicv3_irq_clear_pending(uint32_t intid)
-{
- gicv3_write_reg(intid, GICD_ICPENDR, 32, 1, 1);
-}
-
-static bool gicv3_irq_get_pending(uint32_t intid)
-{
- return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1);
-}
-
-static void gicv3_enable_redist(volatile void *redist_base)
-{
- uint32_t val = readl(redist_base + GICR_WAKER);
- unsigned int count = 100000; /* 1s */
-
- val &= ~GICR_WAKER_ProcessorSleep;
- writel(val, redist_base + GICR_WAKER);
-
- /* Wait until the processor is 'active' */
- while (readl(redist_base + GICR_WAKER) & GICR_WAKER_ChildrenAsleep) {
- GUEST_ASSERT(count--);
- udelay(10);
- }
-}
-
-static void gicv3_cpu_init(unsigned int cpu)
-{
- volatile void *sgi_base;
- unsigned int i;
- volatile void *redist_base_cpu;
-
- GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
-
- redist_base_cpu = gicr_base_cpu(cpu);
- sgi_base = sgi_base_from_redist(redist_base_cpu);
-
- gicv3_enable_redist(redist_base_cpu);
-
- /*
- * Mark all the SGI and PPI interrupts as non-secure Group-1.
- * Also, deactivate and disable them.
- */
- writel(~0, sgi_base + GICR_IGROUPR0);
- writel(~0, sgi_base + GICR_ICACTIVER0);
- writel(~0, sgi_base + GICR_ICENABLER0);
-
- /* Set a default priority for all the SGIs and PPIs */
- for (i = 0; i < 32; i += 4)
- writel(GICD_INT_DEF_PRI_X4,
- sgi_base + GICR_IPRIORITYR0 + i);
-
- gicv3_gicr_wait_for_rwp(cpu);
-
- /* Enable the GIC system register (ICC_*) access */
- write_sysreg_s(read_sysreg_s(SYS_ICC_SRE_EL1) | ICC_SRE_EL1_SRE,
- SYS_ICC_SRE_EL1);
-
- /* Set a default priority threshold */
- write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
-
- /* Enable non-secure Group-1 interrupts */
- write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
-}
-
-static void gicv3_dist_init(void)
-{
- unsigned int i;
-
- /* Disable the distributor until we set things up */
- writel(0, GICD_BASE_GVA + GICD_CTLR);
- gicv3_gicd_wait_for_rwp();
-
- /*
- * Mark all the SPI interrupts as non-secure Group-1.
- * Also, deactivate and disable them.
- */
- for (i = 32; i < gicv3_data.nr_spis; i += 32) {
- writel(~0, GICD_BASE_GVA + GICD_IGROUPR + i / 8);
- writel(~0, GICD_BASE_GVA + GICD_ICACTIVER + i / 8);
- writel(~0, GICD_BASE_GVA + GICD_ICENABLER + i / 8);
- }
-
- /* Set a default priority for all the SPIs */
- for (i = 32; i < gicv3_data.nr_spis; i += 4)
- writel(GICD_INT_DEF_PRI_X4,
- GICD_BASE_GVA + GICD_IPRIORITYR + i);
-
- /* Wait for the settings to sync-in */
- gicv3_gicd_wait_for_rwp();
-
- /* Finally, enable the distributor globally with ARE */
- writel(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A |
- GICD_CTLR_ENABLE_G1, GICD_BASE_GVA + GICD_CTLR);
- gicv3_gicd_wait_for_rwp();
-}
-
-static void gicv3_init(unsigned int nr_cpus)
-{
- GUEST_ASSERT(nr_cpus <= GICV3_MAX_CPUS);
-
- gicv3_data.nr_cpus = nr_cpus;
- gicv3_data.nr_spis = GICD_TYPER_SPIS(
- readl(GICD_BASE_GVA + GICD_TYPER));
- if (gicv3_data.nr_spis > 1020)
- gicv3_data.nr_spis = 1020;
-
- /*
- * Initialize only the distributor for now.
- * The redistributor and CPU interfaces are initialized
- * later for every PE.
- */
- gicv3_dist_init();
-}
-
-const struct gic_common_ops gicv3_ops = {
- .gic_init = gicv3_init,
- .gic_cpu_init = gicv3_cpu_init,
- .gic_irq_enable = gicv3_irq_enable,
- .gic_irq_disable = gicv3_irq_disable,
- .gic_read_iar = gicv3_read_iar,
- .gic_write_eoir = gicv3_write_eoir,
- .gic_write_dir = gicv3_write_dir,
- .gic_set_priority_mask = gicv3_set_priority_mask,
- .gic_set_eoi_split = gicv3_set_eoi_split,
- .gic_set_priority = gicv3_set_priority,
- .gic_irq_set_active = gicv3_irq_set_active,
- .gic_irq_clear_active = gicv3_irq_clear_active,
- .gic_irq_get_active = gicv3_irq_get_active,
- .gic_irq_set_pending = gicv3_irq_set_pending,
- .gic_irq_clear_pending = gicv3_irq_clear_pending,
- .gic_irq_get_pending = gicv3_irq_get_pending,
- .gic_irq_set_config = gicv3_irq_set_config,
-};
-
-void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
- vm_paddr_t pend_table)
-{
- volatile void *rdist_base = gicr_base_cpu(guest_get_vcpuid());
-
- u32 ctlr;
- u64 val;
-
- val = (cfg_table |
- GICR_PROPBASER_InnerShareable |
- GICR_PROPBASER_RaWaWb |
- ((ilog2(cfg_table_size) - 1) & GICR_PROPBASER_IDBITS_MASK));
- writeq_relaxed(val, rdist_base + GICR_PROPBASER);
-
- val = (pend_table |
- GICR_PENDBASER_InnerShareable |
- GICR_PENDBASER_RaWaWb);
- writeq_relaxed(val, rdist_base + GICR_PENDBASER);
-
- ctlr = readl_relaxed(rdist_base + GICR_CTLR);
- ctlr |= GICR_CTLR_ENABLE_LPIS;
- writel_relaxed(ctlr, rdist_base + GICR_CTLR);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Guest ITS library, generously donated by drivers/irqchip/irq-gic-v3-its.c
- * over in the kernel tree.
- */
-
-#include <linux/kvm.h>
-#include <linux/sizes.h>
-#include <asm/kvm_para.h>
-#include <asm/kvm.h>
-
-#include "kvm_util.h"
-#include "vgic.h"
-#include "gic.h"
-#include "gic_v3.h"
-#include "processor.h"
-
-static u64 its_read_u64(unsigned long offset)
-{
- return readq_relaxed(GITS_BASE_GVA + offset);
-}
-
-static void its_write_u64(unsigned long offset, u64 val)
-{
- writeq_relaxed(val, GITS_BASE_GVA + offset);
-}
-
-static u32 its_read_u32(unsigned long offset)
-{
- return readl_relaxed(GITS_BASE_GVA + offset);
-}
-
-static void its_write_u32(unsigned long offset, u32 val)
-{
- writel_relaxed(val, GITS_BASE_GVA + offset);
-}
-
-static unsigned long its_find_baser(unsigned int type)
-{
- int i;
-
- for (i = 0; i < GITS_BASER_NR_REGS; i++) {
- u64 baser;
- unsigned long offset = GITS_BASER + (i * sizeof(baser));
-
- baser = its_read_u64(offset);
- if (GITS_BASER_TYPE(baser) == type)
- return offset;
- }
-
- GUEST_FAIL("Couldn't find an ITS BASER of type %u", type);
- return -1;
-}
-
-static void its_install_table(unsigned int type, vm_paddr_t base, size_t size)
-{
- unsigned long offset = its_find_baser(type);
- u64 baser;
-
- baser = ((size / SZ_64K) - 1) |
- GITS_BASER_PAGE_SIZE_64K |
- GITS_BASER_InnerShareable |
- base |
- GITS_BASER_RaWaWb |
- GITS_BASER_VALID;
-
- its_write_u64(offset, baser);
-}
-
-static void its_install_cmdq(vm_paddr_t base, size_t size)
-{
- u64 cbaser;
-
- cbaser = ((size / SZ_4K) - 1) |
- GITS_CBASER_InnerShareable |
- base |
- GITS_CBASER_RaWaWb |
- GITS_CBASER_VALID;
-
- its_write_u64(GITS_CBASER, cbaser);
-}
-
-void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz,
- vm_paddr_t device_tbl, size_t device_tbl_sz,
- vm_paddr_t cmdq, size_t cmdq_size)
-{
- u32 ctlr;
-
- its_install_table(GITS_BASER_TYPE_COLLECTION, coll_tbl, coll_tbl_sz);
- its_install_table(GITS_BASER_TYPE_DEVICE, device_tbl, device_tbl_sz);
- its_install_cmdq(cmdq, cmdq_size);
-
- ctlr = its_read_u32(GITS_CTLR);
- ctlr |= GITS_CTLR_ENABLE;
- its_write_u32(GITS_CTLR, ctlr);
-}
-
-struct its_cmd_block {
- union {
- u64 raw_cmd[4];
- __le64 raw_cmd_le[4];
- };
-};
-
-static inline void its_fixup_cmd(struct its_cmd_block *cmd)
-{
- /* Let's fixup BE commands */
- cmd->raw_cmd_le[0] = cpu_to_le64(cmd->raw_cmd[0]);
- cmd->raw_cmd_le[1] = cpu_to_le64(cmd->raw_cmd[1]);
- cmd->raw_cmd_le[2] = cpu_to_le64(cmd->raw_cmd[2]);
- cmd->raw_cmd_le[3] = cpu_to_le64(cmd->raw_cmd[3]);
-}
-
-static void its_mask_encode(u64 *raw_cmd, u64 val, int h, int l)
-{
- u64 mask = GENMASK_ULL(h, l);
- *raw_cmd &= ~mask;
- *raw_cmd |= (val << l) & mask;
-}
-
-static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr)
-{
- its_mask_encode(&cmd->raw_cmd[0], cmd_nr, 7, 0);
-}
-
-static void its_encode_devid(struct its_cmd_block *cmd, u32 devid)
-{
- its_mask_encode(&cmd->raw_cmd[0], devid, 63, 32);
-}
-
-static void its_encode_event_id(struct its_cmd_block *cmd, u32 id)
-{
- its_mask_encode(&cmd->raw_cmd[1], id, 31, 0);
-}
-
-static void its_encode_phys_id(struct its_cmd_block *cmd, u32 phys_id)
-{
- its_mask_encode(&cmd->raw_cmd[1], phys_id, 63, 32);
-}
-
-static void its_encode_size(struct its_cmd_block *cmd, u8 size)
-{
- its_mask_encode(&cmd->raw_cmd[1], size, 4, 0);
-}
-
-static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr)
-{
- its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8);
-}
-
-static void its_encode_valid(struct its_cmd_block *cmd, int valid)
-{
- its_mask_encode(&cmd->raw_cmd[2], !!valid, 63, 63);
-}
-
-static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr)
-{
- its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16);
-}
-
-static void its_encode_collection(struct its_cmd_block *cmd, u16 col)
-{
- its_mask_encode(&cmd->raw_cmd[2], col, 15, 0);
-}
-
-#define GITS_CMDQ_POLL_ITERATIONS 0
-
-static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd)
-{
- u64 cwriter = its_read_u64(GITS_CWRITER);
- struct its_cmd_block *dst = cmdq_base + cwriter;
- u64 cbaser = its_read_u64(GITS_CBASER);
- size_t cmdq_size;
- u64 next;
- int i;
-
- cmdq_size = ((cbaser & 0xFF) + 1) * SZ_4K;
-
- its_fixup_cmd(cmd);
-
- WRITE_ONCE(*dst, *cmd);
- dsb(ishst);
- next = (cwriter + sizeof(*cmd)) % cmdq_size;
- its_write_u64(GITS_CWRITER, next);
-
- /*
- * Polling isn't necessary considering KVM's ITS emulation at the time
- * of writing this, as the CMDQ is processed synchronously after a write
- * to CWRITER.
- */
- for (i = 0; its_read_u64(GITS_CREADR) != next; i++) {
- __GUEST_ASSERT(i < GITS_CMDQ_POLL_ITERATIONS,
- "ITS didn't process command at offset %lu after %d iterations\n",
- cwriter, i);
-
- cpu_relax();
- }
-}
-
-void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base,
- size_t itt_size, bool valid)
-{
- struct its_cmd_block cmd = {};
-
- its_encode_cmd(&cmd, GITS_CMD_MAPD);
- its_encode_devid(&cmd, device_id);
- its_encode_size(&cmd, ilog2(itt_size) - 1);
- its_encode_itt(&cmd, itt_base);
- its_encode_valid(&cmd, valid);
-
- its_send_cmd(cmdq_base, &cmd);
-}
-
-void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid)
-{
- struct its_cmd_block cmd = {};
-
- its_encode_cmd(&cmd, GITS_CMD_MAPC);
- its_encode_collection(&cmd, collection_id);
- its_encode_target(&cmd, vcpu_id);
- its_encode_valid(&cmd, valid);
-
- its_send_cmd(cmdq_base, &cmd);
-}
-
-void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
- u32 collection_id, u32 intid)
-{
- struct its_cmd_block cmd = {};
-
- its_encode_cmd(&cmd, GITS_CMD_MAPTI);
- its_encode_devid(&cmd, device_id);
- its_encode_event_id(&cmd, event_id);
- its_encode_phys_id(&cmd, intid);
- its_encode_collection(&cmd, collection_id);
-
- its_send_cmd(cmdq_base, &cmd);
-}
-
-void its_send_invall_cmd(void *cmdq_base, u32 collection_id)
-{
- struct its_cmd_block cmd = {};
-
- its_encode_cmd(&cmd, GITS_CMD_INVALL);
- its_encode_collection(&cmd, collection_id);
-
- its_send_cmd(cmdq_base, &cmd);
-}
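
Taken together, and purely as an illustrative ordering (the device, event, collection and INTID values are arbitrary, and cmdq_base is assumed to be the guest mapping of the queue installed by its_init()), a test wires up a single LPI roughly like this:

/* Sketch: map (device 0, event 0) to LPI 8192 on collection 0 / vCPU 0. */
static void guest_setup_one_lpi(void *cmdq_base, vm_paddr_t itt_base,
				size_t itt_size)
{
	its_send_mapd_cmd(cmdq_base, 0, itt_base, itt_size, true);
	its_send_mapc_cmd(cmdq_base, 0, 0, true);
	its_send_mapti_cmd(cmdq_base, 0, 0, 0, 8192);	/* 8192 = first LPI */
	its_send_invall_cmd(cmdq_base, 0);
}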
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-.macro save_registers
- add sp, sp, #-16 * 17
-
- stp x0, x1, [sp, #16 * 0]
- stp x2, x3, [sp, #16 * 1]
- stp x4, x5, [sp, #16 * 2]
- stp x6, x7, [sp, #16 * 3]
- stp x8, x9, [sp, #16 * 4]
- stp x10, x11, [sp, #16 * 5]
- stp x12, x13, [sp, #16 * 6]
- stp x14, x15, [sp, #16 * 7]
- stp x16, x17, [sp, #16 * 8]
- stp x18, x19, [sp, #16 * 9]
- stp x20, x21, [sp, #16 * 10]
- stp x22, x23, [sp, #16 * 11]
- stp x24, x25, [sp, #16 * 12]
- stp x26, x27, [sp, #16 * 13]
- stp x28, x29, [sp, #16 * 14]
-
- /*
- * This stores sp_el1 into ex_regs.sp so exception handlers can "look"
- * at it. It will _not_ be used to restore the sp on return from the
- * exception so handlers cannot update it.
- */
- add x1, sp, #16 * 17
- stp x30, x1, [sp, #16 * 15] /* x30, SP */
-
- mrs x1, elr_el1
- mrs x2, spsr_el1
- stp x1, x2, [sp, #16 * 16] /* PC, PSTATE */
-.endm
-
-.macro restore_registers
- ldp x1, x2, [sp, #16 * 16] /* PC, PSTATE */
- msr elr_el1, x1
- msr spsr_el1, x2
-
- /* sp is not restored */
- ldp x30, xzr, [sp, #16 * 15] /* x30, SP */
-
- ldp x28, x29, [sp, #16 * 14]
- ldp x26, x27, [sp, #16 * 13]
- ldp x24, x25, [sp, #16 * 12]
- ldp x22, x23, [sp, #16 * 11]
- ldp x20, x21, [sp, #16 * 10]
- ldp x18, x19, [sp, #16 * 9]
- ldp x16, x17, [sp, #16 * 8]
- ldp x14, x15, [sp, #16 * 7]
- ldp x12, x13, [sp, #16 * 6]
- ldp x10, x11, [sp, #16 * 5]
- ldp x8, x9, [sp, #16 * 4]
- ldp x6, x7, [sp, #16 * 3]
- ldp x4, x5, [sp, #16 * 2]
- ldp x2, x3, [sp, #16 * 1]
- ldp x0, x1, [sp, #16 * 0]
-
- add sp, sp, #16 * 17
-
- eret
-.endm
-
-.pushsection ".entry.text", "ax"
-.balign 0x800
-.global vectors
-vectors:
-.popsection
-
-.set vector, 0
-
-/*
- * Build an exception handler for vector and append a jump to it into
- * vectors (while making sure that it's 0x80 aligned).
- */
-.macro HANDLER, label
-handler_\label:
- save_registers
- mov x0, sp
- mov x1, #vector
- bl route_exception
- restore_registers
-
-.pushsection ".entry.text", "ax"
-.balign 0x80
- b handler_\label
-.popsection
-
-.set vector, vector + 1
-.endm
-
-.macro HANDLER_INVALID
-.pushsection ".entry.text", "ax"
-.balign 0x80
-/* This will abort so no need to save and restore registers. */
- mov x0, #vector
- mov x1, #0 /* ec */
- mov x2, #0 /* valid_ec */
- b kvm_exit_unexpected_exception
-.popsection
-
-.set vector, vector + 1
-.endm
-
-/*
- * Caution: be sure to not add anything between the declaration of vectors
- * above and these macro calls that will build the vectors table below it.
- */
- HANDLER_INVALID // Synchronous EL1t
- HANDLER_INVALID // IRQ EL1t
- HANDLER_INVALID // FIQ EL1t
- HANDLER_INVALID // Error EL1t
-
- HANDLER el1h_sync // Synchronous EL1h
- HANDLER el1h_irq // IRQ EL1h
- HANDLER el1h_fiq // FIQ EL1h
- HANDLER el1h_error // Error EL1h
-
- HANDLER el0_sync_64 // Synchronous 64-bit EL0
- HANDLER el0_irq_64 // IRQ 64-bit EL0
- HANDLER el0_fiq_64 // FIQ 64-bit EL0
- HANDLER el0_error_64 // Error 64-bit EL0
-
- HANDLER el0_sync_32 // Synchronous 32-bit EL0
- HANDLER el0_irq_32 // IRQ 32-bit EL0
- HANDLER el0_fiq_32 // FIQ 32-bit EL0
- HANDLER el0_error_32 // Error 32-bit EL0
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * AArch64 code
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-
-#include <linux/compiler.h>
-#include <assert.h>
-
-#include "guest_modes.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "ucall_common.h"
-
-#include <linux/bitfield.h>
-#include <linux/sizes.h>
-
-#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000
-
-static vm_vaddr_t exception_handlers;
-
-static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
-{
- return (v + vm->page_size) & ~(vm->page_size - 1);
-}
-
-static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva)
-{
- unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
- uint64_t mask = (1UL << (vm->va_bits - shift)) - 1;
-
- return (gva >> shift) & mask;
-}
-
-static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva)
-{
- unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift;
- uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
-
- TEST_ASSERT(vm->pgtable_levels == 4,
- "Mode %d does not have 4 page table levels", vm->mode);
-
- return (gva >> shift) & mask;
-}
-
-static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva)
-{
- unsigned int shift = (vm->page_shift - 3) + vm->page_shift;
- uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
-
- TEST_ASSERT(vm->pgtable_levels >= 3,
- "Mode %d does not have >= 3 page table levels", vm->mode);
-
- return (gva >> shift) & mask;
-}
-
-static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva)
-{
- uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
- return (gva >> vm->page_shift) & mask;
-}
-
-static inline bool use_lpa2_pte_format(struct kvm_vm *vm)
-{
- return (vm->page_size == SZ_4K || vm->page_size == SZ_16K) &&
- (vm->pa_bits > 48 || vm->va_bits > 48);
-}
-
-static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs)
-{
- uint64_t pte;
-
- if (use_lpa2_pte_format(vm)) {
- pte = pa & GENMASK(49, vm->page_shift);
- pte |= FIELD_GET(GENMASK(51, 50), pa) << 8;
- attrs &= ~GENMASK(9, 8);
- } else {
- pte = pa & GENMASK(47, vm->page_shift);
- if (vm->page_shift == 16)
- pte |= FIELD_GET(GENMASK(51, 48), pa) << 12;
- }
- pte |= attrs;
-
- return pte;
-}
-
-static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
-{
- uint64_t pa;
-
- if (use_lpa2_pte_format(vm)) {
- pa = pte & GENMASK(49, vm->page_shift);
- pa |= FIELD_GET(GENMASK(9, 8), pte) << 50;
- } else {
- pa = pte & GENMASK(47, vm->page_shift);
- if (vm->page_shift == 16)
- pa |= FIELD_GET(GENMASK(15, 12), pte) << 48;
- }
-
- return pa;
-}
-
-static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
-{
- unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
- return 1 << (vm->va_bits - shift);
-}
-
-static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
-{
- return 1 << (vm->page_shift - 3);
-}
-
-void virt_arch_pgd_alloc(struct kvm_vm *vm)
-{
- size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;
-
- if (vm->pgd_created)
- return;
-
- vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR,
- vm->memslots[MEM_REGION_PT]);
- vm->pgd_created = true;
-}
-
-static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint64_t flags)
-{
- uint8_t attr_idx = flags & 7;
- uint64_t *ptep;
-
- TEST_ASSERT((vaddr % vm->page_size) == 0,
- "Virtual address not on page boundary,\n"
- " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
- TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
- (vaddr >> vm->page_shift)),
- "Invalid virtual address, vaddr: 0x%lx", vaddr);
- TEST_ASSERT((paddr % vm->page_size) == 0,
- "Physical address not on page boundary,\n"
- " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
- TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
-
- ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
- if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
-
- switch (vm->pgtable_levels) {
- case 4:
- ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
- if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
- /* fall through */
- case 3:
- ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
- if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
- /* fall through */
- case 2:
- ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
- break;
- default:
- TEST_FAIL("Page table levels must be 2, 3, or 4");
- }
-
- *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */
-}
-
-void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
-{
- uint64_t attr_idx = MT_NORMAL;
-
- _virt_pg_map(vm, vaddr, paddr, attr_idx);
-}
-
-uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
-{
- uint64_t *ptep;
-
- if (!vm->pgd_created)
- goto unmapped_gva;
-
- ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
- if (!ptep)
- goto unmapped_gva;
-
- switch (vm->pgtable_levels) {
- case 4:
- ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
- if (!ptep)
- goto unmapped_gva;
- /* fall through */
- case 3:
- ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
- if (!ptep)
- goto unmapped_gva;
- /* fall through */
- case 2:
- ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
- if (!ptep)
- goto unmapped_gva;
- break;
- default:
- TEST_FAIL("Page table levels must be 2, 3, or 4");
- }
-
- return ptep;
-
-unmapped_gva:
- TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
- exit(EXIT_FAILURE);
-}
-
-vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
-{
- uint64_t *ptep = virt_get_pte_hva(vm, gva);
-
- return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
-}
-
-static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
-{
-#ifdef DEBUG
- static const char * const type[] = { "", "pud", "pmd", "pte" };
- uint64_t pte, *ptep;
-
- if (level == 4)
- return;
-
- for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
- ptep = addr_gpa2hva(vm, pte);
- if (!*ptep)
- continue;
- fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep);
- pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level + 1);
- }
-#endif
-}
-
-void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
-{
- int level = 4 - (vm->pgtable_levels - 1);
- uint64_t pgd, *ptep;
-
- if (!vm->pgd_created)
- return;
-
- for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {
- ptep = addr_gpa2hva(vm, pgd);
- if (!*ptep)
- continue;
- fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep);
- pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level);
- }
-}
-
-void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
-{
- struct kvm_vcpu_init default_init = { .target = -1, };
- struct kvm_vm *vm = vcpu->vm;
- uint64_t sctlr_el1, tcr_el1, ttbr0_el1;
-
- if (!init)
- init = &default_init;
-
- if (init->target == -1) {
- struct kvm_vcpu_init preferred;
- vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred);
- init->target = preferred.target;
- }
-
- vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init);
-
- /*
- * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
- * registers, which the variable argument list macros do.
- */
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20);
-
- sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1));
- tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1));
-
- /* Configure base granule size */
- switch (vm->mode) {
- case VM_MODE_PXXV48_4K:
- TEST_FAIL("AArch64 does not support 4K sized pages "
- "with ANY-bit physical address ranges");
- case VM_MODE_P52V48_64K:
- case VM_MODE_P48V48_64K:
- case VM_MODE_P40V48_64K:
- case VM_MODE_P36V48_64K:
- tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
- break;
- case VM_MODE_P52V48_16K:
- case VM_MODE_P48V48_16K:
- case VM_MODE_P40V48_16K:
- case VM_MODE_P36V48_16K:
- case VM_MODE_P36V47_16K:
- tcr_el1 |= 2ul << 14; /* TG0 = 16KB */
- break;
- case VM_MODE_P52V48_4K:
- case VM_MODE_P48V48_4K:
- case VM_MODE_P40V48_4K:
- case VM_MODE_P36V48_4K:
- tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
- break;
- default:
- TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
- }
-
- ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift);
-
- /* Configure output size */
- switch (vm->mode) {
- case VM_MODE_P52V48_4K:
- case VM_MODE_P52V48_16K:
- case VM_MODE_P52V48_64K:
- tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
- ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
- break;
- case VM_MODE_P48V48_4K:
- case VM_MODE_P48V48_16K:
- case VM_MODE_P48V48_64K:
- tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
- break;
- case VM_MODE_P40V48_4K:
- case VM_MODE_P40V48_16K:
- case VM_MODE_P40V48_64K:
- tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
- break;
- case VM_MODE_P36V48_4K:
- case VM_MODE_P36V48_16K:
- case VM_MODE_P36V48_64K:
- case VM_MODE_P36V47_16K:
- tcr_el1 |= 1ul << 32; /* IPS = 36 bits */
- break;
- default:
- TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
- }
-
- sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */;
- /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
- tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
- tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
- if (use_lpa2_pte_format(vm))
- tcr_el1 |= (1ul << 59) /* DS */;
-
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1);
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id);
-}
-
-void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
-{
- uint64_t pstate, pc;
-
- pstate = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate));
- pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
-
- fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n",
- indent, "", pstate, pc);
-}
-
-void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
-{
- vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
-}
-
-static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
- struct kvm_vcpu_init *init)
-{
- size_t stack_size;
- uint64_t stack_vaddr;
- struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
-
- stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
- vm->page_size;
- stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
- DEFAULT_ARM64_GUEST_STACK_VADDR_MIN,
- MEM_REGION_DATA);
-
- aarch64_vcpu_setup(vcpu, init);
-
- vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
- return vcpu;
-}
-
-struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
- struct kvm_vcpu_init *init, void *guest_code)
-{
- struct kvm_vcpu *vcpu = __aarch64_vcpu_add(vm, vcpu_id, init);
-
- vcpu_arch_set_entry_point(vcpu, guest_code);
-
- return vcpu;
-}
-
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
-{
- return __aarch64_vcpu_add(vm, vcpu_id, NULL);
-}
-
-void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
-{
- va_list ap;
- int i;
-
- TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
- " num: %u", num);
-
- va_start(ap, num);
-
- for (i = 0; i < num; i++) {
- vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.regs[i]),
- va_arg(ap, uint64_t));
- }
-
- va_end(ap);
-}
-
-void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec)
-{
- ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec);
- while (1)
- ;
-}
-
-void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED)
- return;
-
- if (uc.args[2]) /* valid_ec */ {
- assert(VECTOR_IS_SYNC(uc.args[0]));
- TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
- uc.args[0], uc.args[1]);
- } else {
- assert(!VECTOR_IS_SYNC(uc.args[0]));
- TEST_FAIL("Unexpected exception (vector:0x%lx)",
- uc.args[0]);
- }
-}
-
-struct handlers {
- handler_fn exception_handlers[VECTOR_NUM][ESR_ELx_EC_MAX + 1];
-};
-
-void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
-{
- extern char vectors;
-
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors);
-}
-
-void route_exception(struct ex_regs *regs, int vector)
-{
- struct handlers *handlers = (struct handlers *)exception_handlers;
- bool valid_ec;
- int ec = 0;
-
- switch (vector) {
- case VECTOR_SYNC_CURRENT:
- case VECTOR_SYNC_LOWER_64:
- ec = ESR_ELx_EC(read_sysreg(esr_el1));
- valid_ec = true;
- break;
- case VECTOR_IRQ_CURRENT:
- case VECTOR_IRQ_LOWER_64:
- case VECTOR_FIQ_CURRENT:
- case VECTOR_FIQ_LOWER_64:
- case VECTOR_ERROR_CURRENT:
- case VECTOR_ERROR_LOWER_64:
- ec = 0;
- valid_ec = false;
- break;
- default:
- valid_ec = false;
- goto unexpected_exception;
- }
-
- if (handlers && handlers->exception_handlers[vector][ec])
- return handlers->exception_handlers[vector][ec](regs);
-
-unexpected_exception:
- kvm_exit_unexpected_exception(vector, ec, valid_ec);
-}
-
-void vm_init_descriptor_tables(struct kvm_vm *vm)
-{
- vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
- vm->page_size, MEM_REGION_DATA);
-
- *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
-}
-
-void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec,
- void (*handler)(struct ex_regs *))
-{
- struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
-
- assert(VECTOR_IS_SYNC(vector));
- assert(vector < VECTOR_NUM);
- assert(ec <= ESR_ELx_EC_MAX);
- handlers->exception_handlers[vector][ec] = handler;
-}
-
-void vm_install_exception_handler(struct kvm_vm *vm, int vector,
- void (*handler)(struct ex_regs *))
-{
- struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
-
- assert(!VECTOR_IS_SYNC(vector));
- assert(vector < VECTOR_NUM);
- handlers->exception_handlers[vector][0] = handler;
-}
-
-uint32_t guest_get_vcpuid(void)
-{
- return read_sysreg(tpidr_el1);
-}
-
-static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran,
- uint32_t not_sup_val, uint32_t ipa52_min_val)
-{
- if (gran == not_sup_val)
- return 0;
- else if (gran >= ipa52_min_val && vm_ipa >= 52)
- return 52;
- else
- return min(vm_ipa, 48U);
-}
-
-void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
- uint32_t *ipa16k, uint32_t *ipa64k)
-{
- struct kvm_vcpu_init preferred_init;
- int kvm_fd, vm_fd, vcpu_fd, err;
- uint64_t val;
- uint32_t gran;
- struct kvm_one_reg reg = {
- .id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1),
- .addr = (uint64_t)&val,
- };
-
- kvm_fd = open_kvm_dev_path_or_exit();
- vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, (void *)(unsigned long)ipa);
- TEST_ASSERT(vm_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm_fd));
-
- vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
- TEST_ASSERT(vcpu_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu_fd));
-
- err = ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init);
- TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_PREFERRED_TARGET, err));
- err = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &preferred_init);
- TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_VCPU_INIT, err));
-
- err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
- TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd));
-
- gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val);
- *ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI,
- ID_AA64MMFR0_EL1_TGRAN4_52_BIT);
-
- gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val);
- *ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI,
- ID_AA64MMFR0_EL1_TGRAN64_IMP);
-
- gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val);
- *ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI,
- ID_AA64MMFR0_EL1_TGRAN16_52_BIT);
-
- close(vcpu_fd);
- close(vm_fd);
- close(kvm_fd);
-}
-
-#define __smccc_call(insn, function_id, arg0, arg1, arg2, arg3, arg4, arg5, \
- arg6, res) \
- asm volatile("mov w0, %w[function_id]\n" \
- "mov x1, %[arg0]\n" \
- "mov x2, %[arg1]\n" \
- "mov x3, %[arg2]\n" \
- "mov x4, %[arg3]\n" \
- "mov x5, %[arg4]\n" \
- "mov x6, %[arg5]\n" \
- "mov x7, %[arg6]\n" \
- #insn "#0\n" \
- "mov %[res0], x0\n" \
- "mov %[res1], x1\n" \
- "mov %[res2], x2\n" \
- "mov %[res3], x3\n" \
- : [res0] "=r"(res->a0), [res1] "=r"(res->a1), \
- [res2] "=r"(res->a2), [res3] "=r"(res->a3) \
- : [function_id] "r"(function_id), [arg0] "r"(arg0), \
- [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3), \
- [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6) \
- : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7")
-
-
-void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
- uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
- uint64_t arg6, struct arm_smccc_res *res)
-{
- __smccc_call(hvc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
- arg6, res);
-}
-
-void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
- uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
- uint64_t arg6, struct arm_smccc_res *res)
-{
- __smccc_call(smc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
- arg6, res);
-}
-
-void kvm_selftest_arch_init(void)
-{
- /*
- * arm64 doesn't have a true default mode, so start by computing the
- * available IPA space and page sizes early.
- */
- guest_modes_append_default();
-}
-
-void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
-{
- /*
- * arm64 selftests use only TTBR0_EL1, meaning that the valid VA space
- * is [0, 2^(64 - TCR_EL1.T0SZ)).
- */
- sparsebit_set_num(vm->vpages_valid, 0,
- (1ULL << vm->va_bits) >> vm->page_shift);
-}
-
-/* Helper to call wfi instruction. */
-void wfi(void)
-{
- asm volatile("wfi");
-}
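
The SMCCC wrappers above are how guest code in these selftests talks to the hypervisor's firmware interface. A minimal, hedged usage sketch; the helper name is illustrative and 0x84000000 is the PSCI_VERSION function ID per the SMCCC/PSCI specifications:

#include "processor.h"      /* smccc_hvc(), struct arm_smccc_res */
#include "ucall_common.h"   /* GUEST_ASSERT() */

/* Guest code: query KVM's PSCI version over the HVC conduit. */
static void guest_check_psci_version(void)
{
	struct arm_smccc_res res;

	smccc_hvc(0x84000000 /* PSCI_VERSION */, 0, 0, 0, 0, 0, 0, 0, &res);

	/* PSCI returns NOT_SUPPORTED as -1; anything else encodes major.minor. */
	GUEST_ASSERT(res.a0 != (uint64_t)-1);
}
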
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARM64 Spinlock support
- */
-#include <stdint.h>
-
-#include "spinlock.h"
-
-void spin_lock(struct spinlock *lock)
-{
- int val, res;
-
- asm volatile(
- "1: ldaxr %w0, [%2]\n"
- " cbnz %w0, 1b\n"
- " mov %w0, #1\n"
- " stxr %w1, %w0, [%2]\n"
- " cbnz %w1, 1b\n"
- : "=&r" (val), "=&r" (res)
- : "r" (&lock->v)
- : "memory");
-}
-
-void spin_unlock(struct spinlock *lock)
-{
- asm volatile("stlr wzr, [%0]\n" : : "r" (&lock->v) : "memory");
-}
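
A short sketch of how guest code uses these helpers to serialize a counter shared between vCPUs; the counter and function names are illustrative:

#include <stdint.h>

#include "spinlock.h"

static struct spinlock counter_lock;
static uint64_t shared_counter;

/* Guest code, run on every vCPU: increments must not interleave. */
static void guest_bump_counter(void)
{
	spin_lock(&counter_lock);
	shared_counter++;
	spin_unlock(&counter_lock);
}
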
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ucall support. A ucall is a "hypercall to userspace".
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-#include "kvm_util.h"
-
-vm_vaddr_t *ucall_exit_mmio_addr;
-
-void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
- vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR);
-
- virt_map(vm, mmio_gva, mmio_gpa, 1);
-
- vm->ucall_mmio_addr = mmio_gpa;
-
- write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
-}
-
-void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
-{
- struct kvm_run *run = vcpu->run;
-
- if (run->exit_reason == KVM_EXIT_MMIO &&
- run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) {
- TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t),
- "Unexpected ucall exit mmio address access");
- return (void *)(*((uint64_t *)run->mmio.data));
- }
-
- return NULL;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARM Generic Interrupt Controller (GIC) v3 host support
- */
-
-#include <linux/kernel.h>
-#include <linux/kvm.h>
-#include <linux/sizes.h>
-#include <asm/cputype.h>
-#include <asm/kvm_para.h>
-#include <asm/kvm.h>
-
-#include "kvm_util.h"
-#include "vgic.h"
-#include "gic.h"
-#include "gic_v3.h"
-
-/*
- * vGIC-v3 default host setup
- *
- * Input args:
- * vm - KVM VM
- * nr_vcpus - Number of vCPUs supported by this VM
- *
- * Output args: None
- *
- * Return: GIC file-descriptor or negative error code upon failure
- *
- * The function creates a vGIC-v3 device and maps the distributor and
- * redistributor regions of the guest. Since it depends on the number of
- * vCPUs for the VM, it must be called after all the vCPUs have been created.
- */
-int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
-{
- int gic_fd;
- uint64_t attr;
- struct list_head *iter;
- unsigned int nr_gic_pages, nr_vcpus_created = 0;
-
- TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty");
-
- /*
- * Make sure that the caller is in fact calling this
- * function after all the vCPUs are added.
- */
- list_for_each(iter, &vm->vcpus)
- nr_vcpus_created++;
- TEST_ASSERT(nr_vcpus == nr_vcpus_created,
- "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)",
- nr_vcpus, nr_vcpus_created);
-
- /* Distributor setup */
- gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
- if (gic_fd < 0)
- return gic_fd;
-
- kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs);
-
- kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
- attr = GICD_BASE_GPA;
- kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_DIST, &attr);
- nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE);
- virt_map(vm, GICD_BASE_GPA, GICD_BASE_GPA, nr_gic_pages);
-
- /* Redistributor setup */
- attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, GICR_BASE_GPA, 0, 0);
- kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &attr);
- nr_gic_pages = vm_calc_num_guest_pages(vm->mode,
- KVM_VGIC_V3_REDIST_SIZE * nr_vcpus);
- virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages);
-
- kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
- return gic_fd;
-}
-
-/* should only work for level sensitive interrupts */
-int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
-{
- uint64_t attr = 32 * (intid / 32);
- uint64_t index = intid % 32;
- uint64_t val;
- int ret;
-
- ret = __kvm_device_attr_get(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
- attr, &val);
- if (ret != 0)
- return ret;
-
- val |= 1U << index;
- ret = __kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
- attr, &val);
- return ret;
-}
-
-void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
-{
- int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
-
- TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, ret));
-}
-
-int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
-{
- uint32_t irq = intid & KVM_ARM_IRQ_NUM_MASK;
-
- TEST_ASSERT(!INTID_IS_SGI(intid), "KVM_IRQ_LINE's interface itself "
- "doesn't allow injecting SGIs. There's no mask for it.");
-
- if (INTID_IS_PPI(intid))
- irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT;
- else
- irq |= KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT;
-
- return _kvm_irq_line(vm, irq, level);
-}
-
-void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
-{
- int ret = _kvm_arm_irq_line(vm, intid, level);
-
- TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
-}
-
-static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu,
- uint64_t reg_off)
-{
- uint64_t reg = intid / 32;
- uint64_t index = intid % 32;
- uint64_t attr = reg_off + reg * 4;
- uint64_t val;
- bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid);
-
- uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
- : KVM_DEV_ARM_VGIC_GRP_DIST_REGS;
-
- if (intid_is_private) {
- /* TODO: only vcpu 0 implemented for now. */
- assert(vcpu->id == 0);
- attr += SZ_64K;
- }
-
- /* Check that the addr part of the attr is within 32 bits. */
- assert((attr & ~KVM_DEV_ARM_VGIC_OFFSET_MASK) == 0);
-
- /*
- * All calls will succeed, even with invalid intid's, as long as the
- * addr part of the attr is within 32 bits (checked above). An invalid
- * intid will just make the read/writes point to above the intended
- * register space (i.e., ICPENDR after ISPENDR).
- */
- kvm_device_attr_get(gic_fd, group, attr, &val);
- val |= 1ULL << index;
- kvm_device_attr_set(gic_fd, group, attr, &val);
-}
-
-void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
-{
- vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR);
-}
-
-void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
-{
- vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER);
-}
-
-int vgic_its_setup(struct kvm_vm *vm)
-{
- int its_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_ITS);
- u64 attr;
-
- attr = GITS_BASE_GPA;
- kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_ITS_ADDR_TYPE, &attr);
-
- kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
- virt_map(vm, GITS_BASE_GPA, GITS_BASE_GPA,
- vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_ITS_SIZE));
-
- return its_fd;
-}
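
A hedged sketch of the host-side flow described in the vgic_v3_setup() comment above: create the VM and all of its vCPUs first, then instantiate the vGIC and inject an SPI with kvm_arm_irq_line(). The vCPU count, nr_irqs value and guest_code are placeholders:

#include <unistd.h>

#include "kvm_util.h"
#include "test_util.h"
#include "vgic.h"

#define NR_VCPUS 2

static void run_vgic_test(void *guest_code)
{
	struct kvm_vcpu *vcpus[NR_VCPUS];
	struct kvm_vm *vm;
	int gic_fd;

	/* The vGIC depends on the vCPU count, so add all vCPUs first. */
	vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus);

	gic_fd = vgic_v3_setup(vm, NR_VCPUS, 64 /* nr_irqs */);
	__TEST_REQUIRE(gic_fd >= 0, "Failed to create vGIC-v3");

	/* Pulse SPI 32, the first SPI, as a level-triggered line. */
	kvm_arm_irq_line(vm, 32, 1);
	kvm_arm_irq_line(vm, 32, 0);

	close(gic_fd);
	kvm_vm_free(vm);
}
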
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) support
+ */
+
+#include <errno.h>
+#include <linux/bits.h>
+#include <linux/sizes.h>
+
+#include "kvm_util.h"
+
+#include <gic.h>
+#include "gic_private.h"
+#include "processor.h"
+#include "spinlock.h"
+
+static const struct gic_common_ops *gic_common_ops;
+static struct spinlock gic_lock;
+
+static void gic_cpu_init(unsigned int cpu)
+{
+ gic_common_ops->gic_cpu_init(cpu);
+}
+
+static void gic_dist_init(enum gic_type type, unsigned int nr_cpus)
+{
+ const struct gic_common_ops *gic_ops = NULL;
+
+ spin_lock(&gic_lock);
+
+ /* Distributor initialization is needed only once per VM */
+ if (gic_common_ops) {
+ spin_unlock(&gic_lock);
+ return;
+ }
+
+ if (type == GIC_V3)
+ gic_ops = &gicv3_ops;
+
+ GUEST_ASSERT(gic_ops);
+
+ gic_ops->gic_init(nr_cpus);
+ gic_common_ops = gic_ops;
+
+ /* Make sure that the initialized data is visible to all the vCPUs */
+ dsb(sy);
+
+ spin_unlock(&gic_lock);
+}
+
+void gic_init(enum gic_type type, unsigned int nr_cpus)
+{
+ uint32_t cpu = guest_get_vcpuid();
+
+ GUEST_ASSERT(type < GIC_TYPE_MAX);
+ GUEST_ASSERT(nr_cpus);
+
+ gic_dist_init(type, nr_cpus);
+ gic_cpu_init(cpu);
+}
+
+void gic_irq_enable(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_enable(intid);
+}
+
+void gic_irq_disable(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_disable(intid);
+}
+
+unsigned int gic_get_and_ack_irq(void)
+{
+ uint64_t irqstat;
+ unsigned int intid;
+
+ GUEST_ASSERT(gic_common_ops);
+
+ irqstat = gic_common_ops->gic_read_iar();
+ intid = irqstat & GENMASK(23, 0);
+
+ return intid;
+}
+
+void gic_set_eoi(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_write_eoir(intid);
+}
+
+void gic_set_dir(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_write_dir(intid);
+}
+
+void gic_set_eoi_split(bool split)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_eoi_split(split);
+}
+
+void gic_set_priority_mask(uint64_t pmr)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_priority_mask(pmr);
+}
+
+void gic_set_priority(unsigned int intid, unsigned int prio)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_priority(intid, prio);
+}
+
+void gic_irq_set_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_active(intid);
+}
+
+void gic_irq_clear_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_clear_active(intid);
+}
+
+bool gic_irq_get_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ return gic_common_ops->gic_irq_get_active(intid);
+}
+
+void gic_irq_set_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_pending(intid);
+}
+
+void gic_irq_clear_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_clear_pending(intid);
+}
+
+bool gic_irq_get_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ return gic_common_ops->gic_irq_get_pending(intid);
+}
+
+void gic_irq_set_config(unsigned int intid, bool is_edge)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_config(intid, is_edge);
+}
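
A sketch of how a guest drives this wrapper on a single vCPU, assuming the host injects the interrupt (for example with kvm_arm_irq_line()) and that gic.h's IAR_SPURIOUS constant names the 1023 spurious INTID; the SPI number and handler wiring are illustrative:

#include <stdbool.h>

#include "gic.h"
#include "processor.h"
#include "ucall_common.h"

#define TEST_SPI 32	/* hypothetical SPI used by the test */

static volatile bool handled;

/* Installed for VECTOR_IRQ_CURRENT via vm_install_exception_handler(). */
static void guest_irq_handler(struct ex_regs *regs)
{
	unsigned int intid = gic_get_and_ack_irq();

	if (intid == IAR_SPURIOUS)
		return;

	handled = true;
	gic_set_eoi(intid);
}

static void guest_code(void)
{
	gic_init(GIC_V3, 1 /* nr_cpus */);
	gic_irq_enable(TEST_SPI);
	local_irq_enable();

	while (!handled)
		wfi();

	GUEST_DONE();
}
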
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) private defines that are only
+ * shared among the GIC library code.
+ */
+
+#ifndef SELFTEST_KVM_GIC_PRIVATE_H
+#define SELFTEST_KVM_GIC_PRIVATE_H
+
+struct gic_common_ops {
+ void (*gic_init)(unsigned int nr_cpus);
+ void (*gic_cpu_init)(unsigned int cpu);
+ void (*gic_irq_enable)(unsigned int intid);
+ void (*gic_irq_disable)(unsigned int intid);
+ uint64_t (*gic_read_iar)(void);
+ void (*gic_write_eoir)(uint32_t irq);
+ void (*gic_write_dir)(uint32_t irq);
+ void (*gic_set_eoi_split)(bool split);
+ void (*gic_set_priority_mask)(uint64_t mask);
+ void (*gic_set_priority)(uint32_t intid, uint32_t prio);
+ void (*gic_irq_set_active)(uint32_t intid);
+ void (*gic_irq_clear_active)(uint32_t intid);
+ bool (*gic_irq_get_active)(uint32_t intid);
+ void (*gic_irq_set_pending)(uint32_t intid);
+ void (*gic_irq_clear_pending)(uint32_t intid);
+ bool (*gic_irq_get_pending)(uint32_t intid);
+ void (*gic_irq_set_config)(uint32_t intid, bool is_edge);
+};
+
+extern const struct gic_common_ops gicv3_ops;
+
+#endif /* SELFTEST_KVM_GIC_PRIVATE_H */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) v3 support
+ */
+
+#include <linux/sizes.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "delay.h"
+
+#include "gic.h"
+#include "gic_v3.h"
+#include "gic_private.h"
+
+#define GICV3_MAX_CPUS 512
+
+#define GICD_INT_DEF_PRI 0xa0
+#define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\
+ (GICD_INT_DEF_PRI << 16) |\
+ (GICD_INT_DEF_PRI << 8) |\
+ GICD_INT_DEF_PRI)
+
+#define ICC_PMR_DEF_PRIO 0xf0
+
+struct gicv3_data {
+ unsigned int nr_cpus;
+ unsigned int nr_spis;
+};
+
+#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K)
+#define DIST_BIT (1U << 31)
+
+enum gicv3_intid_range {
+ SGI_RANGE,
+ PPI_RANGE,
+ SPI_RANGE,
+ INVALID_RANGE,
+};
+
+static struct gicv3_data gicv3_data;
+
+static void gicv3_gicd_wait_for_rwp(void)
+{
+ unsigned int count = 100000; /* 1s */
+
+ while (readl(GICD_BASE_GVA + GICD_CTLR) & GICD_CTLR_RWP) {
+ GUEST_ASSERT(count--);
+ udelay(10);
+ }
+}
+
+static inline volatile void *gicr_base_cpu(uint32_t cpu)
+{
+ /* Align all the redistributors sequentially */
+ return GICR_BASE_GVA + cpu * SZ_64K * 2;
+}
+
+static void gicv3_gicr_wait_for_rwp(uint32_t cpu)
+{
+ unsigned int count = 100000; /* 1s */
+
+ while (readl(gicr_base_cpu(cpu) + GICR_CTLR) & GICR_CTLR_RWP) {
+ GUEST_ASSERT(count--);
+ udelay(10);
+ }
+}
+
+static void gicv3_wait_for_rwp(uint32_t cpu_or_dist)
+{
+ if (cpu_or_dist & DIST_BIT)
+ gicv3_gicd_wait_for_rwp();
+ else
+ gicv3_gicr_wait_for_rwp(cpu_or_dist);
+}
+
+static enum gicv3_intid_range get_intid_range(unsigned int intid)
+{
+ switch (intid) {
+ case 0 ... 15:
+ return SGI_RANGE;
+ case 16 ... 31:
+ return PPI_RANGE;
+ case 32 ... 1019:
+ return SPI_RANGE;
+ }
+
+ /* We should not be reaching here */
+ GUEST_ASSERT(0);
+
+ return INVALID_RANGE;
+}
+
+static uint64_t gicv3_read_iar(void)
+{
+ uint64_t irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
+
+ dsb(sy);
+ return irqstat;
+}
+
+static void gicv3_write_eoir(uint32_t irq)
+{
+ write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
+ isb();
+}
+
+static void gicv3_write_dir(uint32_t irq)
+{
+ write_sysreg_s(irq, SYS_ICC_DIR_EL1);
+ isb();
+}
+
+static void gicv3_set_priority_mask(uint64_t mask)
+{
+ write_sysreg_s(mask, SYS_ICC_PMR_EL1);
+}
+
+static void gicv3_set_eoi_split(bool split)
+{
+ uint32_t val;
+
+ /*
+ * All other fields are read-only, so no need to read CTLR first. In
+ * fact, the kernel does the same.
+ */
+ val = split ? (1U << 1) : 0;
+ write_sysreg_s(val, SYS_ICC_CTLR_EL1);
+ isb();
+}
+
+uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset)
+{
+ volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA
+ : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist));
+ return readl(base + offset);
+}
+
+void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val)
+{
+ volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA
+ : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist));
+ writel(reg_val, base + offset);
+}
+
+uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, uint64_t offset, uint32_t mask)
+{
+ return gicv3_reg_readl(cpu_or_dist, offset) & mask;
+}
+
+void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset,
+ uint32_t mask, uint32_t reg_val)
+{
+ uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask;
+
+ tmp |= (reg_val & mask);
+ gicv3_reg_writel(cpu_or_dist, offset, tmp);
+}
+
+/*
+ * We use a single offset for the distributor and redistributor maps as they
+ * have the same value in both. The only exceptions are registers that only
+ * exist in one and not the other, like GICR_WAKER that doesn't exist in the
+ * distributor map. Such registers are conveniently marked as reserved in the
+ * map that doesn't implement it; like GICR_WAKER's offset of 0x0014 being
+ * marked as "Reserved" in the Distributor map.
+ */
+static void gicv3_access_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field,
+ bool write, uint32_t *val)
+{
+ uint32_t cpu = guest_get_vcpuid();
+ enum gicv3_intid_range intid_range = get_intid_range(intid);
+ uint32_t fields_per_reg, index, mask, shift;
+ uint32_t cpu_or_dist;
+
+ GUEST_ASSERT(bits_per_field <= reg_bits);
+ GUEST_ASSERT(!write || *val < (1U << bits_per_field));
+ /*
+ * This function does not support 64 bit accesses. Just asserting here
+ * until we implement readq/writeq.
+ */
+ GUEST_ASSERT(reg_bits == 32);
+
+ fields_per_reg = reg_bits / bits_per_field;
+ index = intid % fields_per_reg;
+ shift = index * bits_per_field;
+ mask = ((1U << bits_per_field) - 1) << shift;
+
+ /* Set offset to the actual register holding intid's config. */
+ offset += (intid / fields_per_reg) * (reg_bits / 8);
+
+ cpu_or_dist = (intid_range == SPI_RANGE) ? DIST_BIT : cpu;
+
+ if (write)
+ gicv3_setl_fields(cpu_or_dist, offset, mask, *val << shift);
+ *val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift;
+}
+
+static void gicv3_write_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field, uint32_t val)
+{
+ gicv3_access_reg(intid, offset, reg_bits,
+ bits_per_field, true, &val);
+}
+
+static uint32_t gicv3_read_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field)
+{
+ uint32_t val;
+
+ gicv3_access_reg(intid, offset, reg_bits,
+ bits_per_field, false, &val);
+ return val;
+}
+
+static void gicv3_set_priority(uint32_t intid, uint32_t prio)
+{
+ gicv3_write_reg(intid, GICD_IPRIORITYR, 32, 8, prio);
+}
+
+/* Sets the intid to be level-sensitive or edge-triggered. */
+static void gicv3_irq_set_config(uint32_t intid, bool is_edge)
+{
+ uint32_t val;
+
+ /* N/A for private interrupts. */
+ GUEST_ASSERT(get_intid_range(intid) == SPI_RANGE);
+ val = is_edge ? 2 : 0;
+ gicv3_write_reg(intid, GICD_ICFGR, 32, 2, val);
+}
+
+static void gicv3_irq_enable(uint32_t intid)
+{
+ bool is_spi = get_intid_range(intid) == SPI_RANGE;
+ uint32_t cpu = guest_get_vcpuid();
+
+ gicv3_write_reg(intid, GICD_ISENABLER, 32, 1, 1);
+ gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
+}
+
+static void gicv3_irq_disable(uint32_t intid)
+{
+ bool is_spi = get_intid_range(intid) == SPI_RANGE;
+ uint32_t cpu = guest_get_vcpuid();
+
+ gicv3_write_reg(intid, GICD_ICENABLER, 32, 1, 1);
+ gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
+}
+
+static void gicv3_irq_set_active(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ISACTIVER, 32, 1, 1);
+}
+
+static void gicv3_irq_clear_active(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ICACTIVER, 32, 1, 1);
+}
+
+static bool gicv3_irq_get_active(uint32_t intid)
+{
+ return gicv3_read_reg(intid, GICD_ISACTIVER, 32, 1);
+}
+
+static void gicv3_irq_set_pending(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ISPENDR, 32, 1, 1);
+}
+
+static void gicv3_irq_clear_pending(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ICPENDR, 32, 1, 1);
+}
+
+static bool gicv3_irq_get_pending(uint32_t intid)
+{
+ return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1);
+}
+
+static void gicv3_enable_redist(volatile void *redist_base)
+{
+ uint32_t val = readl(redist_base + GICR_WAKER);
+ unsigned int count = 100000; /* 1s */
+
+ val &= ~GICR_WAKER_ProcessorSleep;
+ writel(val, redist_base + GICR_WAKER);
+
+ /* Wait until the processor is 'active' */
+ while (readl(redist_base + GICR_WAKER) & GICR_WAKER_ChildrenAsleep) {
+ GUEST_ASSERT(count--);
+ udelay(10);
+ }
+}
+
+static void gicv3_cpu_init(unsigned int cpu)
+{
+ volatile void *sgi_base;
+ unsigned int i;
+ volatile void *redist_base_cpu;
+
+ GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
+
+ redist_base_cpu = gicr_base_cpu(cpu);
+ sgi_base = sgi_base_from_redist(redist_base_cpu);
+
+ gicv3_enable_redist(redist_base_cpu);
+
+ /*
+ * Mark all the SGI and PPI interrupts as non-secure Group-1.
+ * Also, deactivate and disable them.
+ */
+ writel(~0, sgi_base + GICR_IGROUPR0);
+ writel(~0, sgi_base + GICR_ICACTIVER0);
+ writel(~0, sgi_base + GICR_ICENABLER0);
+
+ /* Set a default priority for all the SGIs and PPIs */
+ for (i = 0; i < 32; i += 4)
+ writel(GICD_INT_DEF_PRI_X4,
+ sgi_base + GICR_IPRIORITYR0 + i);
+
+ gicv3_gicr_wait_for_rwp(cpu);
+
+ /* Enable the GIC system register (ICC_*) access */
+ write_sysreg_s(read_sysreg_s(SYS_ICC_SRE_EL1) | ICC_SRE_EL1_SRE,
+ SYS_ICC_SRE_EL1);
+
+ /* Set a default priority threshold */
+ write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
+
+ /* Enable non-secure Group-1 interrupts */
+ write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
+}
+
+static void gicv3_dist_init(void)
+{
+ unsigned int i;
+
+ /* Disable the distributor until we set things up */
+ writel(0, GICD_BASE_GVA + GICD_CTLR);
+ gicv3_gicd_wait_for_rwp();
+
+ /*
+ * Mark all the SPI interrupts as non-secure Group-1.
+ * Also, deactivate and disable them.
+ */
+ for (i = 32; i < gicv3_data.nr_spis; i += 32) {
+ writel(~0, GICD_BASE_GVA + GICD_IGROUPR + i / 8);
+ writel(~0, GICD_BASE_GVA + GICD_ICACTIVER + i / 8);
+ writel(~0, GICD_BASE_GVA + GICD_ICENABLER + i / 8);
+ }
+
+ /* Set a default priority for all the SPIs */
+ for (i = 32; i < gicv3_data.nr_spis; i += 4)
+ writel(GICD_INT_DEF_PRI_X4,
+ GICD_BASE_GVA + GICD_IPRIORITYR + i);
+
+ /* Wait for the settings to sync-in */
+ gicv3_gicd_wait_for_rwp();
+
+ /* Finally, enable the distributor globally with ARE */
+ writel(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A |
+ GICD_CTLR_ENABLE_G1, GICD_BASE_GVA + GICD_CTLR);
+ gicv3_gicd_wait_for_rwp();
+}
+
+static void gicv3_init(unsigned int nr_cpus)
+{
+ GUEST_ASSERT(nr_cpus <= GICV3_MAX_CPUS);
+
+ gicv3_data.nr_cpus = nr_cpus;
+ gicv3_data.nr_spis = GICD_TYPER_SPIS(
+ readl(GICD_BASE_GVA + GICD_TYPER));
+ if (gicv3_data.nr_spis > 1020)
+ gicv3_data.nr_spis = 1020;
+
+ /*
+ * Initialize only the distributor for now.
+ * The redistributor and CPU interfaces are initialized
+ * later for every PE.
+ */
+ gicv3_dist_init();
+}
+
+const struct gic_common_ops gicv3_ops = {
+ .gic_init = gicv3_init,
+ .gic_cpu_init = gicv3_cpu_init,
+ .gic_irq_enable = gicv3_irq_enable,
+ .gic_irq_disable = gicv3_irq_disable,
+ .gic_read_iar = gicv3_read_iar,
+ .gic_write_eoir = gicv3_write_eoir,
+ .gic_write_dir = gicv3_write_dir,
+ .gic_set_priority_mask = gicv3_set_priority_mask,
+ .gic_set_eoi_split = gicv3_set_eoi_split,
+ .gic_set_priority = gicv3_set_priority,
+ .gic_irq_set_active = gicv3_irq_set_active,
+ .gic_irq_clear_active = gicv3_irq_clear_active,
+ .gic_irq_get_active = gicv3_irq_get_active,
+ .gic_irq_set_pending = gicv3_irq_set_pending,
+ .gic_irq_clear_pending = gicv3_irq_clear_pending,
+ .gic_irq_get_pending = gicv3_irq_get_pending,
+ .gic_irq_set_config = gicv3_irq_set_config,
+};
+
+void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
+ vm_paddr_t pend_table)
+{
+ volatile void *rdist_base = gicr_base_cpu(guest_get_vcpuid());
+
+ u32 ctlr;
+ u64 val;
+
+ val = (cfg_table |
+ GICR_PROPBASER_InnerShareable |
+ GICR_PROPBASER_RaWaWb |
+ ((ilog2(cfg_table_size) - 1) & GICR_PROPBASER_IDBITS_MASK));
+ writeq_relaxed(val, rdist_base + GICR_PROPBASER);
+
+ val = (pend_table |
+ GICR_PENDBASER_InnerShareable |
+ GICR_PENDBASER_RaWaWb);
+ writeq_relaxed(val, rdist_base + GICR_PENDBASER);
+
+ ctlr = readl_relaxed(rdist_base + GICR_CTLR);
+ ctlr |= GICR_CTLR_ENABLE_LPIS;
+ writel_relaxed(ctlr, rdist_base + GICR_CTLR);
+}
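
To make the field arithmetic in gicv3_access_reg() above concrete, here is what it computes for one hypothetical access, the priority of SPI 42 in GICD_IPRIORITYR (32-bit registers, 8 bits per field):

/*
 * fields_per_reg = 32 / 8              = 4
 * index          = 42 % 4              = 2
 * shift          = 2 * 8               = 16
 * mask           = 0xff << 16          = 0x00ff0000
 * offset        += (42 / 4) * (32 / 8) = GICD_IPRIORITYR + 0x28
 *
 * So SPI 42's priority byte lives in bits [23:16] of the register at
 * GICD_IPRIORITYR + 0x28, and because the INTID falls in SPI_RANGE the
 * access is routed to the distributor (cpu_or_dist = DIST_BIT).
 */
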
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Guest ITS library, generously donated by drivers/irqchip/irq-gic-v3-its.c
+ * over in the kernel tree.
+ */
+
+#include <linux/kvm.h>
+#include <linux/sizes.h>
+#include <asm/kvm_para.h>
+#include <asm/kvm.h>
+
+#include "kvm_util.h"
+#include "vgic.h"
+#include "gic.h"
+#include "gic_v3.h"
+#include "processor.h"
+
+static u64 its_read_u64(unsigned long offset)
+{
+ return readq_relaxed(GITS_BASE_GVA + offset);
+}
+
+static void its_write_u64(unsigned long offset, u64 val)
+{
+ writeq_relaxed(val, GITS_BASE_GVA + offset);
+}
+
+static u32 its_read_u32(unsigned long offset)
+{
+ return readl_relaxed(GITS_BASE_GVA + offset);
+}
+
+static void its_write_u32(unsigned long offset, u32 val)
+{
+ writel_relaxed(val, GITS_BASE_GVA + offset);
+}
+
+static unsigned long its_find_baser(unsigned int type)
+{
+ int i;
+
+ for (i = 0; i < GITS_BASER_NR_REGS; i++) {
+ u64 baser;
+ unsigned long offset = GITS_BASER + (i * sizeof(baser));
+
+ baser = its_read_u64(offset);
+ if (GITS_BASER_TYPE(baser) == type)
+ return offset;
+ }
+
+ GUEST_FAIL("Couldn't find an ITS BASER of type %u", type);
+ return -1;
+}
+
+static void its_install_table(unsigned int type, vm_paddr_t base, size_t size)
+{
+ unsigned long offset = its_find_baser(type);
+ u64 baser;
+
+ baser = ((size / SZ_64K) - 1) |
+ GITS_BASER_PAGE_SIZE_64K |
+ GITS_BASER_InnerShareable |
+ base |
+ GITS_BASER_RaWaWb |
+ GITS_BASER_VALID;
+
+ its_write_u64(offset, baser);
+}
+
+static void its_install_cmdq(vm_paddr_t base, size_t size)
+{
+ u64 cbaser;
+
+ cbaser = ((size / SZ_4K) - 1) |
+ GITS_CBASER_InnerShareable |
+ base |
+ GITS_CBASER_RaWaWb |
+ GITS_CBASER_VALID;
+
+ its_write_u64(GITS_CBASER, cbaser);
+}
+
+void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz,
+ vm_paddr_t device_tbl, size_t device_tbl_sz,
+ vm_paddr_t cmdq, size_t cmdq_size)
+{
+ u32 ctlr;
+
+ its_install_table(GITS_BASER_TYPE_COLLECTION, coll_tbl, coll_tbl_sz);
+ its_install_table(GITS_BASER_TYPE_DEVICE, device_tbl, device_tbl_sz);
+ its_install_cmdq(cmdq, cmdq_size);
+
+ ctlr = its_read_u32(GITS_CTLR);
+ ctlr |= GITS_CTLR_ENABLE;
+ its_write_u32(GITS_CTLR, ctlr);
+}
+
+struct its_cmd_block {
+ union {
+ u64 raw_cmd[4];
+ __le64 raw_cmd_le[4];
+ };
+};
+
+static inline void its_fixup_cmd(struct its_cmd_block *cmd)
+{
+ /* Let's fixup BE commands */
+ cmd->raw_cmd_le[0] = cpu_to_le64(cmd->raw_cmd[0]);
+ cmd->raw_cmd_le[1] = cpu_to_le64(cmd->raw_cmd[1]);
+ cmd->raw_cmd_le[2] = cpu_to_le64(cmd->raw_cmd[2]);
+ cmd->raw_cmd_le[3] = cpu_to_le64(cmd->raw_cmd[3]);
+}
+
+static void its_mask_encode(u64 *raw_cmd, u64 val, int h, int l)
+{
+ u64 mask = GENMASK_ULL(h, l);
+ *raw_cmd &= ~mask;
+ *raw_cmd |= (val << l) & mask;
+}
+
+static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr)
+{
+ its_mask_encode(&cmd->raw_cmd[0], cmd_nr, 7, 0);
+}
+
+static void its_encode_devid(struct its_cmd_block *cmd, u32 devid)
+{
+ its_mask_encode(&cmd->raw_cmd[0], devid, 63, 32);
+}
+
+static void its_encode_event_id(struct its_cmd_block *cmd, u32 id)
+{
+ its_mask_encode(&cmd->raw_cmd[1], id, 31, 0);
+}
+
+static void its_encode_phys_id(struct its_cmd_block *cmd, u32 phys_id)
+{
+ its_mask_encode(&cmd->raw_cmd[1], phys_id, 63, 32);
+}
+
+static void its_encode_size(struct its_cmd_block *cmd, u8 size)
+{
+ its_mask_encode(&cmd->raw_cmd[1], size, 4, 0);
+}
+
+static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr)
+{
+ its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8);
+}
+
+static void its_encode_valid(struct its_cmd_block *cmd, int valid)
+{
+ its_mask_encode(&cmd->raw_cmd[2], !!valid, 63, 63);
+}
+
+static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr)
+{
+ its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16);
+}
+
+static void its_encode_collection(struct its_cmd_block *cmd, u16 col)
+{
+ its_mask_encode(&cmd->raw_cmd[2], col, 15, 0);
+}
+
+#define GITS_CMDQ_POLL_ITERATIONS 0
+
+static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd)
+{
+ u64 cwriter = its_read_u64(GITS_CWRITER);
+ struct its_cmd_block *dst = cmdq_base + cwriter;
+ u64 cbaser = its_read_u64(GITS_CBASER);
+ size_t cmdq_size;
+ u64 next;
+ int i;
+
+ cmdq_size = ((cbaser & 0xFF) + 1) * SZ_4K;
+
+ its_fixup_cmd(cmd);
+
+ WRITE_ONCE(*dst, *cmd);
+ dsb(ishst);
+ next = (cwriter + sizeof(*cmd)) % cmdq_size;
+ its_write_u64(GITS_CWRITER, next);
+
+ /*
+ * Polling isn't necessary considering KVM's ITS emulation at the time
+ * of writing this, as the CMDQ is processed synchronously after a write
+ * to CWRITER.
+ */
+ for (i = 0; its_read_u64(GITS_CREADR) != next; i++) {
+ __GUEST_ASSERT(i < GITS_CMDQ_POLL_ITERATIONS,
+ "ITS didn't process command at offset %lu after %d iterations\n",
+ cwriter, i);
+
+ cpu_relax();
+ }
+}
+
+void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base,
+ size_t itt_size, bool valid)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_MAPD);
+ its_encode_devid(&cmd, device_id);
+ its_encode_size(&cmd, ilog2(itt_size) - 1);
+ its_encode_itt(&cmd, itt_base);
+ its_encode_valid(&cmd, valid);
+
+ its_send_cmd(cmdq_base, &cmd);
+}
+
+void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_MAPC);
+ its_encode_collection(&cmd, collection_id);
+ its_encode_target(&cmd, vcpu_id);
+ its_encode_valid(&cmd, valid);
+
+ its_send_cmd(cmdq_base, &cmd);
+}
+
+void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
+ u32 collection_id, u32 intid)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_MAPTI);
+ its_encode_devid(&cmd, device_id);
+ its_encode_event_id(&cmd, event_id);
+ its_encode_phys_id(&cmd, intid);
+ its_encode_collection(&cmd, collection_id);
+
+ its_send_cmd(cmdq_base, &cmd);
+}
+
+void its_send_invall_cmd(void *cmdq_base, u32 collection_id)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_INVALL);
+ its_encode_collection(&cmd, collection_id);
+
+ its_send_cmd(cmdq_base, &cmd);
+}
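
A hedged guest-side sketch of using the helpers above to route a single LPI. It assumes the collection/device tables, ITT and command queue were allocated and identity-mapped by the host test, that the declarations come from the selftest gic_v3_its.h header, and that the IDs (device 0, event 0, collection 0, LPI 8192, the first LPI INTID) are illustrative:

#include "kvm_util.h"
#include "processor.h"
#include "gic_v3_its.h"

static void guest_route_one_lpi(void *cmdq, vm_paddr_t cmdq_gpa, size_t cmdq_sz,
				vm_paddr_t coll_tbl, size_t coll_sz,
				vm_paddr_t dev_tbl, size_t dev_sz,
				vm_paddr_t itt, size_t itt_sz)
{
	const u32 device_id = 0, event_id = 0, collection_id = 0;
	const u32 intid = 8192;		/* LPI INTIDs start at 8192 */

	its_init(coll_tbl, coll_sz, dev_tbl, dev_sz, cmdq_gpa, cmdq_sz);

	/* Bind the ITT to the device and the collection to this vCPU... */
	its_send_mapd_cmd(cmdq, device_id, itt, itt_sz, true);
	its_send_mapc_cmd(cmdq, guest_get_vcpuid(), collection_id, true);

	/* ...then map (device, event) to (collection, LPI) and sync. */
	its_send_mapti_cmd(cmdq, device_id, event_id, collection_id, intid);
	its_send_invall_cmd(cmdq, collection_id);
}
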
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+.macro save_registers
+ add sp, sp, #-16 * 17
+
+ stp x0, x1, [sp, #16 * 0]
+ stp x2, x3, [sp, #16 * 1]
+ stp x4, x5, [sp, #16 * 2]
+ stp x6, x7, [sp, #16 * 3]
+ stp x8, x9, [sp, #16 * 4]
+ stp x10, x11, [sp, #16 * 5]
+ stp x12, x13, [sp, #16 * 6]
+ stp x14, x15, [sp, #16 * 7]
+ stp x16, x17, [sp, #16 * 8]
+ stp x18, x19, [sp, #16 * 9]
+ stp x20, x21, [sp, #16 * 10]
+ stp x22, x23, [sp, #16 * 11]
+ stp x24, x25, [sp, #16 * 12]
+ stp x26, x27, [sp, #16 * 13]
+ stp x28, x29, [sp, #16 * 14]
+
+ /*
+ * This stores sp_el1 into ex_regs.sp so exception handlers can "look"
+ * at it. It will _not_ be used to restore the sp on return from the
+ * exception so handlers can not update it.
+ */
+ add x1, sp, #16 * 17
+ stp x30, x1, [sp, #16 * 15] /* x30, SP */
+
+ mrs x1, elr_el1
+ mrs x2, spsr_el1
+ stp x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+.endm
+
+.macro restore_registers
+ ldp x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+ msr elr_el1, x1
+ msr spsr_el1, x2
+
+ /* sp is not restored */
+ ldp x30, xzr, [sp, #16 * 15] /* x30, SP */
+
+ ldp x28, x29, [sp, #16 * 14]
+ ldp x26, x27, [sp, #16 * 13]
+ ldp x24, x25, [sp, #16 * 12]
+ ldp x22, x23, [sp, #16 * 11]
+ ldp x20, x21, [sp, #16 * 10]
+ ldp x18, x19, [sp, #16 * 9]
+ ldp x16, x17, [sp, #16 * 8]
+ ldp x14, x15, [sp, #16 * 7]
+ ldp x12, x13, [sp, #16 * 6]
+ ldp x10, x11, [sp, #16 * 5]
+ ldp x8, x9, [sp, #16 * 4]
+ ldp x6, x7, [sp, #16 * 3]
+ ldp x4, x5, [sp, #16 * 2]
+ ldp x2, x3, [sp, #16 * 1]
+ ldp x0, x1, [sp, #16 * 0]
+
+ add sp, sp, #16 * 17
+
+ eret
+.endm
+
+.pushsection ".entry.text", "ax"
+.balign 0x800
+.global vectors
+vectors:
+.popsection
+
+.set vector, 0
+
+/*
+ * Build an exception handler for vector and append a jump to it into
+ * vectors (while making sure that it's 0x80 aligned).
+ */
+.macro HANDLER, label
+handler_\label:
+ save_registers
+ mov x0, sp
+ mov x1, #vector
+ bl route_exception
+ restore_registers
+
+.pushsection ".entry.text", "ax"
+.balign 0x80
+ b handler_\label
+.popsection
+
+.set vector, vector + 1
+.endm
+
+.macro HANDLER_INVALID
+.pushsection ".entry.text", "ax"
+.balign 0x80
+/* This will abort so no need to save and restore registers. */
+ mov x0, #vector
+ mov x1, #0 /* ec */
+ mov x2, #0 /* valid_ec */
+ b kvm_exit_unexpected_exception
+.popsection
+
+.set vector, vector + 1
+.endm
+
+/*
+ * Caution: be sure to not add anything between the declaration of vectors
+ * above and these macro calls that will build the vectors table below it.
+ */
+ HANDLER_INVALID // Synchronous EL1t
+ HANDLER_INVALID // IRQ EL1t
+ HANDLER_INVALID // FIQ EL1t
+ HANDLER_INVALID // Error EL1t
+
+ HANDLER el1h_sync // Synchronous EL1h
+ HANDLER el1h_irq // IRQ EL1h
+ HANDLER el1h_fiq // FIQ EL1h
+ HANDLER el1h_error // Error EL1h
+
+ HANDLER el0_sync_64 // Synchronous 64-bit EL0
+ HANDLER el0_irq_64 // IRQ 64-bit EL0
+ HANDLER el0_fiq_64 // FIQ 64-bit EL0
+ HANDLER el0_error_64 // Error 64-bit EL0
+
+ HANDLER el0_sync_32 // Synchronous 32-bit EL0
+ HANDLER el0_irq_32 // IRQ 32-bit EL0
+ HANDLER el0_fiq_32 // FIQ 32-bit EL0
+ HANDLER el0_error_32 // Error 32-bit EL0
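
The vector table above only takes effect once the host points VBAR_EL1 at it and populates the handler table. A minimal sketch of that wiring, assuming the arm64 ESR_ELx_EC_SVC64 EC value imported into tools and with the guest and handler bodies purely illustrative:

#include <stdbool.h>

#include "kvm_util.h"
#include "processor.h"
#include "ucall_common.h"

static volatile bool svc_taken;

static void guest_svc_handler(struct ex_regs *regs)
{
	/* ELR_EL1 already points past the SVC, so just record the event. */
	svc_taken = true;
}

static void guest_code(void)
{
	asm volatile("svc #0");
	GUEST_ASSERT(svc_taken);
	GUEST_DONE();
}

static void host_wire_up(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
	vm_init_descriptor_tables(vm);		/* allocate the handler table  */
	vcpu_init_descriptor_tables(vcpu);	/* point VBAR_EL1 at 'vectors' */

	vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_SVC64,
				guest_svc_handler);
}
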
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AArch64 code
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#include <linux/compiler.h>
+#include <assert.h>
+
+#include "guest_modes.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "ucall_common.h"
+
+#include <linux/bitfield.h>
+#include <linux/sizes.h>
+
+#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000
+
+static vm_vaddr_t exception_handlers;
+
+static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
+{
+ return (v + vm->page_size) & ~(vm->page_size - 1);
+}
+
+static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+ uint64_t mask = (1UL << (vm->va_bits - shift)) - 1;
+
+ return (gva >> shift) & mask;
+}
+
+static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift;
+ uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+
+ TEST_ASSERT(vm->pgtable_levels == 4,
+ "Mode %d does not have 4 page table levels", vm->mode);
+
+ return (gva >> shift) & mask;
+}
+
+static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ unsigned int shift = (vm->page_shift - 3) + vm->page_shift;
+ uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+
+ TEST_ASSERT(vm->pgtable_levels >= 3,
+ "Mode %d does not have >= 3 page table levels", vm->mode);
+
+ return (gva >> shift) & mask;
+}
+
+static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+ return (gva >> vm->page_shift) & mask;
+}
+
+static inline bool use_lpa2_pte_format(struct kvm_vm *vm)
+{
+ return (vm->page_size == SZ_4K || vm->page_size == SZ_16K) &&
+ (vm->pa_bits > 48 || vm->va_bits > 48);
+}
+
+static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs)
+{
+ uint64_t pte;
+
+ if (use_lpa2_pte_format(vm)) {
+ pte = pa & GENMASK(49, vm->page_shift);
+ pte |= FIELD_GET(GENMASK(51, 50), pa) << 8;
+ attrs &= ~GENMASK(9, 8);
+ } else {
+ pte = pa & GENMASK(47, vm->page_shift);
+ if (vm->page_shift == 16)
+ pte |= FIELD_GET(GENMASK(51, 48), pa) << 12;
+ }
+ pte |= attrs;
+
+ return pte;
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
+{
+ uint64_t pa;
+
+ if (use_lpa2_pte_format(vm)) {
+ pa = pte & GENMASK(49, vm->page_shift);
+ pa |= FIELD_GET(GENMASK(9, 8), pte) << 50;
+ } else {
+ pa = pte & GENMASK(47, vm->page_shift);
+ if (vm->page_shift == 16)
+ pa |= FIELD_GET(GENMASK(15, 12), pte) << 48;
+ }
+
+ return pa;
+}
+
+static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
+{
+ unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+ return 1 << (vm->va_bits - shift);
+}
+
+static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
+{
+ return 1 << (vm->page_shift - 3);
+}
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
+{
+ size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;
+
+ if (vm->pgd_created)
+ return;
+
+ vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+ vm->memslots[MEM_REGION_PT]);
+ vm->pgd_created = true;
+}
+
+static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint64_t flags)
+{
+ uint8_t attr_idx = flags & 7;
+ uint64_t *ptep;
+
+ TEST_ASSERT((vaddr % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
+ if (!*ptep)
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+
+ switch (vm->pgtable_levels) {
+ case 4:
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
+ if (!*ptep)
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+ /* fall through */
+ case 3:
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
+ if (!*ptep)
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+ /* fall through */
+ case 2:
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
+ break;
+ default:
+ TEST_FAIL("Page table levels must be 2, 3, or 4");
+ }
+
+ *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ uint64_t attr_idx = MT_NORMAL;
+
+ _virt_pg_map(vm, vaddr, paddr, attr_idx);
+}
+
+uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint64_t *ptep;
+
+ if (!vm->pgd_created)
+ goto unmapped_gva;
+
+ ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+
+ switch (vm->pgtable_levels) {
+ case 4:
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ /* fall through */
+ case 3:
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ /* fall through */
+ case 2:
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ break;
+ default:
+ TEST_FAIL("Page table levels must be 2, 3, or 4");
+ }
+
+ return ptep;
+
+unmapped_gva:
+ TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
+ exit(EXIT_FAILURE);
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint64_t *ptep = virt_get_pte_hva(vm, gva);
+
+ return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
+{
+#ifdef DEBUG
+ static const char * const type[] = { "", "pud", "pmd", "pte" };
+ uint64_t pte, *ptep;
+
+ if (level == 4)
+ return;
+
+ for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
+ ptep = addr_gpa2hva(vm, pte);
+ if (!*ptep)
+ continue;
+ fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level + 1);
+ }
+#endif
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ int level = 4 - (vm->pgtable_levels - 1);
+ uint64_t pgd, *ptep;
+
+ if (!vm->pgd_created)
+ return;
+
+ for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {
+ ptep = addr_gpa2hva(vm, pgd);
+ if (!*ptep)
+ continue;
+ fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level);
+ }
+}
+
+void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
+{
+ struct kvm_vcpu_init default_init = { .target = -1, };
+ struct kvm_vm *vm = vcpu->vm;
+ uint64_t sctlr_el1, tcr_el1, ttbr0_el1;
+
+ if (!init)
+ init = &default_init;
+
+ if (init->target == -1) {
+ struct kvm_vcpu_init preferred;
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred);
+ init->target = preferred.target;
+ }
+
+ vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init);
+
+ /*
+ * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
+ * registers, which the variable argument list macros do.
+ */
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20);
+
+ sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1));
+ tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1));
+
+ /* Configure base granule size */
+ switch (vm->mode) {
+ case VM_MODE_PXXV48_4K:
+ TEST_FAIL("AArch64 does not support 4K sized pages "
+ "with ANY-bit physical address ranges");
+ case VM_MODE_P52V48_64K:
+ case VM_MODE_P48V48_64K:
+ case VM_MODE_P40V48_64K:
+ case VM_MODE_P36V48_64K:
+ tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+ break;
+ case VM_MODE_P52V48_16K:
+ case VM_MODE_P48V48_16K:
+ case VM_MODE_P40V48_16K:
+ case VM_MODE_P36V48_16K:
+ case VM_MODE_P36V47_16K:
+ tcr_el1 |= 2ul << 14; /* TG0 = 16KB */
+ break;
+ case VM_MODE_P52V48_4K:
+ case VM_MODE_P48V48_4K:
+ case VM_MODE_P40V48_4K:
+ case VM_MODE_P36V48_4K:
+ tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
+ break;
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift);
+
+ /* Configure output size */
+ switch (vm->mode) {
+ case VM_MODE_P52V48_4K:
+ case VM_MODE_P52V48_16K:
+ case VM_MODE_P52V48_64K:
+ tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+ ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
+ break;
+ case VM_MODE_P48V48_4K:
+ case VM_MODE_P48V48_16K:
+ case VM_MODE_P48V48_64K:
+ tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
+ break;
+ case VM_MODE_P40V48_4K:
+ case VM_MODE_P40V48_16K:
+ case VM_MODE_P40V48_64K:
+ tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
+ break;
+ case VM_MODE_P36V48_4K:
+ case VM_MODE_P36V48_16K:
+ case VM_MODE_P36V48_64K:
+ case VM_MODE_P36V47_16K:
+ tcr_el1 |= 1ul << 32; /* IPS = 36 bits */
+ break;
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */;
+ /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
+ tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
+ tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
+ if (use_lpa2_pte_format(vm))
+ tcr_el1 |= (1ul << 59) /* DS */;
+
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id);
+}
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+{
+ uint64_t pstate, pc;
+
+ pstate = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate));
+ pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
+
+ fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n",
+ indent, "", pstate, pc);
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+}
+
+static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct kvm_vcpu_init *init)
+{
+ size_t stack_size;
+ uint64_t stack_vaddr;
+ struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
+
+ stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
+ vm->page_size;
+ stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+ DEFAULT_ARM64_GUEST_STACK_VADDR_MIN,
+ MEM_REGION_DATA);
+
+ aarch64_vcpu_setup(vcpu, init);
+
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
+ return vcpu;
+}
+
+struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct kvm_vcpu_init *init, void *guest_code)
+{
+ struct kvm_vcpu *vcpu = __aarch64_vcpu_add(vm, vcpu_id, init);
+
+ vcpu_arch_set_entry_point(vcpu, guest_code);
+
+ return vcpu;
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ return __aarch64_vcpu_add(vm, vcpu_id, NULL);
+}
+
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
+{
+ va_list ap;
+ int i;
+
+ TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
+ " num: %u", num);
+
+ va_start(ap, num);
+
+ for (i = 0; i < num; i++) {
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.regs[i]),
+ va_arg(ap, uint64_t));
+ }
+
+ va_end(ap);
+}
+
+void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec)
+{
+ ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec);
+ while (1)
+ ;
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED)
+ return;
+
+ if (uc.args[2]) /* valid_ec */ {
+ assert(VECTOR_IS_SYNC(uc.args[0]));
+ TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
+ uc.args[0], uc.args[1]);
+ } else {
+ assert(!VECTOR_IS_SYNC(uc.args[0]));
+ TEST_FAIL("Unexpected exception (vector:0x%lx)",
+ uc.args[0]);
+ }
+}
+
+struct handlers {
+ handler_fn exception_handlers[VECTOR_NUM][ESR_ELx_EC_MAX + 1];
+};
+
+void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
+{
+ extern char vectors;
+
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors);
+}
+
+void route_exception(struct ex_regs *regs, int vector)
+{
+ struct handlers *handlers = (struct handlers *)exception_handlers;
+ bool valid_ec;
+ int ec = 0;
+
+ switch (vector) {
+ case VECTOR_SYNC_CURRENT:
+ case VECTOR_SYNC_LOWER_64:
+ ec = ESR_ELx_EC(read_sysreg(esr_el1));
+ valid_ec = true;
+ break;
+ case VECTOR_IRQ_CURRENT:
+ case VECTOR_IRQ_LOWER_64:
+ case VECTOR_FIQ_CURRENT:
+ case VECTOR_FIQ_LOWER_64:
+ case VECTOR_ERROR_CURRENT:
+ case VECTOR_ERROR_LOWER_64:
+ ec = 0;
+ valid_ec = false;
+ break;
+ default:
+ valid_ec = false;
+ goto unexpected_exception;
+ }
+
+ if (handlers && handlers->exception_handlers[vector][ec])
+ return handlers->exception_handlers[vector][ec](regs);
+
+unexpected_exception:
+ kvm_exit_unexpected_exception(vector, ec, valid_ec);
+}
+
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+ vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
+ vm->page_size, MEM_REGION_DATA);
+
+ *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec,
+ void (*handler)(struct ex_regs *))
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(VECTOR_IS_SYNC(vector));
+ assert(vector < VECTOR_NUM);
+ assert(ec <= ESR_ELx_EC_MAX);
+ handlers->exception_handlers[vector][ec] = handler;
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+ void (*handler)(struct ex_regs *))
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(!VECTOR_IS_SYNC(vector));
+ assert(vector < VECTOR_NUM);
+ handlers->exception_handlers[vector][0] = handler;
+}
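/*
 * Illustrative sketch, not part of the patch: the registration flow a test
 * typically follows with the helpers above. guest_dabt_handler() is a
 * hypothetical handler; VECTOR_SYNC_CURRENT and ESR_ELx_EC_DABT_CUR come
 * from the arm64 selftest/arch headers.
 */
static void guest_dabt_handler(struct ex_regs *regs)
{
	/* Skip the faulting instruction and resume the guest. */
	regs->pc += 4;
}

static void setup_fault_handling(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vcpu);
	vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR,
				guest_dabt_handler);
}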
+
+uint32_t guest_get_vcpuid(void)
+{
+ return read_sysreg(tpidr_el1);
+}
+
+static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran,
+ uint32_t not_sup_val, uint32_t ipa52_min_val)
+{
+ if (gran == not_sup_val)
+ return 0;
+ else if (gran >= ipa52_min_val && vm_ipa >= 52)
+ return 52;
+ else
+ return min(vm_ipa, 48U);
+}
+
+void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
+ uint32_t *ipa16k, uint32_t *ipa64k)
+{
+ struct kvm_vcpu_init preferred_init;
+ int kvm_fd, vm_fd, vcpu_fd, err;
+ uint64_t val;
+ uint32_t gran;
+ struct kvm_one_reg reg = {
+ .id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1),
+ .addr = (uint64_t)&val,
+ };
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, (void *)(unsigned long)ipa);
+ TEST_ASSERT(vm_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm_fd));
+
+ vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
+ TEST_ASSERT(vcpu_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu_fd));
+
+ err = ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init);
+ TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_PREFERRED_TARGET, err));
+ err = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &preferred_init);
+ TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_VCPU_INIT, err));
+
+ err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
+ TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd));
+
+ gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val);
+ *ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI,
+ ID_AA64MMFR0_EL1_TGRAN4_52_BIT);
+
+ gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val);
+ *ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI,
+ ID_AA64MMFR0_EL1_TGRAN64_IMP);
+
+ gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val);
+ *ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI,
+ ID_AA64MMFR0_EL1_TGRAN16_52_BIT);
+
+ close(vcpu_fd);
+ close(vm_fd);
+ close(kvm_fd);
+}
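/*
 * Illustrative sketch, not part of the patch: feeding the probe above with
 * the host's maximum IPA size, which is what guest_modes_append_default()
 * does for real in lib/guest_modes.c.
 */
static void probe_page_sizes(void)
{
	uint32_t ipa = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
	uint32_t ipa4k, ipa16k, ipa64k;

	aarch64_get_supported_page_sizes(ipa, &ipa4k, &ipa16k, &ipa64k);

	/* E.g. a 4K-page mode with a 40-bit IPA is usable iff ipa4k >= 40. */
	pr_info("Max IPA: 4K=%u 16K=%u 64K=%u\n", ipa4k, ipa16k, ipa64k);
}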
+
+#define __smccc_call(insn, function_id, arg0, arg1, arg2, arg3, arg4, arg5, \
+ arg6, res) \
+ asm volatile("mov w0, %w[function_id]\n" \
+ "mov x1, %[arg0]\n" \
+ "mov x2, %[arg1]\n" \
+ "mov x3, %[arg2]\n" \
+ "mov x4, %[arg3]\n" \
+ "mov x5, %[arg4]\n" \
+ "mov x6, %[arg5]\n" \
+ "mov x7, %[arg6]\n" \
+ #insn "#0\n" \
+ "mov %[res0], x0\n" \
+ "mov %[res1], x1\n" \
+ "mov %[res2], x2\n" \
+ "mov %[res3], x3\n" \
+ : [res0] "=r"(res->a0), [res1] "=r"(res->a1), \
+ [res2] "=r"(res->a2), [res3] "=r"(res->a3) \
+ : [function_id] "r"(function_id), [arg0] "r"(arg0), \
+ [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3), \
+ [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6) \
+ : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7")
+
+
+void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+ uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+ uint64_t arg6, struct arm_smccc_res *res)
+{
+ __smccc_call(hvc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
+ arg6, res);
+}
+
+void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+ uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+ uint64_t arg6, struct arm_smccc_res *res)
+{
+ __smccc_call(smc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
+ arg6, res);
+}
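/*
 * Illustrative sketch, not part of the patch: a guest querying the SMCCC
 * version over the HVC conduit. 0x80000000 is the architected SMCCC_VERSION
 * function ID; -1 is the standard NOT_SUPPORTED return value.
 */
static void guest_check_smccc_version(void)
{
	struct arm_smccc_res res;

	smccc_hvc(0x80000000 /* SMCCC_VERSION */, 0, 0, 0, 0, 0, 0, 0, &res);
	GUEST_ASSERT(res.a0 != (uint64_t)-1);
}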
+
+void kvm_selftest_arch_init(void)
+{
+ /*
+ * arm64 doesn't have a true default mode, so start by computing the
+ * available IPA space and page sizes early.
+ */
+ guest_modes_append_default();
+}
+
+void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
+{
+ /*
+ * arm64 selftests use only TTBR0_EL1, meaning that the valid VA space
+ * is [0, 2^(64 - TCR_EL1.T0SZ)).
+ */
+ sparsebit_set_num(vm->vpages_valid, 0,
+ (1ULL << vm->va_bits) >> vm->page_shift);
+}
+
+/* Helper to execute the wfi instruction. */
+void wfi(void)
+{
+ asm volatile("wfi");
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM64 Spinlock support
+ */
+#include <stdint.h>
+
+#include "spinlock.h"
+
+void spin_lock(struct spinlock *lock)
+{
+ int val, res;
+
+ asm volatile(
+ "1: ldaxr %w0, [%2]\n"
+ " cbnz %w0, 1b\n"
+ " mov %w0, #1\n"
+ " stxr %w1, %w0, [%2]\n"
+ " cbnz %w1, 1b\n"
+ : "=&r" (val), "=&r" (res)
+ : "r" (&lock->v)
+ : "memory");
+}
+
+void spin_unlock(struct spinlock *lock)
+{
+ asm volatile("stlr wzr, [%0]\n" : : "r" (&lock->v) : "memory");
+}
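/*
 * Illustrative sketch, not part of the patch: guarding a counter shared by
 * several guest vCPUs with the lock above; both globals are hypothetical.
 */
static struct spinlock counter_lock;
static uint64_t shared_counter;

static void guest_bump_counter(void)
{
	spin_lock(&counter_lock);
	shared_counter++;
	spin_unlock(&counter_lock);
}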
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include "kvm_util.h"
+
+vm_vaddr_t *ucall_exit_mmio_addr;
+
+void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+ vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR);
+
+ virt_map(vm, mmio_gva, mmio_gpa, 1);
+
+ vm->ucall_mmio_addr = mmio_gpa;
+
+ write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
+}
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ if (run->exit_reason == KVM_EXIT_MMIO &&
+ run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) {
+ TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t),
+ "Unexpected ucall exit mmio address access");
+ return (void *)(*((uint64_t *)run->mmio.data));
+ }
+
+ return NULL;
+}
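/*
 * Illustrative sketch, not part of the patch: the host-side loop most tests
 * use to consume the ucalls surfaced by ucall_arch_get_ucall(), via the
 * generic get_ucall() wrapper.
 */
static void run_guest_until_done(struct kvm_vcpu *vcpu)
{
	struct ucall uc;

	for (;;) {
		vcpu_run(vcpu);

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_SYNC:
			continue;
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			break;
		case UCALL_DONE:
			return;
		default:
			TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
		}
	}
}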
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) v3 host support
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm.h>
+#include <linux/sizes.h>
+#include <asm/cputype.h>
+#include <asm/kvm_para.h>
+#include <asm/kvm.h>
+
+#include "kvm_util.h"
+#include "vgic.h"
+#include "gic.h"
+#include "gic_v3.h"
+
+/*
+ * vGIC-v3 default host setup
+ *
+ * Input args:
+ * vm - KVM VM
+ * nr_vcpus - Number of vCPUs supported by this VM
+ *
+ * Output args: None
+ *
+ * Return: GIC file-descriptor or negative error code upon failure
+ *
+ * The function creates a vGIC-v3 device and maps the distributor and
+ * redistributor regions of the guest. Since it depends on the number of
+ * vCPUs for the VM, it must be called after all the vCPUs have been created.
+ */
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
+{
+ int gic_fd;
+ uint64_t attr;
+ struct list_head *iter;
+ unsigned int nr_gic_pages, nr_vcpus_created = 0;
+
+ TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be zero");
+
+ /*
+ * Make sure that the caller is in fact calling this
+ * function after all the vCPUs are added.
+ */
+ list_for_each(iter, &vm->vcpus)
+ nr_vcpus_created++;
+ TEST_ASSERT(nr_vcpus == nr_vcpus_created,
+ "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)",
+ nr_vcpus, nr_vcpus_created);
+
+ /* Distributor setup */
+ gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+ if (gic_fd < 0)
+ return gic_fd;
+
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs);
+
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ attr = GICD_BASE_GPA;
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_DIST, &attr);
+ nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE);
+ virt_map(vm, GICD_BASE_GPA, GICD_BASE_GPA, nr_gic_pages);
+
+ /* Redistributor setup */
+ attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, GICR_BASE_GPA, 0, 0);
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &attr);
+ nr_gic_pages = vm_calc_num_guest_pages(vm->mode,
+ KVM_VGIC_V3_REDIST_SIZE * nr_vcpus);
+ virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages);
+
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ return gic_fd;
+}
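/*
 * Illustrative sketch, not part of the patch: creating the vGIC only after
 * every vCPU has been added, as required above. The IRQ count (64) is an
 * arbitrary example value.
 */
static int example_vgic_init(struct kvm_vm *vm, unsigned int nr_vcpus)
{
	int gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);

	__TEST_REQUIRE(gic_fd >= 0, "Failed to create a vGICv3, skipping");
	return gic_fd;
}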
+
+/* should only work for level sensitive interrupts */
+int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
+{
+ uint64_t attr = 32 * (intid / 32);
+ uint64_t index = intid % 32;
+ uint64_t val;
+ int ret;
+
+ ret = __kvm_device_attr_get(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+ attr, &val);
+ if (ret != 0)
+ return ret;
+
+ val |= 1U << index;
+ ret = __kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+ attr, &val);
+ return ret;
+}
+
+void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
+{
+ int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
+
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, ret));
+}
+
+int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
+{
+ uint32_t irq = intid & KVM_ARM_IRQ_NUM_MASK;
+
+ TEST_ASSERT(!INTID_IS_SGI(intid), "KVM_IRQ_LINE's interface itself "
+ "doesn't allow injecting SGIs. There's no mask for it.");
+
+ if (INTID_IS_PPI(intid))
+ irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT;
+ else
+ irq |= KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT;
+
+ return _kvm_irq_line(vm, irq, level);
+}
+
+void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
+{
+ int ret = _kvm_arm_irq_line(vm, intid, level);
+
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
+}
+
+static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu,
+ uint64_t reg_off)
+{
+ uint64_t reg = intid / 32;
+ uint64_t index = intid % 32;
+ uint64_t attr = reg_off + reg * 4;
+ uint64_t val;
+ bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid);
+
+ uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
+ : KVM_DEV_ARM_VGIC_GRP_DIST_REGS;
+
+ if (intid_is_private) {
+ /* TODO: only vcpu 0 implemented for now. */
+ assert(vcpu->id == 0);
+ attr += SZ_64K;
+ }
+
+ /* Check that the addr part of the attr is within 32 bits. */
+ assert((attr & ~KVM_DEV_ARM_VGIC_OFFSET_MASK) == 0);
+
+ /*
+ * All calls will succeed, even with invalid intids, as long as the
+ * addr part of the attr is within 32 bits (checked above). An invalid
+ * intid will just make the reads/writes land above the intended
+ * register space (i.e., ICPENDR after ISPENDR).
+ */
+ kvm_device_attr_get(gic_fd, group, attr, &val);
+ val |= 1ULL << index;
+ kvm_device_attr_set(gic_fd, group, attr, &val);
+}
+
+void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
+{
+ vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR);
+}
+
+void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
+{
+ vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER);
+}
+
+int vgic_its_setup(struct kvm_vm *vm)
+{
+ int its_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_ITS);
+ u64 attr;
+
+ attr = GITS_BASE_GPA;
+ kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &attr);
+
+ kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ virt_map(vm, GITS_BASE_GPA, GITS_BASE_GPA,
+ vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_ITS_SIZE));
+
+ return its_fd;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test handler for the s390x DIAGNOSE 0x0318 instruction.
+ *
+ * Copyright (C) 2020, IBM
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define ICPT_INSTRUCTION 0x04
+#define IPA0_DIAG 0x8300
+
+static void guest_code(void)
+{
+ uint64_t diag318_info = 0x12345678;
+
+ asm volatile ("diag %0,0,0x318\n" : : "d" (diag318_info));
+}
+
+/*
+ * The DIAGNOSE 0x0318 instruction call must be handled via userspace. As such,
+ * we create an ad-hoc VM here to handle the instruction then extract the
+ * necessary data. It is up to the caller to decide what to do with that data.
+ */
+static uint64_t diag318_handler(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ uint64_t reg;
+ uint64_t diag318_info;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_run(vcpu);
+ run = vcpu->run;
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT(run->s390_sieic.icptcode == ICPT_INSTRUCTION,
+ "Unexpected intercept code: 0x%x", run->s390_sieic.icptcode);
+ TEST_ASSERT((run->s390_sieic.ipa & 0xff00) == IPA0_DIAG,
+ "Unexpected IPA0 code: 0x%x", (run->s390_sieic.ipa & 0xff00));
+
+ reg = (run->s390_sieic.ipa & 0x00f0) >> 4;
+ diag318_info = run->s.regs.gprs[reg];
+
+ TEST_ASSERT(diag318_info != 0, "DIAGNOSE 0x0318 info not set");
+
+ kvm_vm_free(vm);
+
+ return diag318_info;
+}
+
+uint64_t get_diag318_info(void)
+{
+ static uint64_t diag318_info;
+ static bool printed_skip;
+
+ /*
+ * If KVM does not support diag318, then return 0 to
+ * ensure tests do not break.
+ */
+ if (!kvm_has_cap(KVM_CAP_S390_DIAG318)) {
+ if (!printed_skip) {
+ fprintf(stdout, "KVM_CAP_S390_DIAG318 not supported. "
+ "Skipping diag318 test.\n");
+ printed_skip = true;
+ }
+ return 0;
+ }
+
+ /*
+ * If a test has previously requested the diag318 info,
+ * then don't bother spinning up a temporary VM again.
+ */
+ if (!diag318_info)
+ diag318_info = diag318_handler();
+
+ return diag318_info;
+}
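/*
 * Illustrative sketch, not part of the patch: forwarding the probed value to
 * a test's own VM through the s390 sync_regs interface, which is essentially
 * what sync_regs_test does.
 */
static void set_diag318(struct kvm_vcpu *vcpu)
{
	uint64_t diag318_info = get_diag318_info();

	if (!diag318_info)
		return; /* Capability missing, nothing to do. */

	vcpu->run->s.regs.diag318 = diag318_info;
	vcpu->run->kvm_dirty_regs |= KVM_SYNC_DIAG318;
}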
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ * Hariharan Mari <hari55@linux.ibm.com>
+ *
+ * Contains the definition of the global variables for the test facility feature.
+ */
+
+#include "facility.h"
+
+uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS];
+bool stfle_flag;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM selftest s390x library code - CPU-related functions (page tables...)
+ *
+ * Copyright (C) 2019, Red Hat, Inc.
+ */
+
+#include "processor.h"
+#include "kvm_util.h"
+
+#define PAGES_PER_REGION 4
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
+{
+ vm_paddr_t paddr;
+
+ TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
+ vm->page_size);
+
+ if (vm->pgd_created)
+ return;
+
+ paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+ vm->memslots[MEM_REGION_PT]);
+ memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
+
+ vm->pgd = paddr;
+ vm->pgd_created = true;
+}
+
+/*
+ * Allocate 4 pages for a region/segment table (ri < 4), or one page for
+ * a page table (ri == 4). Returns a suitable region/segment table entry
+ * which points to the freshly allocated pages.
+ */
+static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri)
+{
+ uint64_t taddr;
+
+ taddr = vm_phy_pages_alloc(vm, ri < 4 ? PAGES_PER_REGION : 1,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+ memset(addr_gpa2hva(vm, taddr), 0xff, PAGES_PER_REGION * vm->page_size);
+
+ return (taddr & REGION_ENTRY_ORIGIN)
+ | (((4 - ri) << 2) & REGION_ENTRY_TYPE)
+ | ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
+{
+ int ri, idx;
+ uint64_t *entry;
+
+ TEST_ASSERT((gva % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ " vaddr: 0x%lx vm->page_size: 0x%x",
+ gva, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (gva >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx",
+ gva);
+ TEST_ASSERT((gpa % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ " paddr: 0x%lx vm->page_size: 0x%x",
+ gpa, vm->page_size);
+ TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ gpa, vm->max_gfn, vm->page_size);
+
+ /* Walk through region and segment tables */
+ entry = addr_gpa2hva(vm, vm->pgd);
+ for (ri = 1; ri <= 4; ri++) {
+ idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
+ if (entry[idx] & REGION_ENTRY_INVALID)
+ entry[idx] = virt_alloc_region(vm, ri);
+ entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
+ }
+
+ /* Fill in page table entry */
+ idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */
+ if (!(entry[idx] & PAGE_INVALID))
+ fprintf(stderr,
+ "WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa);
+ entry[idx] = gpa;
+}
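/*
 * Worked note, not part of the patch: the "64 - 11 * ri" shift in the walk
 * above slices the 64-bit guest address into 11-bit table indexes from the
 * top down, followed by the 8-bit page index used for the final lookup:
 *
 *   ri = 1 (region 1st):  bits 63:53  ->  (gva >> 53) & 0x7ff
 *   ri = 2 (region 2nd):  bits 52:42  ->  (gva >> 42) & 0x7ff
 *   ri = 3 (region 3rd):  bits 41:31  ->  (gva >> 31) & 0x7ff
 *   ri = 4 (segment):     bits 30:20  ->  (gva >> 20) & 0x7ff
 *   page index:           bits 19:12  ->  (gva >> 12) & 0xff
 */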
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ int ri, idx;
+ uint64_t *entry;
+
+ TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
+ vm->page_size);
+
+ entry = addr_gpa2hva(vm, vm->pgd);
+ for (ri = 1; ri <= 4; ri++) {
+ idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
+ TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID),
+ "No region mapping for vm virtual address 0x%lx",
+ gva);
+ entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
+ }
+
+ idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */
+
+ TEST_ASSERT(!(entry[idx] & PAGE_INVALID),
+ "No page mapping for vm virtual address 0x%lx", gva);
+
+ return (entry[idx] & ~0xffful) + (gva & 0xffful);
+}
+
+static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent,
+ uint64_t ptea_start)
+{
+ uint64_t *pte, ptea;
+
+ for (ptea = ptea_start; ptea < ptea_start + 0x100 * 8; ptea += 8) {
+ pte = addr_gpa2hva(vm, ptea);
+ if (*pte & PAGE_INVALID)
+ continue;
+ fprintf(stream, "%*spte @ 0x%lx: 0x%016lx\n",
+ indent, "", ptea, *pte);
+ }
+}
+
+static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent,
+ uint64_t reg_tab_addr)
+{
+ uint64_t addr, *entry;
+
+ for (addr = reg_tab_addr; addr < reg_tab_addr + 0x400 * 8; addr += 8) {
+ entry = addr_gpa2hva(vm, addr);
+ if (*entry & REGION_ENTRY_INVALID)
+ continue;
+ fprintf(stream, "%*srt%lde @ 0x%lx: 0x%016lx\n",
+ indent, "", 4 - ((*entry & REGION_ENTRY_TYPE) >> 2),
+ addr, *entry);
+ if (*entry & REGION_ENTRY_TYPE) {
+ virt_dump_region(stream, vm, indent + 2,
+ *entry & REGION_ENTRY_ORIGIN);
+ } else {
+ virt_dump_ptes(stream, vm, indent + 2,
+ *entry & REGION_ENTRY_ORIGIN);
+ }
+ }
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ if (!vm->pgd_created)
+ return;
+
+ virt_dump_region(stream, vm, indent, vm->pgd);
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+ vcpu->run->psw_addr = (uintptr_t)guest_code;
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ size_t stack_size = DEFAULT_STACK_PGS * getpagesize();
+ uint64_t stack_vaddr;
+ struct kvm_regs regs;
+ struct kvm_sregs sregs;
+ struct kvm_vcpu *vcpu;
+
+ TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
+ vm->page_size);
+
+ stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+ DEFAULT_GUEST_STACK_VADDR_MIN,
+ MEM_REGION_DATA);
+
+ vcpu = __vm_vcpu_add(vm, vcpu_id);
+
+ /* Setup guest registers */
+ vcpu_regs_get(vcpu, &regs);
+ regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160;
+ vcpu_regs_set(vcpu, &regs);
+
+ vcpu_sregs_get(vcpu, &sregs);
+ sregs.crs[0] |= 0x00040000; /* Enable floating point regs */
+ sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */
+ vcpu_sregs_set(vcpu, &sregs);
+
+ vcpu->run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */
+
+ return vcpu;
+}
+
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
+{
+ va_list ap;
+ struct kvm_regs regs;
+ int i;
+
+ TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n"
+ " num: %u",
+ num);
+
+ va_start(ap, num);
+ vcpu_regs_get(vcpu, &regs);
+
+ for (i = 0; i < num; i++)
+ regs.gprs[i + 2] = va_arg(ap, uint64_t);
+
+ vcpu_regs_set(vcpu, &regs);
+ va_end(ap);
+}
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+{
+ fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
+ indent, "", vcpu->run->psw_mask, vcpu->run->psw_addr);
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ */
+#include "kvm_util.h"
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ if (run->exit_reason == KVM_EXIT_S390_SIEIC &&
+ run->s390_sieic.icptcode == 4 &&
+ (run->s390_sieic.ipa >> 8) == 0x83 && /* 0x83 means DIAGNOSE */
+ (run->s390_sieic.ipb >> 16) == 0x501) {
+ int reg = run->s390_sieic.ipa & 0xf;
+
+ return (void *)run->s.regs.gprs[reg];
+ }
+ return NULL;
+}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Test handler for the s390x DIAGNOSE 0x0318 instruction.
- *
- * Copyright (C) 2020, IBM
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-
-#define ICPT_INSTRUCTION 0x04
-#define IPA0_DIAG 0x8300
-
-static void guest_code(void)
-{
- uint64_t diag318_info = 0x12345678;
-
- asm volatile ("diag %0,0,0x318\n" : : "d" (diag318_info));
-}
-
-/*
- * The DIAGNOSE 0x0318 instruction call must be handled via userspace. As such,
- * we create an ad-hoc VM here to handle the instruction then extract the
- * necessary data. It is up to the caller to decide what to do with that data.
- */
-static uint64_t diag318_handler(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct kvm_run *run;
- uint64_t reg;
- uint64_t diag318_info;
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vcpu_run(vcpu);
- run = vcpu->run;
-
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
- TEST_ASSERT(run->s390_sieic.icptcode == ICPT_INSTRUCTION,
- "Unexpected intercept code: 0x%x", run->s390_sieic.icptcode);
- TEST_ASSERT((run->s390_sieic.ipa & 0xff00) == IPA0_DIAG,
- "Unexpected IPA0 code: 0x%x", (run->s390_sieic.ipa & 0xff00));
-
- reg = (run->s390_sieic.ipa & 0x00f0) >> 4;
- diag318_info = run->s.regs.gprs[reg];
-
- TEST_ASSERT(diag318_info != 0, "DIAGNOSE 0x0318 info not set");
-
- kvm_vm_free(vm);
-
- return diag318_info;
-}
-
-uint64_t get_diag318_info(void)
-{
- static uint64_t diag318_info;
- static bool printed_skip;
-
- /*
- * If KVM does not support diag318, then return 0 to
- * ensure tests do not break.
- */
- if (!kvm_has_cap(KVM_CAP_S390_DIAG318)) {
- if (!printed_skip) {
- fprintf(stdout, "KVM_CAP_S390_DIAG318 not supported. "
- "Skipping diag318 test.\n");
- printed_skip = true;
- }
- return 0;
- }
-
- /*
- * If a test has previously requested the diag318 info,
- * then don't bother spinning up a temporary VM again.
- */
- if (!diag318_info)
- diag318_info = diag318_handler();
-
- return diag318_info;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright IBM Corp. 2024
- *
- * Authors:
- * Hariharan Mari <hari55@linux.ibm.com>
- *
- * Contains the definition for the global variables to have the test facitlity feature.
- */
-
-#include "facility.h"
-
-uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS];
-bool stfle_flag;
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * KVM selftest s390x library code - CPU-related functions (page tables...)
- *
- * Copyright (C) 2019, Red Hat, Inc.
- */
-
-#include "processor.h"
-#include "kvm_util.h"
-
-#define PAGES_PER_REGION 4
-
-void virt_arch_pgd_alloc(struct kvm_vm *vm)
-{
- vm_paddr_t paddr;
-
- TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
- vm->page_size);
-
- if (vm->pgd_created)
- return;
-
- paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR,
- vm->memslots[MEM_REGION_PT]);
- memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
-
- vm->pgd = paddr;
- vm->pgd_created = true;
-}
-
-/*
- * Allocate 4 pages for a region/segment table (ri < 4), or one page for
- * a page table (ri == 4). Returns a suitable region/segment table entry
- * which points to the freshly allocated pages.
- */
-static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri)
-{
- uint64_t taddr;
-
- taddr = vm_phy_pages_alloc(vm, ri < 4 ? PAGES_PER_REGION : 1,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
- memset(addr_gpa2hva(vm, taddr), 0xff, PAGES_PER_REGION * vm->page_size);
-
- return (taddr & REGION_ENTRY_ORIGIN)
- | (((4 - ri) << 2) & REGION_ENTRY_TYPE)
- | ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
-}
-
-void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
-{
- int ri, idx;
- uint64_t *entry;
-
- TEST_ASSERT((gva % vm->page_size) == 0,
- "Virtual address not on page boundary,\n"
- " vaddr: 0x%lx vm->page_size: 0x%x",
- gva, vm->page_size);
- TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
- (gva >> vm->page_shift)),
- "Invalid virtual address, vaddr: 0x%lx",
- gva);
- TEST_ASSERT((gpa % vm->page_size) == 0,
- "Physical address not on page boundary,\n"
- " paddr: 0x%lx vm->page_size: 0x%x",
- gva, vm->page_size);
- TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- gva, vm->max_gfn, vm->page_size);
-
- /* Walk through region and segment tables */
- entry = addr_gpa2hva(vm, vm->pgd);
- for (ri = 1; ri <= 4; ri++) {
- idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
- if (entry[idx] & REGION_ENTRY_INVALID)
- entry[idx] = virt_alloc_region(vm, ri);
- entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
- }
-
- /* Fill in page table entry */
- idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */
- if (!(entry[idx] & PAGE_INVALID))
- fprintf(stderr,
- "WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa);
- entry[idx] = gpa;
-}
-
-vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
-{
- int ri, idx;
- uint64_t *entry;
-
- TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
- vm->page_size);
-
- entry = addr_gpa2hva(vm, vm->pgd);
- for (ri = 1; ri <= 4; ri++) {
- idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
- TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID),
- "No region mapping for vm virtual address 0x%lx",
- gva);
- entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
- }
-
- idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */
-
- TEST_ASSERT(!(entry[idx] & PAGE_INVALID),
- "No page mapping for vm virtual address 0x%lx", gva);
-
- return (entry[idx] & ~0xffful) + (gva & 0xffful);
-}
-
-static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent,
- uint64_t ptea_start)
-{
- uint64_t *pte, ptea;
-
- for (ptea = ptea_start; ptea < ptea_start + 0x100 * 8; ptea += 8) {
- pte = addr_gpa2hva(vm, ptea);
- if (*pte & PAGE_INVALID)
- continue;
- fprintf(stream, "%*spte @ 0x%lx: 0x%016lx\n",
- indent, "", ptea, *pte);
- }
-}
-
-static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent,
- uint64_t reg_tab_addr)
-{
- uint64_t addr, *entry;
-
- for (addr = reg_tab_addr; addr < reg_tab_addr + 0x400 * 8; addr += 8) {
- entry = addr_gpa2hva(vm, addr);
- if (*entry & REGION_ENTRY_INVALID)
- continue;
- fprintf(stream, "%*srt%lde @ 0x%lx: 0x%016lx\n",
- indent, "", 4 - ((*entry & REGION_ENTRY_TYPE) >> 2),
- addr, *entry);
- if (*entry & REGION_ENTRY_TYPE) {
- virt_dump_region(stream, vm, indent + 2,
- *entry & REGION_ENTRY_ORIGIN);
- } else {
- virt_dump_ptes(stream, vm, indent + 2,
- *entry & REGION_ENTRY_ORIGIN);
- }
- }
-}
-
-void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
-{
- if (!vm->pgd_created)
- return;
-
- virt_dump_region(stream, vm, indent, vm->pgd);
-}
-
-void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
-{
- vcpu->run->psw_addr = (uintptr_t)guest_code;
-}
-
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
-{
- size_t stack_size = DEFAULT_STACK_PGS * getpagesize();
- uint64_t stack_vaddr;
- struct kvm_regs regs;
- struct kvm_sregs sregs;
- struct kvm_vcpu *vcpu;
-
- TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
- vm->page_size);
-
- stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
- DEFAULT_GUEST_STACK_VADDR_MIN,
- MEM_REGION_DATA);
-
- vcpu = __vm_vcpu_add(vm, vcpu_id);
-
- /* Setup guest registers */
- vcpu_regs_get(vcpu, &regs);
- regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160;
- vcpu_regs_set(vcpu, &regs);
-
- vcpu_sregs_get(vcpu, &sregs);
- sregs.crs[0] |= 0x00040000; /* Enable floating point regs */
- sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */
- vcpu_sregs_set(vcpu, &sregs);
-
- vcpu->run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */
-
- return vcpu;
-}
-
-void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
-{
- va_list ap;
- struct kvm_regs regs;
- int i;
-
- TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n"
- " num: %u",
- num);
-
- va_start(ap, num);
- vcpu_regs_get(vcpu, &regs);
-
- for (i = 0; i < num; i++)
- regs.gprs[i + 2] = va_arg(ap, uint64_t);
-
- vcpu_regs_set(vcpu, &regs);
- va_end(ap);
-}
-
-void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
-{
- fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
- indent, "", vcpu->run->psw_mask, vcpu->run->psw_addr);
-}
-
-void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
-{
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ucall support. A ucall is a "hypercall to userspace".
- *
- * Copyright (C) 2019 Red Hat, Inc.
- */
-#include "kvm_util.h"
-
-void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
-{
- struct kvm_run *run = vcpu->run;
-
- if (run->exit_reason == KVM_EXIT_S390_SIEIC &&
- run->s390_sieic.icptcode == 4 &&
- (run->s390_sieic.ipa >> 8) == 0x83 && /* 0x83 means DIAGNOSE */
- (run->s390_sieic.ipb >> 16) == 0x501) {
- int reg = run->s390_sieic.ipa & 0xf;
-
- return (void *)run->s.regs.gprs[reg];
- }
- return NULL;
-}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#include "apic.h"
+
+void apic_disable(void)
+{
+ wrmsr(MSR_IA32_APICBASE,
+ rdmsr(MSR_IA32_APICBASE) &
+ ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
+}
+
+void xapic_enable(void)
+{
+ uint64_t val = rdmsr(MSR_IA32_APICBASE);
+
+ /* Per SDM: to enable xAPIC while in x2APIC mode, the APIC must first be disabled. */
+ if (val & MSR_IA32_APICBASE_EXTD) {
+ apic_disable();
+ wrmsr(MSR_IA32_APICBASE,
+ rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
+ } else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
+ wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
+ }
+
+ /*
+ * Per SDM: reset value of spurious interrupt vector register has the
+ * APIC software enabled bit=0. It must be enabled in addition to the
+ * enable bit in the MSR.
+ */
+ val = xapic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
+ xapic_write_reg(APIC_SPIV, val);
+}
+
+void x2apic_enable(void)
+{
+ wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
+ MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
+ x2apic_write_reg(APIC_SPIV,
+ x2apic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED);
+}
--- /dev/null
+handle_exception:
+ push %r15
+ push %r14
+ push %r13
+ push %r12
+ push %r11
+ push %r10
+ push %r9
+ push %r8
+
+ push %rdi
+ push %rsi
+ push %rbp
+ push %rbx
+ push %rdx
+ push %rcx
+ push %rax
+ mov %rsp, %rdi
+
+ call route_exception
+
+ pop %rax
+ pop %rcx
+ pop %rdx
+ pop %rbx
+ pop %rbp
+ pop %rsi
+ pop %rdi
+ pop %r8
+ pop %r9
+ pop %r10
+ pop %r11
+ pop %r12
+ pop %r13
+ pop %r14
+ pop %r15
+
+ /* Discard vector and error code. */
+ add $16, %rsp
+ iretq
+
+/*
+ * Build the handle_exception wrappers which push the vector/error code on the
+ * stack and an array of pointers to those wrappers.
+ */
+.pushsection .rodata
+.globl idt_handlers
+idt_handlers:
+.popsection
+
+.macro HANDLERS has_error from to
+ vector = \from
+ .rept \to - \from + 1
+ .align 8
+
+ /* Fetch current address and append it to idt_handlers. */
+666 :
+.pushsection .rodata
+ .quad 666b
+.popsection
+
+ .if ! \has_error
+ pushq $0
+ .endif
+ pushq $vector
+ jmp handle_exception
+ vector = vector + 1
+ .endr
+.endm
+
+.global idt_handler_code
+idt_handler_code:
+ HANDLERS has_error=0 from=0 to=7
+ HANDLERS has_error=1 from=8 to=8
+ HANDLERS has_error=0 from=9 to=9
+ HANDLERS has_error=1 from=10 to=14
+ HANDLERS has_error=0 from=15 to=16
+ HANDLERS has_error=1 from=17 to=17
+ HANDLERS has_error=0 from=18 to=255
+
+.section .note.GNU-stack, "", %progbits
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Hyper-V specific functions.
+ *
+ * Copyright (C) 2021, Red Hat Inc.
+ */
+#include <stdint.h>
+#include "processor.h"
+#include "hyperv.h"
+
+const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
+{
+ static struct kvm_cpuid2 *cpuid;
+ int kvm_fd;
+
+ if (cpuid)
+ return cpuid;
+
+ cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+ kvm_fd = open_kvm_dev_path_or_exit();
+
+ kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+
+ close(kvm_fd);
+ return cpuid;
+}
+
+void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+ static struct kvm_cpuid2 *cpuid_full;
+ const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv;
+ int i, nent = 0;
+
+ if (!cpuid_full) {
+ cpuid_sys = kvm_get_supported_cpuid();
+ cpuid_hv = kvm_get_supported_hv_cpuid();
+
+ cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent);
+ if (!cpuid_full) {
+ perror("malloc");
+ abort();
+ }
+
+ /* Need to skip KVM CPUID leaves 0x400000xx */
+ for (i = 0; i < cpuid_sys->nent; i++) {
+ if (cpuid_sys->entries[i].function >= 0x40000000 &&
+ cpuid_sys->entries[i].function < 0x40000100)
+ continue;
+ cpuid_full->entries[nent] = cpuid_sys->entries[i];
+ nent++;
+ }
+
+ memcpy(&cpuid_full->entries[nent], cpuid_hv->entries,
+ cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2));
+ cpuid_full->nent = nent + cpuid_hv->nent;
+ }
+
+ vcpu_init_cpuid(vcpu, cpuid_full);
+}
+
+const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+
+ vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+
+ return cpuid;
+}
+
+bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature)
+{
+ if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID))
+ return false;
+
+ return kvm_cpuid_has(kvm_get_supported_hv_cpuid(), feature);
+}
+
+struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
+ vm_vaddr_t *p_hv_pages_gva)
+{
+ vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm);
+ struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva);
+
+ /* Setup of a region of guest memory for the VP Assist page. */
+ hv->vp_assist = (void *)vm_vaddr_alloc_page(vm);
+ hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist);
+ hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist);
+
+ /* Setup of a region of guest memory for the partition assist page. */
+ hv->partition_assist = (void *)vm_vaddr_alloc_page(vm);
+ hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist);
+ hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist);
+
+ /* Setup of a region of guest memory for the enlightened VMCS. */
+ hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm);
+ hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs);
+ hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs);
+
+ *p_hv_pages_gva = hv_pages_gva;
+ return hv;
+}
+
+int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
+{
+ uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
+ HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+ wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val);
+
+ current_vp_assist = vp_assist;
+
+ return 0;
+}
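/*
 * Illustrative sketch, not part of the patch: typical guest-side use of the
 * pages allocated above, with the hyperv_test_pages pointer handed to the
 * guest via vcpu_args_set() by the host side of the test.
 */
static void guest_use_vp_assist(struct hyperv_test_pages *hv)
{
	GUEST_ASSERT(!enable_vp_assist(hv->vp_assist_gpa, hv->vp_assist));
}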
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * x86-specific extensions to memstress.c.
+ *
+ * Copyright (C) 2022, Google, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "memstress.h"
+#include "processor.h"
+#include "vmx.h"
+
+void memstress_l2_guest_code(uint64_t vcpu_id)
+{
+ memstress_guest_code(vcpu_id);
+ vmcall();
+}
+
+extern char memstress_l2_guest_entry[];
+__asm__(
+"memstress_l2_guest_entry:"
+" mov (%rsp), %rdi;"
+" call memstress_l2_guest_code;"
+" ud2;"
+);
+
+static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ unsigned long *rsp;
+
+ GUEST_ASSERT(vmx->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ GUEST_ASSERT(load_vmcs(vmx));
+ GUEST_ASSERT(ept_1g_pages_supported());
+
+ rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1];
+ *rsp = vcpu_id;
+ prepare_vmcs(vmx, memstress_l2_guest_entry, rsp);
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_DONE();
+}
+
+uint64_t memstress_nested_pages(int nr_vcpus)
+{
+ /*
+ * 513 page tables is enough to identity-map 256 TiB of L2 with 1G
+ * pages and 4-level paging, plus a few pages per-vCPU for data
+ * structures such as the VMCS.
+ */
+ return 513 + 10 * nr_vcpus;
+}
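/*
 * Worked note, not part of the patch: with 4-level paging and 1GiB EPT
 * mappings, one PML4 page plus 512 PDPT pages cover the whole 48-bit space,
 * since 512 PDPTs * 512 entries * 1GiB = 256 TiB; hence the 513 above. The
 * "10 * nr_vcpus" term is a loose per-vCPU bound for the VMCS and friends.
 */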
+
+void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
+{
+ uint64_t start, end;
+
+ prepare_eptp(vmx, vm, 0);
+
+ /*
+ * Identity map the first 4G and the test region with 1G pages so that
+ * KVM can shadow the EPT12 with the maximum huge page size supported
+ * by the backing source.
+ */
+ nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL);
+
+ start = align_down(memstress_args.gpa, PG_SIZE_1G);
+ end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G);
+ nested_identity_map_1g(vmx, vm, start, end - start);
+}
+
+void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[])
+{
+ struct vmx_pages *vmx, *vmx0 = NULL;
+ struct kvm_regs regs;
+ vm_vaddr_t vmx_gva;
+ int vcpu_id;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has_ept());
+
+ for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+ vmx = vcpu_alloc_vmx(vm, &vmx_gva);
+
+ if (vcpu_id == 0) {
+ memstress_setup_ept(vmx, vm);
+ vmx0 = vmx;
+ } else {
+ /* Share the same EPT table across all vCPUs. */
+ vmx->eptp = vmx0->eptp;
+ vmx->eptp_hva = vmx0->eptp_hva;
+ vmx->eptp_gpa = vmx0->eptp_gpa;
+ }
+
+ /*
+ * Override the vCPU to run memstress_l1_guest_code() which will
+ * bounce it into L2 before calling memstress_guest_code().
+ */
+ vcpu_regs_get(vcpus[vcpu_id], &regs);
+ regs.rip = (unsigned long) memstress_l1_guest_code;
+ vcpu_regs_set(vcpus[vcpu_id], &regs);
+ vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id);
+ }
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+
+#include <stdint.h>
+
+#include <linux/kernel.h>
+
+#include "kvm_util.h"
+#include "pmu.h"
+
+const uint64_t intel_pmu_arch_events[] = {
+ INTEL_ARCH_CPU_CYCLES,
+ INTEL_ARCH_INSTRUCTIONS_RETIRED,
+ INTEL_ARCH_REFERENCE_CYCLES,
+ INTEL_ARCH_LLC_REFERENCES,
+ INTEL_ARCH_LLC_MISSES,
+ INTEL_ARCH_BRANCHES_RETIRED,
+ INTEL_ARCH_BRANCHES_MISPREDICTED,
+ INTEL_ARCH_TOPDOWN_SLOTS,
+};
+kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS);
+
+const uint64_t amd_pmu_zen_events[] = {
+ AMD_ZEN_CORE_CYCLES,
+ AMD_ZEN_INSTRUCTIONS_RETIRED,
+ AMD_ZEN_BRANCHES_RETIRED,
+ AMD_ZEN_BRANCHES_MISPREDICTED,
+};
+kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#include "linux/bitmap.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "sev.h"
+
+#ifndef NUM_INTERRUPTS
+#define NUM_INTERRUPTS 256
+#endif
+
+#define KERNEL_CS 0x8
+#define KERNEL_DS 0x10
+#define KERNEL_TSS 0x18
+
+vm_vaddr_t exception_handlers;
+bool host_cpu_is_amd;
+bool host_cpu_is_intel;
+bool is_forced_emulation_enabled;
+uint64_t guest_tsc_khz;
+
+static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent)
+{
+ fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
+ "rcx: 0x%.16llx rdx: 0x%.16llx\n",
+ indent, "",
+ regs->rax, regs->rbx, regs->rcx, regs->rdx);
+ fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
+ "rsp: 0x%.16llx rbp: 0x%.16llx\n",
+ indent, "",
+ regs->rsi, regs->rdi, regs->rsp, regs->rbp);
+ fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx "
+ "r10: 0x%.16llx r11: 0x%.16llx\n",
+ indent, "",
+ regs->r8, regs->r9, regs->r10, regs->r11);
+ fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
+ "r14: 0x%.16llx r15: 0x%.16llx\n",
+ indent, "",
+ regs->r12, regs->r13, regs->r14, regs->r15);
+ fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
+ indent, "",
+ regs->rip, regs->rflags);
+}
+
+static void segment_dump(FILE *stream, struct kvm_segment *segment,
+ uint8_t indent)
+{
+ fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
+ "selector: 0x%.4x type: 0x%.2x\n",
+ indent, "", segment->base, segment->limit,
+ segment->selector, segment->type);
+ fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
+ "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
+ indent, "", segment->present, segment->dpl,
+ segment->db, segment->s, segment->l);
+ fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
+ "unusable: 0x%.2x padding: 0x%.2x\n",
+ indent, "", segment->g, segment->avl,
+ segment->unusable, segment->padding);
+}
+
+static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
+ uint8_t indent)
+{
+ fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
+ "padding: 0x%.4x 0x%.4x 0x%.4x\n",
+ indent, "", dtable->base, dtable->limit,
+ dtable->padding[0], dtable->padding[1], dtable->padding[2]);
+}
+
+static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent)
+{
+ unsigned int i;
+
+ fprintf(stream, "%*scs:\n", indent, "");
+ segment_dump(stream, &sregs->cs, indent + 2);
+ fprintf(stream, "%*sds:\n", indent, "");
+ segment_dump(stream, &sregs->ds, indent + 2);
+ fprintf(stream, "%*ses:\n", indent, "");
+ segment_dump(stream, &sregs->es, indent + 2);
+ fprintf(stream, "%*sfs:\n", indent, "");
+ segment_dump(stream, &sregs->fs, indent + 2);
+ fprintf(stream, "%*sgs:\n", indent, "");
+ segment_dump(stream, &sregs->gs, indent + 2);
+ fprintf(stream, "%*sss:\n", indent, "");
+ segment_dump(stream, &sregs->ss, indent + 2);
+ fprintf(stream, "%*str:\n", indent, "");
+ segment_dump(stream, &sregs->tr, indent + 2);
+ fprintf(stream, "%*sldt:\n", indent, "");
+ segment_dump(stream, &sregs->ldt, indent + 2);
+
+ fprintf(stream, "%*sgdt:\n", indent, "");
+ dtable_dump(stream, &sregs->gdt, indent + 2);
+ fprintf(stream, "%*sidt:\n", indent, "");
+ dtable_dump(stream, &sregs->idt, indent + 2);
+
+ fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
+ "cr3: 0x%.16llx cr4: 0x%.16llx\n",
+ indent, "",
+ sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
+ fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
+ "apic_base: 0x%.16llx\n",
+ indent, "",
+ sregs->cr8, sregs->efer, sregs->apic_base);
+
+ fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
+ for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
+ fprintf(stream, "%*s%.16llx\n", indent + 2, "",
+ sregs->interrupt_bitmap[i]);
+ }
+}
+
+bool kvm_is_tdp_enabled(void)
+{
+ if (host_cpu_is_intel)
+ return get_kvm_intel_param_bool("ept");
+ else
+ return get_kvm_amd_param_bool("npt");
+}
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
+{
+ TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
+ "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ /* If needed, create page map l4 table. */
+ if (!vm->pgd_created) {
+ vm->pgd = vm_alloc_page_table(vm);
+ vm->pgd_created = true;
+ }
+}
+
+static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte,
+ uint64_t vaddr, int level)
+{
+ uint64_t pt_gpa = PTE_GET_PA(*parent_pte);
+ uint64_t *page_table = addr_gpa2hva(vm, pt_gpa);
+ int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
+
+ TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd,
+ "Parent PTE (level %d) not PRESENT for gva: 0x%08lx",
+ level + 1, vaddr);
+
+ return &page_table[index];
+}
+
+static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
+ uint64_t *parent_pte,
+ uint64_t vaddr,
+ uint64_t paddr,
+ int current_level,
+ int target_level)
+{
+ uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level);
+
+ paddr = vm_untag_gpa(vm, paddr);
+
+ if (!(*pte & PTE_PRESENT_MASK)) {
+ *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
+ if (current_level == target_level)
+ *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+ else
+ *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
+ } else {
+ /*
+ * Entry already present. Assert that the caller doesn't want
+ * a hugepage at this level, and that there isn't a hugepage at
+ * this level.
+ */
+ TEST_ASSERT(current_level != target_level,
+ "Cannot create hugepage at level: %u, vaddr: 0x%lx",
+ current_level, vaddr);
+ TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
+ "Cannot create page table at level: %u, vaddr: 0x%lx",
+ current_level, vaddr);
+ }
+ return pte;
+}
+
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
+{
+ const uint64_t pg_size = PG_LEVEL_SIZE(level);
+ uint64_t *pml4e, *pdpe, *pde;
+ uint64_t *pte;
+
+ TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
+ "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ TEST_ASSERT((vaddr % pg_size) == 0,
+ "Virtual address not aligned,\n"
+ "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % pg_size) == 0,
+ "Physical address not aligned,\n"
+ " paddr: 0x%lx page size: 0x%lx", paddr, pg_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+ TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr,
+ "Unexpected bits in paddr: %lx", paddr);
+
+ /*
+ * Allocate upper level page tables, if not already present. Return
+ * early if a hugepage was created.
+ */
+ pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level);
+ if (*pml4e & PTE_LARGE_MASK)
+ return;
+
+ pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level);
+ if (*pdpe & PTE_LARGE_MASK)
+ return;
+
+ pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level);
+ if (*pde & PTE_LARGE_MASK)
+ return;
+
+ /* Fill in page table entry. */
+ pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
+ TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
+ "PTE already present for 4k page at vaddr: 0x%lx", vaddr);
+ *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+
+ /*
+ * Neither SEV nor TDX supports shared page tables, so only the final
+ * leaf PTE needs the C/S-bit set manually.
+ */
+ if (vm_is_gpa_protected(vm, paddr))
+ *pte |= vm->arch.c_bit;
+ else
+ *pte |= vm->arch.s_bit;
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
+}
+
+void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint64_t nr_bytes, int level)
+{
+ uint64_t pg_size = PG_LEVEL_SIZE(level);
+ uint64_t nr_pages = nr_bytes / pg_size;
+ int i;
+
+ TEST_ASSERT(nr_bytes % pg_size == 0,
+ "Region size not aligned: nr_bytes: 0x%lx, page size: 0x%lx",
+ nr_bytes, pg_size);
+
+ for (i = 0; i < nr_pages; i++) {
+ __virt_pg_map(vm, vaddr, paddr, level);
+
+ vaddr += pg_size;
+ paddr += pg_size;
+ }
+}
+
+static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
+{
+ if (*pte & PTE_LARGE_MASK) {
+ TEST_ASSERT(*level == PG_LEVEL_NONE ||
+ *level == current_level,
+ "Unexpected hugepage at level %d", current_level);
+ *level = current_level;
+ }
+
+ return *level == current_level;
+}
+
+uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
+ int *level)
+{
+ uint64_t *pml4e, *pdpe, *pde;
+
+ TEST_ASSERT(!vm->arch.is_pt_protected,
+ "Walking page tables of protected guests is impossible");
+
+ TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM,
+ "Invalid PG_LEVEL_* '%d'", *level);
+
+ TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
+ "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx",
+ vaddr);
+ /*
+ * Based on the mode check above there are 48 bits in the vaddr, so
+ * shift 16 to sign extend the last bit (bit-47),
+ * shift 16 to sign extend the last bit (bit-47).
+ TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
+ "Canonical check failed. The virtual address is invalid.");
+
+ pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G);
+ if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G))
+ return pml4e;
+
+ pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G);
+ if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G))
+ return pdpe;
+
+ pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M);
+ if (vm_is_target_pte(pde, level, PG_LEVEL_2M))
+ return pde;
+
+ return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
+}
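/*
 * Worked note, not part of the patch: the shift-left-then-right-by-16
 * canonical check above sign-extends bit 47 over bits 63:48. For example,
 * 0xffff800000000000 survives the round trip (canonical), whereas
 * 0x0000800000000000 becomes 0xffff800000000000 and is rejected.
 */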
+
+uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
+{
+ int level = PG_LEVEL_4K;
+
+ return __vm_get_page_table_entry(vm, vaddr, &level);
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ uint64_t *pml4e, *pml4e_start;
+ uint64_t *pdpe, *pdpe_start;
+ uint64_t *pde, *pde_start;
+ uint64_t *pte, *pte_start;
+
+ if (!vm->pgd_created)
+ return;
+
+ fprintf(stream, "%*s "
+ " no\n", indent, "");
+ fprintf(stream, "%*s index hvaddr gpaddr "
+ "addr w exec dirty\n",
+ indent, "");
+ pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
+ for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
+ pml4e = &pml4e_start[n1];
+ if (!(*pml4e & PTE_PRESENT_MASK))
+ continue;
+ fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
+ " %u\n",
+ indent, "",
+ pml4e - pml4e_start, pml4e,
+ addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
+ !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
+
+ pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
+ for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
+ pdpe = &pdpe_start[n2];
+ if (!(*pdpe & PTE_PRESENT_MASK))
+ continue;
+ fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx "
+ "%u %u\n",
+ indent, "",
+ pdpe - pdpe_start, pdpe,
+ addr_hva2gpa(vm, pdpe),
+ PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
+ !!(*pdpe & PTE_NX_MASK));
+
+ pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
+ for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
+ pde = &pde_start[n3];
+ if (!(*pde & PTE_PRESENT_MASK))
+ continue;
+ fprintf(stream, "%*spde 0x%-3zx %p "
+ "0x%-12lx 0x%-10llx %u %u\n",
+ indent, "", pde - pde_start, pde,
+ addr_hva2gpa(vm, pde),
+ PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
+ !!(*pde & PTE_NX_MASK));
+
+ pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
+ for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
+ pte = &pte_start[n4];
+ if (!(*pte & PTE_PRESENT_MASK))
+ continue;
+ fprintf(stream, "%*spte 0x%-3zx %p "
+ "0x%-12lx 0x%-10llx %u %u "
+ " %u 0x%-10lx\n",
+ indent, "",
+ pte - pte_start, pte,
+ addr_hva2gpa(vm, pte),
+ PTE_GET_PFN(*pte),
+ !!(*pte & PTE_WRITABLE_MASK),
+ !!(*pte & PTE_NX_MASK),
+ !!(*pte & PTE_DIRTY_MASK),
+ ((uint64_t) n1 << 27)
+ | ((uint64_t) n2 << 18)
+ | ((uint64_t) n3 << 9)
+ | ((uint64_t) n4));
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Set Unusable Segment
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ * segp - Pointer to segment register
+ *
+ * Return: None
+ *
+ * Sets the segment register pointed to by @segp to an unusable state.
+ */
+static void kvm_seg_set_unusable(struct kvm_segment *segp)
+{
+ memset(segp, 0, sizeof(*segp));
+ segp->unusable = true;
+}
+
+static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
+{
+ void *gdt = addr_gva2hva(vm, vm->arch.gdt);
+ struct desc64 *desc = gdt + (segp->selector >> 3) * 8;
+
+ desc->limit0 = segp->limit & 0xFFFF;
+ desc->base0 = segp->base & 0xFFFF;
+ desc->base1 = segp->base >> 16;
+ desc->type = segp->type;
+ desc->s = segp->s;
+ desc->dpl = segp->dpl;
+ desc->p = segp->present;
+ desc->limit1 = segp->limit >> 16;
+ desc->avl = segp->avl;
+ desc->l = segp->l;
+ desc->db = segp->db;
+ desc->g = segp->g;
+ desc->base2 = segp->base >> 24;
+ if (!segp->s)
+ desc->base3 = segp->base >> 32;
+}
+
+static void kvm_seg_set_kernel_code_64bit(struct kvm_segment *segp)
+{
+ memset(segp, 0, sizeof(*segp));
+ segp->selector = KERNEL_CS;
+ segp->limit = 0xFFFFFFFFu;
+ segp->s = 0x1; /* kTypeCodeData */
+ segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
+ * | kFlagCodeReadable
+ */
+ segp->g = true;
+ segp->l = true;
+ segp->present = 1;
+}
+
+static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp)
+{
+ memset(segp, 0, sizeof(*segp));
+ segp->selector = KERNEL_DS;
+ segp->limit = 0xFFFFFFFFu;
+ segp->s = 0x1; /* kTypeCodeData */
+ segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
+ * | kFlagDataWritable
+ */
+ segp->g = true;
+ segp->present = true;
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ int level = PG_LEVEL_NONE;
+ uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level);
+
+ TEST_ASSERT(*pte & PTE_PRESENT_MASK,
+ "Leaf PTE not PRESENT for gva: 0x%08lx", gva);
+
+ /*
+ * No need for a hugepage mask on the PTE, x86-64 requires the "unused"
+ * address bits to be zero.
+ */
+ return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level));
+}
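A quick illustration of how the return value above is assembled when the GVA happens to be backed by a 2M mapping (the addresses below are made up, not from this patch):

/*
 * HUGEPAGE_MASK(PG_LEVEL_2M) keeps bits 63:21, so the low 21 bits of the
 * GVA are the offset into the huge page:
 *
 *   gva                                = 0x40123456
 *   PTE_GET_PA(*pte)                   = 0x80000000   (2M-aligned)
 *   gva & ~HUGEPAGE_MASK(PG_LEVEL_2M)  = 0x00123456
 *   returned GPA                       = 0x80123456
 */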
+
+static void kvm_seg_set_tss_64bit(vm_vaddr_t base, struct kvm_segment *segp)
+{
+ memset(segp, 0, sizeof(*segp));
+ segp->base = base;
+ segp->limit = 0x67;
+ segp->selector = KERNEL_TSS;
+ segp->type = 0xb;
+ segp->present = 1;
+}
+
+static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ struct kvm_sregs sregs;
+
+ TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K);
+
+ /* Set mode specific system register values. */
+ vcpu_sregs_get(vcpu, &sregs);
+
+ sregs.idt.base = vm->arch.idt;
+ sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
+ sregs.gdt.base = vm->arch.gdt;
+ sregs.gdt.limit = getpagesize() - 1;
+
+ sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
+ sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
+ if (kvm_cpu_has(X86_FEATURE_XSAVE))
+ sregs.cr4 |= X86_CR4_OSXSAVE;
+ sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
+
+ kvm_seg_set_unusable(&sregs.ldt);
+ kvm_seg_set_kernel_code_64bit(&sregs.cs);
+ kvm_seg_set_kernel_data_64bit(&sregs.ds);
+ kvm_seg_set_kernel_data_64bit(&sregs.es);
+ kvm_seg_set_kernel_data_64bit(&sregs.gs);
+ kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr);
+
+ sregs.cr3 = vm->pgd;
+ vcpu_sregs_set(vcpu, &sregs);
+}
+
+static void vcpu_init_xcrs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ struct kvm_xcrs xcrs = {
+ .nr_xcrs = 1,
+ .xcrs[0].xcr = 0,
+ .xcrs[0].value = kvm_cpu_supported_xcr0(),
+ };
+
+ if (!kvm_cpu_has(X86_FEATURE_XSAVE))
+ return;
+
+ vcpu_xcrs_set(vcpu, &xcrs);
+}
+
+static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
+ int dpl, unsigned short selector)
+{
+ struct idt_entry *base =
+ (struct idt_entry *)addr_gva2hva(vm, vm->arch.idt);
+ struct idt_entry *e = &base[vector];
+
+ memset(e, 0, sizeof(*e));
+ e->offset0 = addr;
+ e->selector = selector;
+ e->ist = 0;
+ e->type = 14;
+ e->dpl = dpl;
+ e->p = 1;
+ e->offset1 = addr >> 16;
+ e->offset2 = addr >> 32;
+}
+
+static bool kvm_fixup_exception(struct ex_regs *regs)
+{
+ if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10)
+ return false;
+
+ if (regs->vector == DE_VECTOR)
+ return false;
+
+ regs->rip = regs->r11;
+ regs->r9 = regs->vector;
+ regs->r10 = regs->error_code;
+ return true;
+}
+
+void route_exception(struct ex_regs *regs)
+{
+ typedef void(*handler)(struct ex_regs *);
+ handler *handlers = (handler *)exception_handlers;
+
+ if (handlers && handlers[regs->vector]) {
+ handlers[regs->vector](regs);
+ return;
+ }
+
+ if (kvm_fixup_exception(regs))
+ return;
+
+ GUEST_FAIL("Unhandled exception '0x%lx' at guest RIP '0x%lx'",
+ regs->vector, regs->rip);
+}
+
+static void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+ extern void *idt_handlers;
+ struct kvm_segment seg;
+ int i;
+
+ vm->arch.gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+ vm->arch.idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+ vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+ vm->arch.tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+
+ /* Handlers have the same address in both address spaces.*/
+ for (i = 0; i < NUM_INTERRUPTS; i++)
+ set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, KERNEL_CS);
+
+ *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+
+ kvm_seg_set_kernel_code_64bit(&seg);
+ kvm_seg_fill_gdt_64bit(vm, &seg);
+
+ kvm_seg_set_kernel_data_64bit(&seg);
+ kvm_seg_fill_gdt_64bit(vm, &seg);
+
+ kvm_seg_set_tss_64bit(vm->arch.tss, &seg);
+ kvm_seg_fill_gdt_64bit(vm, &seg);
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+ void (*handler)(struct ex_regs *))
+{
+ vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
+
+ handlers[vector] = (vm_vaddr_t)handler;
+}
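A hypothetical use of the handler plumbing above, as a test might wire it up (the vector, the guest handler, and the 2-byte RIP advance are illustrative placeholders, not part of this patch):

static void guest_gp_handler(struct ex_regs *regs)
{
	/* Illustrative: skip over a known 2-byte faulting instruction. */
	regs->rip += 2;
}

	/* Host side, after VM creation: */
	vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);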
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) == UCALL_ABORT)
+ REPORT_GUEST_ASSERT(uc);
+}
+
+void kvm_arch_vm_post_create(struct kvm_vm *vm)
+{
+ int r;
+
+ TEST_ASSERT(kvm_has_cap(KVM_CAP_GET_TSC_KHZ),
+ "Require KVM_GET_TSC_KHZ to provide udelay() to guest.");
+
+ vm_create_irqchip(vm);
+ vm_init_descriptor_tables(vm);
+
+ sync_global_to_guest(vm, host_cpu_is_intel);
+ sync_global_to_guest(vm, host_cpu_is_amd);
+ sync_global_to_guest(vm, is_forced_emulation_enabled);
+
+ if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
+ struct kvm_sev_init init = { 0 };
+
+ vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+ }
+
+ r = __vm_ioctl(vm, KVM_GET_TSC_KHZ, NULL);
+ TEST_ASSERT(r > 0, "KVM_GET_TSC_KHZ did not provide a valid TSC frequency.");
+ guest_tsc_khz = r;
+ sync_global_to_guest(vm, guest_tsc_khz);
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+ struct kvm_regs regs;
+
+ vcpu_regs_get(vcpu, &regs);

+ regs.rip = (unsigned long) guest_code;
+ vcpu_regs_set(vcpu, &regs);
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ struct kvm_mp_state mp_state;
+ struct kvm_regs regs;
+ vm_vaddr_t stack_vaddr;
+ struct kvm_vcpu *vcpu;
+
+ stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
+ DEFAULT_GUEST_STACK_VADDR_MIN,
+ MEM_REGION_DATA);
+
+ stack_vaddr += DEFAULT_STACK_PGS * getpagesize();
+
+ /*
+ * Align stack to match calling sequence requirements in section "The
+ * Stack Frame" of the System V ABI AMD64 Architecture Processor
+ * Supplement, which requires the value (%rsp + 8) to be a multiple of
+ * 16 when control is transferred to the function entry point.
+ *
+ * If this code is ever used to launch a vCPU with 32-bit entry point it
+ * may need to subtract 4 bytes instead of 8 bytes.
+ */
+ TEST_ASSERT(IS_ALIGNED(stack_vaddr, PAGE_SIZE),
+ "__vm_vaddr_alloc() did not provide a page-aligned address");
+ stack_vaddr -= 8;
+
+ vcpu = __vm_vcpu_add(vm, vcpu_id);
+ vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
+ vcpu_init_sregs(vm, vcpu);
+ vcpu_init_xcrs(vm, vcpu);
+
+ /* Setup guest general purpose registers */
+ vcpu_regs_get(vcpu, &regs);
+ regs.rflags = regs.rflags | 0x2;
+ regs.rsp = stack_vaddr;
+ vcpu_regs_set(vcpu, &regs);
+
+ /* Setup the MP state */
+ mp_state.mp_state = 0;
+ vcpu_mp_state_set(vcpu, &mp_state);
+
+ /*
+ * Refresh CPUID after setting SREGS and XCR0, so that KVM's "runtime"
+ * updates to guest CPUID, e.g. for OSXSAVE and XSAVE state size, are
+ * reflected into selftests' vCPU CPUID cache, i.e. so that the cache
+ * is consistent with vCPU state.
+ */
+ vcpu_get_cpuid(vcpu);
+ return vcpu;
+}
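The 8-byte stack adjustment above can be sanity-checked with a line of modular arithmetic (nothing new, just the ABI rule spelled out):

/*
 * stack_vaddr starts page-aligned, hence a multiple of 16.  After
 * "stack_vaddr -= 8":
 *
 *   rsp % 16       == 8
 *   (rsp + 8) % 16 == 0    <- the SysV AMD64 requirement at function entry
 */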
+
+struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
+
+ vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
+
+ return vcpu;
+}
+
+void vcpu_arch_free(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->cpuid)
+ free(vcpu->cpuid);
+}
+
+/* Do not use kvm_supported_cpuid directly except for validity checks. */
+static void *kvm_supported_cpuid;
+
+const struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
+{
+ int kvm_fd;
+
+ if (kvm_supported_cpuid)
+ return kvm_supported_cpuid;
+
+ kvm_supported_cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+ kvm_fd = open_kvm_dev_path_or_exit();
+
+ kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID,
+ (struct kvm_cpuid2 *)kvm_supported_cpuid);
+
+ close(kvm_fd);
+ return kvm_supported_cpuid;
+}
+
+static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid,
+ uint32_t function, uint32_t index,
+ uint8_t reg, uint8_t lo, uint8_t hi)
+{
+ const struct kvm_cpuid_entry2 *entry;
+ int i;
+
+ for (i = 0; i < cpuid->nent; i++) {
+ entry = &cpuid->entries[i];
+
+ /*
+ * The output registers in kvm_cpuid_entry2 are in alphabetical
+ * order, but kvm_x86_cpu_feature matches that mess, so yay
+ * pointer shenanigans!
+ */
+ if (entry->function == function && entry->index == index)
+ return ((&entry->eax)[reg] & GENMASK(hi, lo)) >> lo;
+ }
+
+ return 0;
+}
+
+bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
+ struct kvm_x86_cpu_feature feature)
+{
+ return __kvm_cpu_has(cpuid, feature.function, feature.index,
+ feature.reg, feature.bit, feature.bit);
+}
+
+uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
+ struct kvm_x86_cpu_property property)
+{
+ return __kvm_cpu_has(cpuid, property.function, property.index,
+ property.reg, property.lo_bit, property.hi_bit);
+}
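The pointer arithmetic in __kvm_cpu_has() leans on eax/ebx/ecx/edx being laid out back-to-back in struct kvm_cpuid_entry2; a sketch of what one lookup reduces to (the register and bit values are illustrative):

/*
 * For reg == 2 (ECX) and lo == hi == 5, the expression becomes
 *
 *   (entry->ecx & GENMASK(5, 5)) >> 5
 *
 * i.e. a single right-justified feature bit.  For a multi-bit property,
 * lo/hi span the whole field and the shift right-justifies the value.
 */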
+
+uint64_t kvm_get_feature_msr(uint64_t msr_index)
+{
+ struct {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+ } buffer = {};
+ int r, kvm_fd;
+
+ buffer.header.nmsrs = 1;
+ buffer.entry.index = msr_index;
+ kvm_fd = open_kvm_dev_path_or_exit();
+
+ r = __kvm_ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
+ TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_GET_MSRS, r));
+
+ close(kvm_fd);
+ return buffer.entry.data;
+}
+
+void __vm_xsave_require_permission(uint64_t xfeature, const char *name)
+{
+ int kvm_fd;
+ u64 bitmask;
+ long rc;
+ struct kvm_device_attr attr = {
+ .group = 0,
+ .attr = KVM_X86_XCOMP_GUEST_SUPP,
+ .addr = (unsigned long) &bitmask,
+ };
+
+ TEST_ASSERT(!kvm_supported_cpuid,
+ "kvm_get_supported_cpuid() cannot be used before ARCH_REQ_XCOMP_GUEST_PERM");
+
+ TEST_ASSERT(is_power_of_2(xfeature),
+ "Dynamic XFeatures must be enabled one at a time");
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
+ close(kvm_fd);
+
+ if (rc == -1 && (errno == ENXIO || errno == EINVAL))
+ __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported");
+
+ TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);
+
+ __TEST_REQUIRE(bitmask & xfeature,
+ "Required XSAVE feature '%s' not supported", name);
+
+ TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, ilog2(xfeature)));
+
+ rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
+ TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
+ TEST_ASSERT(bitmask & xfeature,
+ "'%s' (0x%lx) not permitted after prctl(ARCH_REQ_XCOMP_GUEST_PERM) permitted=0x%lx",
+ name, xfeature, bitmask);
+}
+
+void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid)
+{
+ TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID");
+
+ /* Allow overriding the default CPUID. */
+ if (vcpu->cpuid && vcpu->cpuid->nent < cpuid->nent) {
+ free(vcpu->cpuid);
+ vcpu->cpuid = NULL;
+ }
+
+ if (!vcpu->cpuid)
+ vcpu->cpuid = allocate_kvm_cpuid2(cpuid->nent);
+
+ memcpy(vcpu->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent));
+ vcpu_set_cpuid(vcpu);
+}
+
+void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_property property,
+ uint32_t value)
+{
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = __vcpu_get_cpuid_entry(vcpu, property.function, property.index);
+
+ (&entry->eax)[property.reg] &= ~GENMASK(property.hi_bit, property.lo_bit);
+ (&entry->eax)[property.reg] |= value << property.lo_bit;
+
+ vcpu_set_cpuid(vcpu);
+
+ /* Sanity check that @value doesn't exceed the bounds in any way. */
+ TEST_ASSERT_EQ(kvm_cpuid_property(vcpu->cpuid, property), value);
+}
+
+void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function)
+{
+ struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function);
+
+ entry->eax = 0;
+ entry->ebx = 0;
+ entry->ecx = 0;
+ entry->edx = 0;
+ vcpu_set_cpuid(vcpu);
+}
+
+void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature,
+ bool set)
+{
+ struct kvm_cpuid_entry2 *entry;
+ u32 *reg;
+
+ entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
+ reg = (&entry->eax) + feature.reg;
+
+ if (set)
+ *reg |= BIT(feature.bit);
+ else
+ *reg &= ~BIT(feature.bit);
+
+ vcpu_set_cpuid(vcpu);
+}
+
+uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index)
+{
+ struct {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+ } buffer = {};
+
+ buffer.header.nmsrs = 1;
+ buffer.entry.index = msr_index;
+
+ vcpu_msrs_get(vcpu, &buffer.header);
+
+ return buffer.entry.data;
+}
+
+int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value)
+{
+ struct {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+ } buffer = {};
+
+ memset(&buffer, 0, sizeof(buffer));
+ buffer.header.nmsrs = 1;
+ buffer.entry.index = msr_index;
+ buffer.entry.data = msr_value;
+
+ return __vcpu_ioctl(vcpu, KVM_SET_MSRS, &buffer.header);
+}
+
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
+{
+ va_list ap;
+ struct kvm_regs regs;
+
+ TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
+ " num: %u",
+ num);
+
+ va_start(ap, num);
+ vcpu_regs_get(vcpu, &regs);
+
+ if (num >= 1)
+ regs.rdi = va_arg(ap, uint64_t);
+
+ if (num >= 2)
+ regs.rsi = va_arg(ap, uint64_t);
+
+ if (num >= 3)
+ regs.rdx = va_arg(ap, uint64_t);
+
+ if (num >= 4)
+ regs.rcx = va_arg(ap, uint64_t);
+
+ if (num >= 5)
+ regs.r8 = va_arg(ap, uint64_t);
+
+ if (num >= 6)
+ regs.r9 = va_arg(ap, uint64_t);
+
+ vcpu_regs_set(vcpu, &regs);
+ va_end(ap);
+}
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+{
+ struct kvm_regs regs;
+ struct kvm_sregs sregs;
+
+ fprintf(stream, "%*svCPU ID: %u\n", indent, "", vcpu->id);
+
+ fprintf(stream, "%*sregs:\n", indent + 2, "");
+ vcpu_regs_get(vcpu, &regs);
+ regs_dump(stream, &regs, indent + 4);
+
+ fprintf(stream, "%*ssregs:\n", indent + 2, "");
+ vcpu_sregs_get(vcpu, &sregs);
+ sregs_dump(stream, &sregs, indent + 4);
+}
+
+static struct kvm_msr_list *__kvm_get_msr_index_list(bool feature_msrs)
+{
+ struct kvm_msr_list *list;
+ struct kvm_msr_list nmsrs;
+ int kvm_fd, r;
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+
+ nmsrs.nmsrs = 0;
+ if (!feature_msrs)
+ r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
+ else
+ r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, &nmsrs);
+
+ TEST_ASSERT(r == -1 && errno == E2BIG,
+ "Expected -E2BIG, got rc: %i errno: %i (%s)",
+ r, errno, strerror(errno));
+
+ list = malloc(sizeof(*list) + nmsrs.nmsrs * sizeof(list->indices[0]));
+ TEST_ASSERT(list, "-ENOMEM when allocating MSR index list");
+ list->nmsrs = nmsrs.nmsrs;
+
+ if (!feature_msrs)
+ kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+ else
+ kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
+ close(kvm_fd);
+
+ TEST_ASSERT(list->nmsrs == nmsrs.nmsrs,
+ "Number of MSRs in list changed, was %d, now %d",
+ nmsrs.nmsrs, list->nmsrs);
+ return list;
+}
+
+const struct kvm_msr_list *kvm_get_msr_index_list(void)
+{
+ static const struct kvm_msr_list *list;
+
+ if (!list)
+ list = __kvm_get_msr_index_list(false);
+ return list;
+}
+
+
+const struct kvm_msr_list *kvm_get_feature_msr_index_list(void)
+{
+ static const struct kvm_msr_list *list;
+
+ if (!list)
+ list = __kvm_get_msr_index_list(true);
+ return list;
+}
+
+bool kvm_msr_is_in_save_restore_list(uint32_t msr_index)
+{
+ const struct kvm_msr_list *list = kvm_get_msr_index_list();
+ int i;
+
+ for (i = 0; i < list->nmsrs; ++i) {
+ if (list->indices[i] == msr_index)
+ return true;
+ }
+
+ return false;
+}
+
+static void vcpu_save_xsave_state(struct kvm_vcpu *vcpu,
+ struct kvm_x86_state *state)
+{
+ int size = vm_check_cap(vcpu->vm, KVM_CAP_XSAVE2);
+
+ if (size) {
+ state->xsave = malloc(size);
+ vcpu_xsave2_get(vcpu, state->xsave);
+ } else {
+ state->xsave = malloc(sizeof(struct kvm_xsave));
+ vcpu_xsave_get(vcpu, state->xsave);
+ }
+}
+
+struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu)
+{
+ const struct kvm_msr_list *msr_list = kvm_get_msr_index_list();
+ struct kvm_x86_state *state;
+ int i;
+
+ static int nested_size = -1;
+
+ if (nested_size == -1) {
+ nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
+ TEST_ASSERT(nested_size <= sizeof(state->nested_),
+ "Nested state size too big, %i > %zi",
+ nested_size, sizeof(state->nested_));
+ }
+
+ /*
+ * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
+ * guest state is consistent only after userspace re-enters the
+ * kernel with KVM_RUN. Complete IO prior to migrating state
+ * to a new VM.
+ */
+ vcpu_run_complete_io(vcpu);
+
+ state = malloc(sizeof(*state) + msr_list->nmsrs * sizeof(state->msrs.entries[0]));
+ TEST_ASSERT(state, "-ENOMEM when allocating kvm state");
+
+ vcpu_events_get(vcpu, &state->events);
+ vcpu_mp_state_get(vcpu, &state->mp_state);
+ vcpu_regs_get(vcpu, &state->regs);
+ vcpu_save_xsave_state(vcpu, state);
+
+ if (kvm_has_cap(KVM_CAP_XCRS))
+ vcpu_xcrs_get(vcpu, &state->xcrs);
+
+ vcpu_sregs_get(vcpu, &state->sregs);
+
+ if (nested_size) {
+ state->nested.size = sizeof(state->nested_);
+
+ vcpu_nested_state_get(vcpu, &state->nested);
+ TEST_ASSERT(state->nested.size <= nested_size,
+ "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
+ state->nested.size, nested_size);
+ } else {
+ state->nested.size = 0;
+ }
+
+ state->msrs.nmsrs = msr_list->nmsrs;
+ for (i = 0; i < msr_list->nmsrs; i++)
+ state->msrs.entries[i].index = msr_list->indices[i];
+ vcpu_msrs_get(vcpu, &state->msrs);
+
+ vcpu_debugregs_get(vcpu, &state->debugregs);
+
+ return state;
+}
+
+void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state)
+{
+ vcpu_sregs_set(vcpu, &state->sregs);
+ vcpu_msrs_set(vcpu, &state->msrs);
+
+ if (kvm_has_cap(KVM_CAP_XCRS))
+ vcpu_xcrs_set(vcpu, &state->xcrs);
+
+ vcpu_xsave_set(vcpu, state->xsave);
+ vcpu_events_set(vcpu, &state->events);
+ vcpu_mp_state_set(vcpu, &state->mp_state);
+ vcpu_debugregs_set(vcpu, &state->debugregs);
+ vcpu_regs_set(vcpu, &state->regs);
+
+ if (state->nested.size)
+ vcpu_nested_state_set(vcpu, &state->nested);
+}
+
+void kvm_x86_state_cleanup(struct kvm_x86_state *state)
+{
+ free(state->xsave);
+ free(state);
+}
+
+void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
+{
+ if (!kvm_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) {
+ *pa_bits = kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32;
+ *va_bits = 32;
+ } else {
+ *pa_bits = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
+ *va_bits = kvm_cpu_property(X86_PROPERTY_MAX_VIRT_ADDR);
+ }
+}
+
+void kvm_init_vm_address_properties(struct kvm_vm *vm)
+{
+ if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
+ vm->arch.sev_fd = open_sev_dev_path_or_exit();
+ vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT));
+ vm->gpa_tag_mask = vm->arch.c_bit;
+ } else {
+ vm->arch.sev_fd = -1;
+ }
+}
+
+const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
+ uint32_t function, uint32_t index)
+{
+ int i;
+
+ for (i = 0; i < cpuid->nent; i++) {
+ if (cpuid->entries[i].function == function &&
+ cpuid->entries[i].index == index)
+ return &cpuid->entries[i];
+ }
+
+ TEST_FAIL("CPUID function 0x%x index 0x%x not found ", function, index);
+
+ return NULL;
+}
+
+#define X86_HYPERCALL(inputs...) \
+({ \
+ uint64_t r; \
+ \
+ asm volatile("test %[use_vmmcall], %[use_vmmcall]\n\t" \
+ "jnz 1f\n\t" \
+ "vmcall\n\t" \
+ "jmp 2f\n\t" \
+ "1: vmmcall\n\t" \
+ "2:" \
+ : "=a"(r) \
+ : [use_vmmcall] "r" (host_cpu_is_amd), inputs); \
+ \
+ r; \
+})
+
+uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
+ uint64_t a3)
+{
+ return X86_HYPERCALL("a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3));
+}
+
+uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
+{
+ return X86_HYPERCALL("a"(nr), "D"(a0), "S"(a1));
+}
+
+void xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
+{
+ GUEST_ASSERT(!__xen_hypercall(nr, a0, a1));
+}
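X86_HYPERCALL() is what lets a single guest binary run on both vendors: host_cpu_is_amd is synced into guest memory by kvm_arch_vm_post_create(), so the guest selects VMMCALL on AMD and VMCALL on Intel at run time. A hypothetical guest-side call (the hypercall number and the zero-return expectation are placeholders):

	/* Guest code; 42 stands in for a real KVM_HC_* number. */
	uint64_t ret = kvm_hypercall(42, 0, 0, 0, 0);

	GUEST_ASSERT(!ret);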
+
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
+{
+ const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
+ unsigned long ht_gfn, max_gfn, max_pfn;
+ uint8_t maxphyaddr, guest_maxphyaddr;
+
+ /*
+ * Use "guest MAXPHYADDR" from KVM if it's available. Guest MAXPHYADDR
+ * enumerates the max _mappable_ GPA, which can be less than the raw
+ * MAXPHYADDR, e.g. if MAXPHYADDR=52, KVM is using TDP, and the CPU
+ * doesn't support 5-level TDP.
+ */
+ guest_maxphyaddr = kvm_cpu_property(X86_PROPERTY_GUEST_MAX_PHY_ADDR);
+ guest_maxphyaddr = guest_maxphyaddr ?: vm->pa_bits;
+ TEST_ASSERT(guest_maxphyaddr <= vm->pa_bits,
+ "Guest MAXPHYADDR should never be greater than raw MAXPHYADDR");
+
+ max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1;
+
+ /* Avoid reserved HyperTransport region on AMD processors. */
+ if (!host_cpu_is_amd)
+ return max_gfn;
+
+ /* On parts with <40 physical address bits, the area is fully hidden */
+ if (vm->pa_bits < 40)
+ return max_gfn;
+
+ /* Before family 17h, the HyperTransport area is just below 1T. */
+ ht_gfn = (1 << 28) - num_ht_pages;
+ if (this_cpu_family() < 0x17)
+ goto done;
+
+ /*
+ * Otherwise it's at the top of the physical address space, possibly
+ * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use
+ * the old conservative value if MAXPHYADDR is not enumerated.
+ */
+ if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR))
+ goto done;
+
+ maxphyaddr = this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
+ max_pfn = (1ULL << (maxphyaddr - vm->page_shift)) - 1;
+
+ if (this_cpu_has_p(X86_PROPERTY_PHYS_ADDR_REDUCTION))
+ max_pfn >>= this_cpu_property(X86_PROPERTY_PHYS_ADDR_REDUCTION);
+
+ ht_gfn = max_pfn - num_ht_pages;
+done:
+ return min(max_gfn, ht_gfn - 1);
+}
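The constants in vm_compute_max_gfn() are easier to read as arithmetic; assuming 4 KiB pages (page_shift == 12), which is what the shifts encode:

/*
 *   num_ht_pages = 12 << (30 - 12) pages = 12 GiB worth of 4 KiB pages
 *   1 << 28 pages = 2^28 * 2^12 bytes    = 1 TiB
 *
 * so on pre-family-0x17 AMD parts the computed ht_gfn keeps the test
 * below 1 TiB - 12 GiB, the start of the reserved HyperTransport window.
 */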
+
+/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */
+bool vm_is_unrestricted_guest(struct kvm_vm *vm)
+{
+ /* Ensure that a KVM vendor-specific module is loaded. */
+ if (vm == NULL)
+ close(open_kvm_dev_path_or_exit());
+
+ return get_kvm_intel_param_bool("unrestricted_guest");
+}
+
+void kvm_selftest_arch_init(void)
+{
+ host_cpu_is_intel = this_cpu_is_intel();
+ host_cpu_is_amd = this_cpu_is_amd();
+ is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
+}
+
+bool sys_clocksource_is_based_on_tsc(void)
+{
+ char *clk_name = sys_get_cur_clocksource();
+ bool ret = !strcmp(clk_name, "tsc\n") ||
+ !strcmp(clk_name, "hyperv_clocksource_tsc_page\n");
+
+ free(clk_name);
+
+ return ret;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "sev.h"
+
+/*
+ * sparsebit_next_clear() can return 0 if [x, 2**64-1] are all set, and the
+ * -1 would then cause an underflow back to 2**64 - 1. This is expected and
+ * correct.
+ *
+ * If the last range in the sparsebit is [x, y] and we try to iterate,
+ * sparsebit_next_set() will return 0, and sparsebit_next_clear() will try
+ * and find the first range, but that's correct because the condition
+ * expression would cause us to quit the loop.
+ */
+static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region)
+{
+ const struct sparsebit *protected_phy_pages = region->protected_phy_pages;
+ const vm_paddr_t gpa_base = region->region.guest_phys_addr;
+ const sparsebit_idx_t lowest_page_in_region = gpa_base >> vm->page_shift;
+ sparsebit_idx_t i, j;
+
+ if (!sparsebit_any_set(protected_phy_pages))
+ return;
+
+ sev_register_encrypted_memory(vm, region);
+
+ sparsebit_for_each_set_range(protected_phy_pages, i, j) {
+ const uint64_t size = (j - i + 1) * vm->page_size;
+ const uint64_t offset = (i - lowest_page_in_region) * vm->page_size;
+
+ sev_launch_update_data(vm, gpa_base + offset, size);
+ }
+}
+
+void sev_vm_init(struct kvm_vm *vm)
+{
+ if (vm->type == KVM_X86_DEFAULT_VM) {
+ assert(vm->arch.sev_fd == -1);
+ vm->arch.sev_fd = open_sev_dev_path_or_exit();
+ vm_sev_ioctl(vm, KVM_SEV_INIT, NULL);
+ } else {
+ struct kvm_sev_init init = { 0 };
+ assert(vm->type == KVM_X86_SEV_VM);
+ vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+ }
+}
+
+void sev_es_vm_init(struct kvm_vm *vm)
+{
+ if (vm->type == KVM_X86_DEFAULT_VM) {
+ assert(vm->arch.sev_fd == -1);
+ vm->arch.sev_fd = open_sev_dev_path_or_exit();
+ vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL);
+ } else {
+ struct kvm_sev_init init = { 0 };
+ assert(vm->type == KVM_X86_SEV_ES_VM);
+ vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+ }
+}
+
+void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
+{
+ struct kvm_sev_launch_start launch_start = {
+ .policy = policy,
+ };
+ struct userspace_mem_region *region;
+ struct kvm_sev_guest_status status;
+ int ctr;
+
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_START, &launch_start);
+ vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+
+ TEST_ASSERT_EQ(status.policy, policy);
+ TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE);
+
+ hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
+ encrypt_region(vm, region);
+
+ if (policy & SEV_POLICY_ES)
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+
+ vm->arch.is_pt_protected = true;
+}
+
+void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement)
+{
+ struct kvm_sev_launch_measure launch_measure;
+ struct kvm_sev_guest_status guest_status;
+
+ launch_measure.len = 256;
+ launch_measure.uaddr = (__u64)measurement;
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_MEASURE, &launch_measure);
+
+ vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &guest_status);
+ TEST_ASSERT_EQ(guest_status.state, SEV_GUEST_STATE_LAUNCH_SECRET);
+}
+
+void sev_vm_launch_finish(struct kvm_vm *vm)
+{
+ struct kvm_sev_guest_status status;
+
+ vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+ TEST_ASSERT(status.state == SEV_GUEST_STATE_LAUNCH_UPDATE ||
+ status.state == SEV_GUEST_STATE_LAUNCH_SECRET,
+ "Unexpected guest state: %d", status.state);
+
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_FINISH, NULL);
+
+ vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+ TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING);
+}
+
+struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
+ struct kvm_vcpu **cpu)
+{
+ struct vm_shape shape = {
+ .mode = VM_MODE_DEFAULT,
+ .type = type,
+ };
+ struct kvm_vm *vm;
+ struct kvm_vcpu *cpus[1];
+
+ vm = __vm_create_with_vcpus(shape, 1, 0, guest_code, cpus);
+ *cpu = cpus[0];
+
+ return vm;
+}
+
+void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement)
+{
+ sev_vm_launch(vm, policy);
+
+ if (!measurement)
+ measurement = alloca(256);
+
+ sev_vm_launch_measure(vm, measurement);
+
+ sev_vm_launch_finish(vm);
+}
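Taken together, the helpers in this file give a short setup path for an SEV test; a minimal sketch (guest_code, the zero policy, and the ignored measurement are placeholders, not from this patch):

	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_VM, guest_code, &vcpu);
	vm_sev_launch(vm, /*policy=*/0, /*measurement=*/NULL);
	vcpu_run(vcpu);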
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Helpers used for nested SVM testing
+ * Largely inspired from KVM unit test svm.c
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+
+#define SEV_DEV_PATH "/dev/sev"
+
+struct gpr64_regs guest_regs;
+u64 rflags;
+
+/* Allocate memory regions for nested SVM tests.
+ *
+ * Input Args:
+ * vm - The VM to allocate guest-virtual addresses in.
+ *
+ * Output Args:
+ * p_svm_gva - The guest virtual address for the struct svm_test_data.
+ *
+ * Return:
+ * Pointer to structure with the addresses of the SVM areas.
+ */
+struct svm_test_data *
+vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva)
+{
+ vm_vaddr_t svm_gva = vm_vaddr_alloc_page(vm);
+ struct svm_test_data *svm = addr_gva2hva(vm, svm_gva);
+
+ svm->vmcb = (void *)vm_vaddr_alloc_page(vm);
+ svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb);
+ svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb);
+
+ svm->save_area = (void *)vm_vaddr_alloc_page(vm);
+ svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area);
+ svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area);
+
+ svm->msr = (void *)vm_vaddr_alloc_page(vm);
+ svm->msr_hva = addr_gva2hva(vm, (uintptr_t)svm->msr);
+ svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr);
+ memset(svm->msr_hva, 0, getpagesize());
+
+ *p_svm_gva = svm_gva;
+ return svm;
+}
+
+static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
+ u64 base, u32 limit, u32 attr)
+{
+ seg->selector = selector;
+ seg->attrib = attr;
+ seg->limit = limit;
+ seg->base = base;
+}
+
+void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp)
+{
+ struct vmcb *vmcb = svm->vmcb;
+ uint64_t vmcb_gpa = svm->vmcb_gpa;
+ struct vmcb_save_area *save = &vmcb->save;
+ struct vmcb_control_area *ctrl = &vmcb->control;
+ u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
+ | SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK;
+ u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
+ | SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK;
+ uint64_t efer;
+
+ efer = rdmsr(MSR_EFER);
+ wrmsr(MSR_EFER, efer | EFER_SVME);
+ wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa);
+
+ memset(vmcb, 0, sizeof(*vmcb));
+ asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory");
+ vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr);
+ vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr);
+ vmcb_set_seg(&save->ss, get_ss(), 0, -1U, data_seg_attr);
+ vmcb_set_seg(&save->ds, get_ds(), 0, -1U, data_seg_attr);
+ vmcb_set_seg(&save->gdtr, 0, get_gdt().address, get_gdt().size, 0);
+ vmcb_set_seg(&save->idtr, 0, get_idt().address, get_idt().size, 0);
+
+ ctrl->asid = 1;
+ save->cpl = 0;
+ save->efer = rdmsr(MSR_EFER);
+ asm volatile ("mov %%cr4, %0" : "=r"(save->cr4) : : "memory");
+ asm volatile ("mov %%cr3, %0" : "=r"(save->cr3) : : "memory");
+ asm volatile ("mov %%cr0, %0" : "=r"(save->cr0) : : "memory");
+ asm volatile ("mov %%dr7, %0" : "=r"(save->dr7) : : "memory");
+ asm volatile ("mov %%dr6, %0" : "=r"(save->dr6) : : "memory");
+ asm volatile ("mov %%cr2, %0" : "=r"(save->cr2) : : "memory");
+ save->g_pat = rdmsr(MSR_IA32_CR_PAT);
+ save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
+ ctrl->intercept = (1ULL << INTERCEPT_VMRUN) |
+ (1ULL << INTERCEPT_VMMCALL);
+ ctrl->msrpm_base_pa = svm->msr_gpa;
+
+ vmcb->save.rip = (u64)guest_rip;
+ vmcb->save.rsp = (u64)guest_rsp;
+ guest_regs.rdi = (u64)svm;
+}
+
+/*
+ * save/restore 64-bit general registers except rax, rip, rsp
+ * which are directly handed through the VMCB guest processor state
+ */
+#define SAVE_GPR_C \
+ "xchg %%rbx, guest_regs+0x20\n\t" \
+ "xchg %%rcx, guest_regs+0x10\n\t" \
+ "xchg %%rdx, guest_regs+0x18\n\t" \
+ "xchg %%rbp, guest_regs+0x30\n\t" \
+ "xchg %%rsi, guest_regs+0x38\n\t" \
+ "xchg %%rdi, guest_regs+0x40\n\t" \
+ "xchg %%r8, guest_regs+0x48\n\t" \
+ "xchg %%r9, guest_regs+0x50\n\t" \
+ "xchg %%r10, guest_regs+0x58\n\t" \
+ "xchg %%r11, guest_regs+0x60\n\t" \
+ "xchg %%r12, guest_regs+0x68\n\t" \
+ "xchg %%r13, guest_regs+0x70\n\t" \
+ "xchg %%r14, guest_regs+0x78\n\t" \
+ "xchg %%r15, guest_regs+0x80\n\t"
+
+#define LOAD_GPR_C SAVE_GPR_C
+
+/*
+ * selftests do not use interrupts so we dropped clgi/sti/cli/stgi
+ * for now. registers involved in LOAD/SAVE_GPR_C are eventually
+ * unmodified so they do not need to be in the clobber list.
+ */
+void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa)
+{
+ asm volatile (
+ "vmload %[vmcb_gpa]\n\t"
+ "mov rflags, %%r15\n\t" // rflags
+ "mov %%r15, 0x170(%[vmcb])\n\t"
+ "mov guest_regs, %%r15\n\t" // rax
+ "mov %%r15, 0x1f8(%[vmcb])\n\t"
+ LOAD_GPR_C
+ "vmrun %[vmcb_gpa]\n\t"
+ SAVE_GPR_C
+ "mov 0x170(%[vmcb]), %%r15\n\t" // rflags
+ "mov %%r15, rflags\n\t"
+ "mov 0x1f8(%[vmcb]), %%r15\n\t" // rax
+ "mov %%r15, guest_regs\n\t"
+ "vmsave %[vmcb_gpa]\n\t"
+ : : [vmcb] "r" (vmcb), [vmcb_gpa] "a" (vmcb_gpa)
+ : "r15", "memory");
+}
+
+/*
+ * Open SEV_DEV_PATH if available, otherwise exit the entire program.
+ *
+ * Return:
+ * The opened file descriptor of /dev/sev.
+ */
+int open_sev_dev_path_or_exit(void)
+{
+ return open_path_or_exit(SEV_DEV_PATH, 0);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include "kvm_util.h"
+
+#define UCALL_PIO_PORT ((uint16_t)0x1000)
+
+void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+ /*
+ * FIXME: Revert this hack (the entire commit that added it) once nVMX
+ * preserves L2 GPRs across a nested VM-Exit. If a ucall from L2, e.g.
+ * to do a GUEST_SYNC(), lands the vCPU in L1, any and all GPRs can be
+ * clobbered by L1. Save and restore non-volatile GPRs (clobbering RBP
+ * in particular is problematic) along with RDX and RDI (which are
+ * inputs), and clobber volatile GPRs. *sigh*
+ */
+#define HORRIFIC_L2_UCALL_CLOBBER_HACK \
+ "rcx", "rsi", "r8", "r9", "r10", "r11"
+
+ asm volatile("push %%rbp\n\t"
+ "push %%r15\n\t"
+ "push %%r14\n\t"
+ "push %%r13\n\t"
+ "push %%r12\n\t"
+ "push %%rbx\n\t"
+ "push %%rdx\n\t"
+ "push %%rdi\n\t"
+ "in %[port], %%al\n\t"
+ "pop %%rdi\n\t"
+ "pop %%rdx\n\t"
+ "pop %%rbx\n\t"
+ "pop %%r12\n\t"
+ "pop %%r13\n\t"
+ "pop %%r14\n\t"
+ "pop %%r15\n\t"
+ "pop %%rbp\n\t"
+ : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory",
+ HORRIFIC_L2_UCALL_CLOBBER_HACK);
+}
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) {
+ struct kvm_regs regs;
+
+ vcpu_regs_get(vcpu, &regs);
+ return (void *)regs.rdi;
+ }
+ return NULL;
+}
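The two functions above are the two halves of the x86 ucall protocol: the guest executes an IN on port 0x1000 with RDI pointing at its ucall struct, KVM exits to userspace with KVM_EXIT_IO, and the host recovers the pointer from the saved RDI. A condensed view (not new code, just the round trip spelled out):

/*
 *   guest:  rdi = &uc;  in 0x1000, %al      -> KVM_EXIT_IO
 *   host:   run->io.port == UCALL_PIO_PORT  -> vcpu_regs_get() -> regs.rdi == &uc
 */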
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#include <asm/msr-index.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define PAGE_SHIFT_4K 12
+
+#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000
+
+bool enable_evmcs;
+
+struct hv_enlightened_vmcs *current_evmcs;
+struct hv_vp_assist_page *current_vp_assist;
+
+struct eptPageTableEntry {
+ uint64_t readable:1;
+ uint64_t writable:1;
+ uint64_t executable:1;
+ uint64_t memory_type:3;
+ uint64_t ignore_pat:1;
+ uint64_t page_size:1;
+ uint64_t accessed:1;
+ uint64_t dirty:1;
+ uint64_t ignored_11_10:2;
+ uint64_t address:40;
+ uint64_t ignored_62_52:11;
+ uint64_t suppress_ve:1;
+};
+
+struct eptPageTablePointer {
+ uint64_t memory_type:3;
+ uint64_t page_walk_length:3;
+ uint64_t ad_enabled:1;
+ uint64_t reserved_11_07:5;
+ uint64_t address:40;
+ uint64_t reserved_63_52:12;
+};
+int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
+{
+ uint16_t evmcs_ver;
+
+ vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
+ (unsigned long)&evmcs_ver);
+
+ /* KVM should return supported EVMCS version range */
+ TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) &&
+ (evmcs_ver & 0xff) > 0,
+ "Incorrect EVMCS version range: %x:%x",
+ evmcs_ver & 0xff, evmcs_ver >> 8);
+
+ return evmcs_ver;
+}
+
+/* Allocate memory regions for nested VMX tests.
+ *
+ * Input Args:
+ * vm - The VM to allocate guest-virtual addresses in.
+ *
+ * Output Args:
+ * p_vmx_gva - The guest virtual address for the struct vmx_pages.
+ *
+ * Return:
+ * Pointer to structure with the addresses of the VMX areas.
+ */
+struct vmx_pages *
+vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
+{
+ vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm);
+ struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);
+
+ /* Setup of a region of guest memory for the vmxon region. */
+ vmx->vmxon = (void *)vm_vaddr_alloc_page(vm);
+ vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
+ vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);
+
+ /* Setup of a region of guest memory for a vmcs. */
+ vmx->vmcs = (void *)vm_vaddr_alloc_page(vm);
+ vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
+ vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);
+
+ /* Setup of a region of guest memory for the MSR bitmap. */
+ vmx->msr = (void *)vm_vaddr_alloc_page(vm);
+ vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
+ vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
+ memset(vmx->msr_hva, 0, getpagesize());
+
+ /* Setup of a region of guest memory for the shadow VMCS. */
+ vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm);
+ vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
+ vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);
+
+ /* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
+ vmx->vmread = (void *)vm_vaddr_alloc_page(vm);
+ vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
+ vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
+ memset(vmx->vmread_hva, 0, getpagesize());
+
+ vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm);
+ vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
+ vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
+ memset(vmx->vmwrite_hva, 0, getpagesize());
+
+ *p_vmx_gva = vmx_gva;
+ return vmx;
+}
+
+bool prepare_for_vmx_operation(struct vmx_pages *vmx)
+{
+ uint64_t feature_control;
+ uint64_t required;
+ unsigned long cr0;
+ unsigned long cr4;
+
+ /*
+ * Ensure bits in CR0 and CR4 are valid in VMX operation:
+ * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx.
+ * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx.
+ */
+ __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory");
+ cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1);
+ cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0);
+ __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory");
+
+ __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory");
+ cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1);
+ cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0);
+ /* Enable VMX operation */
+ cr4 |= X86_CR4_VMXE;
+ __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory");
+
+ /*
+ * Configure IA32_FEATURE_CONTROL MSR to allow VMXON:
+ * Bit 0: Lock bit. If clear, VMXON causes a #GP.
+ * Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON
+ * outside of SMX causes a #GP.
+ */
+ required = FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
+ required |= FEAT_CTL_LOCKED;
+ feature_control = rdmsr(MSR_IA32_FEAT_CTL);
+ if ((feature_control & required) != required)
+ wrmsr(MSR_IA32_FEAT_CTL, feature_control | required);
+
+ /* Enter VMX root operation. */
+ *(uint32_t *)(vmx->vmxon) = vmcs_revision();
+ if (vmxon(vmx->vmxon_gpa))
+ return false;
+
+ return true;
+}
+
+bool load_vmcs(struct vmx_pages *vmx)
+{
+ /* Load a VMCS. */
+ *(uint32_t *)(vmx->vmcs) = vmcs_revision();
+ if (vmclear(vmx->vmcs_gpa))
+ return false;
+
+ if (vmptrld(vmx->vmcs_gpa))
+ return false;
+
+ /* Setup shadow VMCS, do not load it yet. */
+ *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
+ if (vmclear(vmx->shadow_vmcs_gpa))
+ return false;
+
+ return true;
+}
+
+static bool ept_vpid_cap_supported(uint64_t mask)
+{
+ return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask;
+}
+
+bool ept_1g_pages_supported(void)
+{
+ return ept_vpid_cap_supported(VMX_EPT_VPID_CAP_1G_PAGES);
+}
+
+/*
+ * Initialize the control fields to the most basic settings possible.
+ */
+static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
+{
+ uint32_t sec_exec_ctl = 0;
+
+ vmwrite(VIRTUAL_PROCESSOR_ID, 0);
+ vmwrite(POSTED_INTR_NV, 0);
+
+ vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS));
+
+ if (vmx->eptp_gpa) {
+ uint64_t ept_paddr;
+ struct eptPageTablePointer eptp = {
+ .memory_type = X86_MEMTYPE_WB,
+ .page_walk_length = 3, /* + 1 */
+ .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
+ .address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
+ };
+
+ memcpy(&ept_paddr, &eptp, sizeof(ept_paddr));
+ vmwrite(EPT_POINTER, ept_paddr);
+ sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT;
+ }
+
+ if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, sec_exec_ctl))
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+ rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
+ else {
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS));
+ GUEST_ASSERT(!sec_exec_ctl);
+ }
+
+ vmwrite(EXCEPTION_BITMAP, 0);
+ vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
+ vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */
+ vmwrite(CR3_TARGET_COUNT, 0);
+ vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) |
+ VM_EXIT_HOST_ADDR_SPACE_SIZE); /* 64-bit host */
+ vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
+ vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+ vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) |
+ VM_ENTRY_IA32E_MODE); /* 64-bit guest */
+ vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
+ vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
+ vmwrite(TPR_THRESHOLD, 0);
+
+ vmwrite(CR0_GUEST_HOST_MASK, 0);
+ vmwrite(CR4_GUEST_HOST_MASK, 0);
+ vmwrite(CR0_READ_SHADOW, get_cr0());
+ vmwrite(CR4_READ_SHADOW, get_cr4());
+
+ vmwrite(MSR_BITMAP, vmx->msr_gpa);
+ vmwrite(VMREAD_BITMAP, vmx->vmread_gpa);
+ vmwrite(VMWRITE_BITMAP, vmx->vmwrite_gpa);
+}
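The memcpy() of struct eptPageTablePointer into a plain u64 above works because the bitfields mirror the architectural EPTP layout; spelled out for reference (a description of the encoding, not new behaviour):

/*
 *   bits  2:0   memory type        (X86_MEMTYPE_WB)
 *   bits  5:3   page-walk length   (3, i.e. "number of levels - 1" for 4-level EPT)
 *   bit   6     enable accessed/dirty bits (if VMX_EPT_VPID_CAP_AD_BITS)
 *   bits 51:12  physical page number of the EPT PML4 (eptp_gpa >> PAGE_SHIFT_4K)
 */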
+
+/*
+ * Initialize the host state fields based on the current host state, with
+ * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch
+ * or vmresume.
+ */
+static inline void init_vmcs_host_state(void)
+{
+ uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS);
+
+ vmwrite(HOST_ES_SELECTOR, get_es());
+ vmwrite(HOST_CS_SELECTOR, get_cs());
+ vmwrite(HOST_SS_SELECTOR, get_ss());
+ vmwrite(HOST_DS_SELECTOR, get_ds());
+ vmwrite(HOST_FS_SELECTOR, get_fs());
+ vmwrite(HOST_GS_SELECTOR, get_gs());
+ vmwrite(HOST_TR_SELECTOR, get_tr());
+
+ if (exit_controls & VM_EXIT_LOAD_IA32_PAT)
+ vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT));
+ if (exit_controls & VM_EXIT_LOAD_IA32_EFER)
+ vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER));
+ if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+ vmwrite(HOST_IA32_PERF_GLOBAL_CTRL,
+ rdmsr(MSR_CORE_PERF_GLOBAL_CTRL));
+
+ vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS));
+
+ vmwrite(HOST_CR0, get_cr0());
+ vmwrite(HOST_CR3, get_cr3());
+ vmwrite(HOST_CR4, get_cr4());
+ vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE));
+ vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE));
+ vmwrite(HOST_TR_BASE,
+ get_desc64_base((struct desc64 *)(get_gdt().address + get_tr())));
+ vmwrite(HOST_GDTR_BASE, get_gdt().address);
+ vmwrite(HOST_IDTR_BASE, get_idt().address);
+ vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP));
+ vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP));
+}
+
+/*
+ * Initialize the guest state fields essentially as a clone of
+ * the host state fields. Some host state fields have fixed
+ * values, and we set the corresponding guest state fields accordingly.
+ */
+static inline void init_vmcs_guest_state(void *rip, void *rsp)
+{
+ vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR));
+ vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR));
+ vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR));
+ vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR));
+ vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR));
+ vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR));
+ vmwrite(GUEST_LDTR_SELECTOR, 0);
+ vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR));
+ vmwrite(GUEST_INTR_STATUS, 0);
+ vmwrite(GUEST_PML_INDEX, 0);
+
+ vmwrite(VMCS_LINK_POINTER, -1ll);
+ vmwrite(GUEST_IA32_DEBUGCTL, 0);
+ vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT));
+ vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER));
+ vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL,
+ vmreadz(HOST_IA32_PERF_GLOBAL_CTRL));
+
+ vmwrite(GUEST_ES_LIMIT, -1);
+ vmwrite(GUEST_CS_LIMIT, -1);
+ vmwrite(GUEST_SS_LIMIT, -1);
+ vmwrite(GUEST_DS_LIMIT, -1);
+ vmwrite(GUEST_FS_LIMIT, -1);
+ vmwrite(GUEST_GS_LIMIT, -1);
+ vmwrite(GUEST_LDTR_LIMIT, -1);
+ vmwrite(GUEST_TR_LIMIT, 0x67);
+ vmwrite(GUEST_GDTR_LIMIT, 0xffff);
+ vmwrite(GUEST_IDTR_LIMIT, 0xffff);
+ vmwrite(GUEST_ES_AR_BYTES,
+ vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_CS_AR_BYTES, 0xa09b);
+ vmwrite(GUEST_SS_AR_BYTES, 0xc093);
+ vmwrite(GUEST_DS_AR_BYTES,
+ vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_FS_AR_BYTES,
+ vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_GS_AR_BYTES,
+ vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_LDTR_AR_BYTES, 0x10000);
+ vmwrite(GUEST_TR_AR_BYTES, 0x8b);
+ vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
+ vmwrite(GUEST_ACTIVITY_STATE, 0);
+ vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS));
+ vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0);
+
+ vmwrite(GUEST_CR0, vmreadz(HOST_CR0));
+ vmwrite(GUEST_CR3, vmreadz(HOST_CR3));
+ vmwrite(GUEST_CR4, vmreadz(HOST_CR4));
+ vmwrite(GUEST_ES_BASE, 0);
+ vmwrite(GUEST_CS_BASE, 0);
+ vmwrite(GUEST_SS_BASE, 0);
+ vmwrite(GUEST_DS_BASE, 0);
+ vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE));
+ vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE));
+ vmwrite(GUEST_LDTR_BASE, 0);
+ vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE));
+ vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE));
+ vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE));
+ vmwrite(GUEST_DR7, 0x400);
+ vmwrite(GUEST_RSP, (uint64_t)rsp);
+ vmwrite(GUEST_RIP, (uint64_t)rip);
+ vmwrite(GUEST_RFLAGS, 2);
+ vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+ vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP));
+ vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP));
+}
+
+void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
+{
+ init_vmcs_control_fields(vmx);
+ init_vmcs_host_state();
+ init_vmcs_guest_state(guest_rip, guest_rsp);
+}
+
+static void nested_create_pte(struct kvm_vm *vm,
+ struct eptPageTableEntry *pte,
+ uint64_t nested_paddr,
+ uint64_t paddr,
+ int current_level,
+ int target_level)
+{
+ if (!pte->readable) {
+ pte->writable = true;
+ pte->readable = true;
+ pte->executable = true;
+ pte->page_size = (current_level == target_level);
+ if (pte->page_size)
+ pte->address = paddr >> vm->page_shift;
+ else
+ pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
+ } else {
+ /*
+ * Entry already present. Assert that the caller doesn't want
+ * a hugepage at this level, and that there isn't a hugepage at
+ * this level.
+ */
+ TEST_ASSERT(current_level != target_level,
+ "Cannot create hugepage at level: %u, nested_paddr: 0x%lx",
+ current_level, nested_paddr);
+ TEST_ASSERT(!pte->page_size,
+ "Cannot create page table at level: %u, nested_paddr: 0x%lx",
+ current_level, nested_paddr);
+ }
+}
+
+
+void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, int target_level)
+{
+ const uint64_t page_size = PG_LEVEL_SIZE(target_level);
+ struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
+ uint16_t index;
+
+ TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
+ "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ TEST_ASSERT((nested_paddr >> 48) == 0,
+ "Nested physical address 0x%lx requires 5-level paging",
+ nested_paddr);
+ TEST_ASSERT((nested_paddr % page_size) == 0,
+ "Nested physical address not on page boundary,\n"
+ " nested_paddr: 0x%lx page_size: 0x%lx",
+ nested_paddr, page_size);
+ TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond beyond maximum supported,\n"
+ " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+ TEST_ASSERT((paddr % page_size) == 0,
+ "Physical address not on page boundary,\n"
+ " paddr: 0x%lx page_size: 0x%lx",
+ paddr, page_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
+ index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
+ pte = &pt[index];
+
+ nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
+
+ if (pte->page_size)
+ break;
+
+ pt = addr_gpa2hva(vm, pte->address * vm->page_size);
+ }
+
+ /*
+ * For now mark these as accessed and dirty because the only
+ * testcase we have needs that. Can be reconsidered later.
+ */
+ pte->accessed = true;
+ pte->dirty = true;
+
+}
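The walk above indexes one 512-entry table per level; the shift amounts are the standard x86 paging split (a reference note, matching PG_LEVEL_SHIFT() as used here):

/*
 *   PG_LEVEL_512G: index = (nested_paddr >> 39) & 0x1ff
 *   PG_LEVEL_1G:   index = (nested_paddr >> 30) & 0x1ff
 *   PG_LEVEL_2M:   index = (nested_paddr >> 21) & 0x1ff
 *   PG_LEVEL_4K:   index = (nested_paddr >> 12) & 0x1ff
 */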
+
+void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr)
+{
+ __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K);
+}
+
+/*
+ * Map a range of EPT guest physical addresses to the VM's physical address
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * nested_paddr - Nested guest physical address to map
+ * paddr - VM Physical Address
+ * size - The size of the range to map
+ * level - The level at which to map the range
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the VM given by vm, creates a nested guest translation for the
+ * page range starting at nested_paddr to the page range starting at paddr.
+ */
+void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size,
+ int level)
+{
+ size_t page_size = PG_LEVEL_SIZE(level);
+ size_t npages = size / page_size;
+
+ TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow");
+ TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
+
+ while (npages--) {
+ __nested_pg_map(vmx, vm, nested_paddr, paddr, level);
+ nested_paddr += page_size;
+ paddr += page_size;
+ }
+}
+
+void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size)
+{
+ __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K);
+}
+
+/* Prepare an identity extended page table that maps all the
+ * physical pages in VM.
+ */
+void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint32_t memslot)
+{
+ sparsebit_idx_t i, last;
+ struct userspace_mem_region *region =
+ memslot2region(vm, memslot);
+
+ i = (region->region.guest_phys_addr >> vm->page_shift) - 1;
+ last = i + (region->region.memory_size >> vm->page_shift);
+ for (;;) {
+ i = sparsebit_next_clear(region->unused_phy_pages, i);
+ if (i > last)
+ break;
+
+ nested_map(vmx, vm,
+ (uint64_t)i << vm->page_shift,
+ (uint64_t)i << vm->page_shift,
+ 1 << vm->page_shift);
+ }
+}
+
+/* Identity map a region with 1GiB Pages. */
+void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t addr, uint64_t size)
+{
+ __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
+}
+
+bool kvm_cpu_has_ept(void)
+{
+ uint64_t ctrl;
+
+ ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32;
+ if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
+ return false;
+
+ ctrl = kvm_get_feature_msr(MSR_IA32_VMX_PROCBASED_CTLS2) >> 32;
+ return ctrl & SECONDARY_EXEC_ENABLE_EPT;
+}
+
+void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint32_t eptp_memslot)
+{
+ TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
+
+ vmx->eptp = (void *)vm_vaddr_alloc_page(vm);
+ vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
+ vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
+}
+
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm)
+{
+ vmx->apic_access = (void *)vm_vaddr_alloc_page(vm);
+ vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access);
+ vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access);
+}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2021, Google LLC.
- */
-
-#include "apic.h"
-
-void apic_disable(void)
-{
- wrmsr(MSR_IA32_APICBASE,
- rdmsr(MSR_IA32_APICBASE) &
- ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
-}
-
-void xapic_enable(void)
-{
- uint64_t val = rdmsr(MSR_IA32_APICBASE);
-
- /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */
- if (val & MSR_IA32_APICBASE_EXTD) {
- apic_disable();
- wrmsr(MSR_IA32_APICBASE,
- rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
- } else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
- wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
- }
-
- /*
- * Per SDM: reset value of spurious interrupt vector register has the
- * APIC software enabled bit=0. It must be enabled in addition to the
- * enable bit in the MSR.
- */
- val = xapic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
- xapic_write_reg(APIC_SPIV, val);
-}
-
-void x2apic_enable(void)
-{
- wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
- MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
- x2apic_write_reg(APIC_SPIV,
- x2apic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED);
-}
+++ /dev/null
-handle_exception:
- push %r15
- push %r14
- push %r13
- push %r12
- push %r11
- push %r10
- push %r9
- push %r8
-
- push %rdi
- push %rsi
- push %rbp
- push %rbx
- push %rdx
- push %rcx
- push %rax
- mov %rsp, %rdi
-
- call route_exception
-
- pop %rax
- pop %rcx
- pop %rdx
- pop %rbx
- pop %rbp
- pop %rsi
- pop %rdi
- pop %r8
- pop %r9
- pop %r10
- pop %r11
- pop %r12
- pop %r13
- pop %r14
- pop %r15
-
- /* Discard vector and error code. */
- add $16, %rsp
- iretq
-
-/*
- * Build the handle_exception wrappers which push the vector/error code on the
- * stack and an array of pointers to those wrappers.
- */
-.pushsection .rodata
-.globl idt_handlers
-idt_handlers:
-.popsection
-
-.macro HANDLERS has_error from to
- vector = \from
- .rept \to - \from + 1
- .align 8
-
- /* Fetch current address and append it to idt_handlers. */
-666 :
-.pushsection .rodata
- .quad 666b
-.popsection
-
- .if ! \has_error
- pushq $0
- .endif
- pushq $vector
- jmp handle_exception
- vector = vector + 1
- .endr
-.endm
-
-.global idt_handler_code
-idt_handler_code:
- HANDLERS has_error=0 from=0 to=7
- HANDLERS has_error=1 from=8 to=8
- HANDLERS has_error=0 from=9 to=9
- HANDLERS has_error=1 from=10 to=14
- HANDLERS has_error=0 from=15 to=16
- HANDLERS has_error=1 from=17 to=17
- HANDLERS has_error=0 from=18 to=255
-
-.section .note.GNU-stack, "", %progbits
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Hyper-V specific functions.
- *
- * Copyright (C) 2021, Red Hat Inc.
- */
-#include <stdint.h>
-#include "processor.h"
-#include "hyperv.h"
-
-const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
-{
- static struct kvm_cpuid2 *cpuid;
- int kvm_fd;
-
- if (cpuid)
- return cpuid;
-
- cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
- kvm_fd = open_kvm_dev_path_or_exit();
-
- kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
-
- close(kvm_fd);
- return cpuid;
-}
-
-void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu)
-{
- static struct kvm_cpuid2 *cpuid_full;
- const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv;
- int i, nent = 0;
-
- if (!cpuid_full) {
- cpuid_sys = kvm_get_supported_cpuid();
- cpuid_hv = kvm_get_supported_hv_cpuid();
-
- cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent);
- if (!cpuid_full) {
- perror("malloc");
- abort();
- }
-
- /* Need to skip KVM CPUID leaves 0x400000xx */
- for (i = 0; i < cpuid_sys->nent; i++) {
- if (cpuid_sys->entries[i].function >= 0x40000000 &&
- cpuid_sys->entries[i].function < 0x40000100)
- continue;
- cpuid_full->entries[nent] = cpuid_sys->entries[i];
- nent++;
- }
-
- memcpy(&cpuid_full->entries[nent], cpuid_hv->entries,
- cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2));
- cpuid_full->nent = nent + cpuid_hv->nent;
- }
-
- vcpu_init_cpuid(vcpu, cpuid_full);
-}
-
-const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
-
- vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
-
- return cpuid;
-}
-
-bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature)
-{
- if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID))
- return false;
-
- return kvm_cpuid_has(kvm_get_supported_hv_cpuid(), feature);
-}
-
-struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
- vm_vaddr_t *p_hv_pages_gva)
-{
- vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm);
- struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva);
-
- /* Setup of a region of guest memory for the VP Assist page. */
- hv->vp_assist = (void *)vm_vaddr_alloc_page(vm);
- hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist);
- hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist);
-
- /* Setup of a region of guest memory for the partition assist page. */
- hv->partition_assist = (void *)vm_vaddr_alloc_page(vm);
- hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist);
- hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist);
-
- /* Setup of a region of guest memory for the enlightened VMCS. */
- hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm);
- hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs);
- hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs);
-
- *p_hv_pages_gva = hv_pages_gva;
- return hv;
-}
-
-int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
-{
- uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
- HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
-
- wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val);
-
- current_vp_assist = vp_assist;
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * x86_64-specific extensions to memstress.c.
- *
- * Copyright (C) 2022, Google, Inc.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <linux/bitmap.h>
-#include <linux/bitops.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "memstress.h"
-#include "processor.h"
-#include "vmx.h"
-
-void memstress_l2_guest_code(uint64_t vcpu_id)
-{
- memstress_guest_code(vcpu_id);
- vmcall();
-}
-
-extern char memstress_l2_guest_entry[];
-__asm__(
-"memstress_l2_guest_entry:"
-" mov (%rsp), %rdi;"
-" call memstress_l2_guest_code;"
-" ud2;"
-);
-
-static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
-{
-#define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- unsigned long *rsp;
-
- GUEST_ASSERT(vmx->vmcs_gpa);
- GUEST_ASSERT(prepare_for_vmx_operation(vmx));
- GUEST_ASSERT(load_vmcs(vmx));
- GUEST_ASSERT(ept_1g_pages_supported());
-
- rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1];
- *rsp = vcpu_id;
- prepare_vmcs(vmx, memstress_l2_guest_entry, rsp);
-
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
- GUEST_DONE();
-}
-
-uint64_t memstress_nested_pages(int nr_vcpus)
-{
- /*
- * 513 page tables is enough to identity-map 256 TiB of L2 with 1G
- * pages and 4-level paging, plus a few pages per-vCPU for data
- * structures such as the VMCS.
- */
- return 513 + 10 * nr_vcpus;
-}
-
-void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
-{
- uint64_t start, end;
-
- prepare_eptp(vmx, vm, 0);
-
- /*
- * Identity map the first 4G and the test region with 1G pages so that
- * KVM can shadow the EPT12 with the maximum huge page size supported
- * by the backing source.
- */
- nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL);
-
- start = align_down(memstress_args.gpa, PG_SIZE_1G);
- end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G);
- nested_identity_map_1g(vmx, vm, start, end - start);
-}
-
-void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[])
-{
- struct vmx_pages *vmx, *vmx0 = NULL;
- struct kvm_regs regs;
- vm_vaddr_t vmx_gva;
- int vcpu_id;
-
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- TEST_REQUIRE(kvm_cpu_has_ept());
-
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
- vmx = vcpu_alloc_vmx(vm, &vmx_gva);
-
- if (vcpu_id == 0) {
- memstress_setup_ept(vmx, vm);
- vmx0 = vmx;
- } else {
- /* Share the same EPT table across all vCPUs. */
- vmx->eptp = vmx0->eptp;
- vmx->eptp_hva = vmx0->eptp_hva;
- vmx->eptp_gpa = vmx0->eptp_gpa;
- }
-
- /*
- * Override the vCPU to run memstress_l1_guest_code() which will
- * bounce it into L2 before calling memstress_guest_code().
- */
- vcpu_regs_get(vcpus[vcpu_id], &regs);
- regs.rip = (unsigned long) memstress_l1_guest_code;
- vcpu_regs_set(vcpus[vcpu_id], &regs);
- vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id);
- }
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2023, Tencent, Inc.
- */
-
-#include <stdint.h>
-
-#include <linux/kernel.h>
-
-#include "kvm_util.h"
-#include "pmu.h"
-
-const uint64_t intel_pmu_arch_events[] = {
- INTEL_ARCH_CPU_CYCLES,
- INTEL_ARCH_INSTRUCTIONS_RETIRED,
- INTEL_ARCH_REFERENCE_CYCLES,
- INTEL_ARCH_LLC_REFERENCES,
- INTEL_ARCH_LLC_MISSES,
- INTEL_ARCH_BRANCHES_RETIRED,
- INTEL_ARCH_BRANCHES_MISPREDICTED,
- INTEL_ARCH_TOPDOWN_SLOTS,
-};
-kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS);
-
-const uint64_t amd_pmu_zen_events[] = {
- AMD_ZEN_CORE_CYCLES,
- AMD_ZEN_INSTRUCTIONS_RETIRED,
- AMD_ZEN_BRANCHES_RETIRED,
- AMD_ZEN_BRANCHES_MISPREDICTED,
-};
-kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS);
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * tools/testing/selftests/kvm/lib/x86_64/processor.c
- *
- * Copyright (C) 2018, Google LLC.
- */
-
-#include "linux/bitmap.h"
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "sev.h"
-
-#ifndef NUM_INTERRUPTS
-#define NUM_INTERRUPTS 256
-#endif
-
-#define KERNEL_CS 0x8
-#define KERNEL_DS 0x10
-#define KERNEL_TSS 0x18
-
-vm_vaddr_t exception_handlers;
-bool host_cpu_is_amd;
-bool host_cpu_is_intel;
-bool is_forced_emulation_enabled;
-uint64_t guest_tsc_khz;
-
-static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent)
-{
- fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
- "rcx: 0x%.16llx rdx: 0x%.16llx\n",
- indent, "",
- regs->rax, regs->rbx, regs->rcx, regs->rdx);
- fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
- "rsp: 0x%.16llx rbp: 0x%.16llx\n",
- indent, "",
- regs->rsi, regs->rdi, regs->rsp, regs->rbp);
- fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx "
- "r10: 0x%.16llx r11: 0x%.16llx\n",
- indent, "",
- regs->r8, regs->r9, regs->r10, regs->r11);
- fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
- "r14: 0x%.16llx r15: 0x%.16llx\n",
- indent, "",
- regs->r12, regs->r13, regs->r14, regs->r15);
- fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
- indent, "",
- regs->rip, regs->rflags);
-}
-
-static void segment_dump(FILE *stream, struct kvm_segment *segment,
- uint8_t indent)
-{
- fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
- "selector: 0x%.4x type: 0x%.2x\n",
- indent, "", segment->base, segment->limit,
- segment->selector, segment->type);
- fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
- "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
- indent, "", segment->present, segment->dpl,
- segment->db, segment->s, segment->l);
- fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
- "unusable: 0x%.2x padding: 0x%.2x\n",
- indent, "", segment->g, segment->avl,
- segment->unusable, segment->padding);
-}
-
-static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
- uint8_t indent)
-{
- fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
- "padding: 0x%.4x 0x%.4x 0x%.4x\n",
- indent, "", dtable->base, dtable->limit,
- dtable->padding[0], dtable->padding[1], dtable->padding[2]);
-}
-
-static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent)
-{
- unsigned int i;
-
- fprintf(stream, "%*scs:\n", indent, "");
- segment_dump(stream, &sregs->cs, indent + 2);
- fprintf(stream, "%*sds:\n", indent, "");
- segment_dump(stream, &sregs->ds, indent + 2);
- fprintf(stream, "%*ses:\n", indent, "");
- segment_dump(stream, &sregs->es, indent + 2);
- fprintf(stream, "%*sfs:\n", indent, "");
- segment_dump(stream, &sregs->fs, indent + 2);
- fprintf(stream, "%*sgs:\n", indent, "");
- segment_dump(stream, &sregs->gs, indent + 2);
- fprintf(stream, "%*sss:\n", indent, "");
- segment_dump(stream, &sregs->ss, indent + 2);
- fprintf(stream, "%*str:\n", indent, "");
- segment_dump(stream, &sregs->tr, indent + 2);
- fprintf(stream, "%*sldt:\n", indent, "");
- segment_dump(stream, &sregs->ldt, indent + 2);
-
- fprintf(stream, "%*sgdt:\n", indent, "");
- dtable_dump(stream, &sregs->gdt, indent + 2);
- fprintf(stream, "%*sidt:\n", indent, "");
- dtable_dump(stream, &sregs->idt, indent + 2);
-
- fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
- "cr3: 0x%.16llx cr4: 0x%.16llx\n",
- indent, "",
- sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
- fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
- "apic_base: 0x%.16llx\n",
- indent, "",
- sregs->cr8, sregs->efer, sregs->apic_base);
-
- fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
- for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
- fprintf(stream, "%*s%.16llx\n", indent + 2, "",
- sregs->interrupt_bitmap[i]);
- }
-}
-
-bool kvm_is_tdp_enabled(void)
-{
- if (host_cpu_is_intel)
- return get_kvm_intel_param_bool("ept");
- else
- return get_kvm_amd_param_bool("npt");
-}
-
-void virt_arch_pgd_alloc(struct kvm_vm *vm)
-{
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
- /* If needed, create page map l4 table. */
- if (!vm->pgd_created) {
- vm->pgd = vm_alloc_page_table(vm);
- vm->pgd_created = true;
- }
-}
-
-static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte,
- uint64_t vaddr, int level)
-{
- uint64_t pt_gpa = PTE_GET_PA(*parent_pte);
- uint64_t *page_table = addr_gpa2hva(vm, pt_gpa);
- int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
-
- TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd,
- "Parent PTE (level %d) not PRESENT for gva: 0x%08lx",
- level + 1, vaddr);
-
- return &page_table[index];
-}
-
-static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
- uint64_t *parent_pte,
- uint64_t vaddr,
- uint64_t paddr,
- int current_level,
- int target_level)
-{
- uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level);
-
- paddr = vm_untag_gpa(vm, paddr);
-
- if (!(*pte & PTE_PRESENT_MASK)) {
- *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
- if (current_level == target_level)
- *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
- else
- *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
- } else {
- /*
- * Entry already present. Assert that the caller doesn't want
- * a hugepage at this level, and that there isn't a hugepage at
- * this level.
- */
- TEST_ASSERT(current_level != target_level,
- "Cannot create hugepage at level: %u, vaddr: 0x%lx",
- current_level, vaddr);
- TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
- "Cannot create page table at level: %u, vaddr: 0x%lx",
- current_level, vaddr);
- }
- return pte;
-}
-
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
-{
- const uint64_t pg_size = PG_LEVEL_SIZE(level);
- uint64_t *pml4e, *pdpe, *pde;
- uint64_t *pte;
-
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
- "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
- TEST_ASSERT((vaddr % pg_size) == 0,
- "Virtual address not aligned,\n"
- "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size);
- TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)),
- "Invalid virtual address, vaddr: 0x%lx", vaddr);
- TEST_ASSERT((paddr % pg_size) == 0,
- "Physical address not aligned,\n"
- " paddr: 0x%lx page size: 0x%lx", paddr, pg_size);
- TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond maximum supported,\n"
- " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
- TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr,
- "Unexpected bits in paddr: %lx", paddr);
-
- /*
- * Allocate upper level page tables, if not already present. Return
- * early if a hugepage was created.
- */
- pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level);
- if (*pml4e & PTE_LARGE_MASK)
- return;
-
- pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level);
- if (*pdpe & PTE_LARGE_MASK)
- return;
-
- pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level);
- if (*pde & PTE_LARGE_MASK)
- return;
-
- /* Fill in page table entry. */
- pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
- TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
- "PTE already present for 4k page at vaddr: 0x%lx", vaddr);
- *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
-
- /*
- * Neither SEV nor TDX supports shared page tables, so only the final
- * leaf PTE needs to have the C/S-bit set manually.
- */
- if (vm_is_gpa_protected(vm, paddr))
- *pte |= vm->arch.c_bit;
- else
- *pte |= vm->arch.s_bit;
-}
-
-void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
-{
- __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
-}
-
-void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint64_t nr_bytes, int level)
-{
- uint64_t pg_size = PG_LEVEL_SIZE(level);
- uint64_t nr_pages = nr_bytes / pg_size;
- int i;
-
- TEST_ASSERT(nr_bytes % pg_size == 0,
- "Region size not aligned: nr_bytes: 0x%lx, page size: 0x%lx",
- nr_bytes, pg_size);
-
- for (i = 0; i < nr_pages; i++) {
- __virt_pg_map(vm, vaddr, paddr, level);
-
- vaddr += pg_size;
- paddr += pg_size;
- }
-}
-
-static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
-{
- if (*pte & PTE_LARGE_MASK) {
- TEST_ASSERT(*level == PG_LEVEL_NONE ||
- *level == current_level,
- "Unexpected hugepage at level %d", current_level);
- *level = current_level;
- }
-
- return *level == current_level;
-}
-
-uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
- int *level)
-{
- uint64_t *pml4e, *pdpe, *pde;
-
- TEST_ASSERT(!vm->arch.is_pt_protected,
- "Walking page tables of protected guests is impossible");
-
- TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM,
- "Invalid PG_LEVEL_* '%d'", *level);
-
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
- TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
- (vaddr >> vm->page_shift)),
- "Invalid virtual address, vaddr: 0x%lx",
- vaddr);
- /*
- * Based on the mode check above there are 48 bits in the vaddr, so
- * shift 16 to sign extend the last bit (bit-47),
- */
- TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
- "Canonical check failed. The virtual address is invalid.");
-
- pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G);
- if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G))
- return pml4e;
-
- pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G);
- if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G))
- return pdpe;
-
- pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M);
- if (vm_is_target_pte(pde, level, PG_LEVEL_2M))
- return pde;
-
- return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
-}
-
-uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
-{
- int level = PG_LEVEL_4K;
-
- return __vm_get_page_table_entry(vm, vaddr, &level);
-}
-
-void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
-{
- uint64_t *pml4e, *pml4e_start;
- uint64_t *pdpe, *pdpe_start;
- uint64_t *pde, *pde_start;
- uint64_t *pte, *pte_start;
-
- if (!vm->pgd_created)
- return;
-
- fprintf(stream, "%*s "
- " no\n", indent, "");
- fprintf(stream, "%*s index hvaddr gpaddr "
- "addr w exec dirty\n",
- indent, "");
- pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
- for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
- pml4e = &pml4e_start[n1];
- if (!(*pml4e & PTE_PRESENT_MASK))
- continue;
- fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
- " %u\n",
- indent, "",
- pml4e - pml4e_start, pml4e,
- addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
- !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
-
- pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
- for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
- pdpe = &pdpe_start[n2];
- if (!(*pdpe & PTE_PRESENT_MASK))
- continue;
- fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx "
- "%u %u\n",
- indent, "",
- pdpe - pdpe_start, pdpe,
- addr_hva2gpa(vm, pdpe),
- PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
- !!(*pdpe & PTE_NX_MASK));
-
- pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
- for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
- pde = &pde_start[n3];
- if (!(*pde & PTE_PRESENT_MASK))
- continue;
- fprintf(stream, "%*spde 0x%-3zx %p "
- "0x%-12lx 0x%-10llx %u %u\n",
- indent, "", pde - pde_start, pde,
- addr_hva2gpa(vm, pde),
- PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
- !!(*pde & PTE_NX_MASK));
-
- pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
- for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
- pte = &pte_start[n4];
- if (!(*pte & PTE_PRESENT_MASK))
- continue;
- fprintf(stream, "%*spte 0x%-3zx %p "
- "0x%-12lx 0x%-10llx %u %u "
- " %u 0x%-10lx\n",
- indent, "",
- pte - pte_start, pte,
- addr_hva2gpa(vm, pte),
- PTE_GET_PFN(*pte),
- !!(*pte & PTE_WRITABLE_MASK),
- !!(*pte & PTE_NX_MASK),
- !!(*pte & PTE_DIRTY_MASK),
- ((uint64_t) n1 << 27)
- | ((uint64_t) n2 << 18)
- | ((uint64_t) n3 << 9)
- | ((uint64_t) n4));
- }
- }
- }
- }
-}
-
-/*
- * Set Unusable Segment
- *
- * Input Args: None
- *
- * Output Args:
- * segp - Pointer to segment register
- *
- * Return: None
- *
- * Sets the segment register pointed to by @segp to an unusable state.
- */
-static void kvm_seg_set_unusable(struct kvm_segment *segp)
-{
- memset(segp, 0, sizeof(*segp));
- segp->unusable = true;
-}
-
-static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
-{
- void *gdt = addr_gva2hva(vm, vm->arch.gdt);
- struct desc64 *desc = gdt + (segp->selector >> 3) * 8;
-
- desc->limit0 = segp->limit & 0xFFFF;
- desc->base0 = segp->base & 0xFFFF;
- desc->base1 = segp->base >> 16;
- desc->type = segp->type;
- desc->s = segp->s;
- desc->dpl = segp->dpl;
- desc->p = segp->present;
- desc->limit1 = segp->limit >> 16;
- desc->avl = segp->avl;
- desc->l = segp->l;
- desc->db = segp->db;
- desc->g = segp->g;
- desc->base2 = segp->base >> 24;
- if (!segp->s)
- desc->base3 = segp->base >> 32;
-}
-
-static void kvm_seg_set_kernel_code_64bit(struct kvm_segment *segp)
-{
- memset(segp, 0, sizeof(*segp));
- segp->selector = KERNEL_CS;
- segp->limit = 0xFFFFFFFFu;
- segp->s = 0x1; /* kTypeCodeData */
- segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
- * | kFlagCodeReadable
- */
- segp->g = true;
- segp->l = true;
- segp->present = 1;
-}
-
-static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp)
-{
- memset(segp, 0, sizeof(*segp));
- segp->selector = KERNEL_DS;
- segp->limit = 0xFFFFFFFFu;
- segp->s = 0x1; /* kTypeCodeData */
- segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
- * | kFlagDataWritable
- */
- segp->g = true;
- segp->present = true;
-}
-
-vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
-{
- int level = PG_LEVEL_NONE;
- uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level);
-
- TEST_ASSERT(*pte & PTE_PRESENT_MASK,
- "Leaf PTE not PRESENT for gva: 0x%08lx", gva);
-
- /*
- * No need for a hugepage mask on the PTE, x86-64 requires the "unused"
- * address bits to be zero.
- */
- return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level));
-}
-
-static void kvm_seg_set_tss_64bit(vm_vaddr_t base, struct kvm_segment *segp)
-{
- memset(segp, 0, sizeof(*segp));
- segp->base = base;
- segp->limit = 0x67;
- segp->selector = KERNEL_TSS;
- segp->type = 0xb;
- segp->present = 1;
-}
-
-static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
-{
- struct kvm_sregs sregs;
-
- TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K);
-
- /* Set mode specific system register values. */
- vcpu_sregs_get(vcpu, &sregs);
-
- sregs.idt.base = vm->arch.idt;
- sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
- sregs.gdt.base = vm->arch.gdt;
- sregs.gdt.limit = getpagesize() - 1;
-
- sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
- sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
- if (kvm_cpu_has(X86_FEATURE_XSAVE))
- sregs.cr4 |= X86_CR4_OSXSAVE;
- sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
-
- kvm_seg_set_unusable(&sregs.ldt);
- kvm_seg_set_kernel_code_64bit(&sregs.cs);
- kvm_seg_set_kernel_data_64bit(&sregs.ds);
- kvm_seg_set_kernel_data_64bit(&sregs.es);
- kvm_seg_set_kernel_data_64bit(&sregs.gs);
- kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr);
-
- sregs.cr3 = vm->pgd;
- vcpu_sregs_set(vcpu, &sregs);
-}
-
-static void vcpu_init_xcrs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
-{
- struct kvm_xcrs xcrs = {
- .nr_xcrs = 1,
- .xcrs[0].xcr = 0,
- .xcrs[0].value = kvm_cpu_supported_xcr0(),
- };
-
- if (!kvm_cpu_has(X86_FEATURE_XSAVE))
- return;
-
- vcpu_xcrs_set(vcpu, &xcrs);
-}
-
-static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
- int dpl, unsigned short selector)
-{
- struct idt_entry *base =
- (struct idt_entry *)addr_gva2hva(vm, vm->arch.idt);
- struct idt_entry *e = &base[vector];
-
- memset(e, 0, sizeof(*e));
- e->offset0 = addr;
- e->selector = selector;
- e->ist = 0;
- e->type = 14;
- e->dpl = dpl;
- e->p = 1;
- e->offset1 = addr >> 16;
- e->offset2 = addr >> 32;
-}
-
-static bool kvm_fixup_exception(struct ex_regs *regs)
-{
- if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10)
- return false;
-
- if (regs->vector == DE_VECTOR)
- return false;
-
- regs->rip = regs->r11;
- regs->r9 = regs->vector;
- regs->r10 = regs->error_code;
- return true;
-}
-
-void route_exception(struct ex_regs *regs)
-{
- typedef void(*handler)(struct ex_regs *);
- handler *handlers = (handler *)exception_handlers;
-
- if (handlers && handlers[regs->vector]) {
- handlers[regs->vector](regs);
- return;
- }
-
- if (kvm_fixup_exception(regs))
- return;
-
- GUEST_FAIL("Unhandled exception '0x%lx' at guest RIP '0x%lx'",
- regs->vector, regs->rip);
-}
-
-static void vm_init_descriptor_tables(struct kvm_vm *vm)
-{
- extern void *idt_handlers;
- struct kvm_segment seg;
- int i;
-
- vm->arch.gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
- vm->arch.idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
- vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
- vm->arch.tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
-
- /* Handlers have the same address in both address spaces.*/
- for (i = 0; i < NUM_INTERRUPTS; i++)
- set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, KERNEL_CS);
-
- *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
-
- kvm_seg_set_kernel_code_64bit(&seg);
- kvm_seg_fill_gdt_64bit(vm, &seg);
-
- kvm_seg_set_kernel_data_64bit(&seg);
- kvm_seg_fill_gdt_64bit(vm, &seg);
-
- kvm_seg_set_tss_64bit(vm->arch.tss, &seg);
- kvm_seg_fill_gdt_64bit(vm, &seg);
-}
-
-void vm_install_exception_handler(struct kvm_vm *vm, int vector,
- void (*handler)(struct ex_regs *))
-{
- vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
-
- handlers[vector] = (vm_vaddr_t)handler;
-}
-
-void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- if (get_ucall(vcpu, &uc) == UCALL_ABORT)
- REPORT_GUEST_ASSERT(uc);
-}
-
-void kvm_arch_vm_post_create(struct kvm_vm *vm)
-{
- int r;
-
- TEST_ASSERT(kvm_has_cap(KVM_CAP_GET_TSC_KHZ),
- "Require KVM_GET_TSC_KHZ to provide udelay() to guest.");
-
- vm_create_irqchip(vm);
- vm_init_descriptor_tables(vm);
-
- sync_global_to_guest(vm, host_cpu_is_intel);
- sync_global_to_guest(vm, host_cpu_is_amd);
- sync_global_to_guest(vm, is_forced_emulation_enabled);
-
- if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
- struct kvm_sev_init init = { 0 };
-
- vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
- }
-
- r = __vm_ioctl(vm, KVM_GET_TSC_KHZ, NULL);
- TEST_ASSERT(r > 0, "KVM_GET_TSC_KHZ did not provide a valid TSC frequency.");
- guest_tsc_khz = r;
- sync_global_to_guest(vm, guest_tsc_khz);
-}
-
-void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
-{
- struct kvm_regs regs;
-
- vcpu_regs_get(vcpu, &regs);
- regs.rip = (unsigned long) guest_code;
- vcpu_regs_set(vcpu, &regs);
-}
-
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
-{
- struct kvm_mp_state mp_state;
- struct kvm_regs regs;
- vm_vaddr_t stack_vaddr;
- struct kvm_vcpu *vcpu;
-
- stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
- DEFAULT_GUEST_STACK_VADDR_MIN,
- MEM_REGION_DATA);
-
- stack_vaddr += DEFAULT_STACK_PGS * getpagesize();
-
- /*
- * Align stack to match calling sequence requirements in section "The
- * Stack Frame" of the System V ABI AMD64 Architecture Processor
- * Supplement, which requires the value (%rsp + 8) to be a multiple of
- * 16 when control is transferred to the function entry point.
- *
- * If this code is ever used to launch a vCPU with 32-bit entry point it
- * may need to subtract 4 bytes instead of 8 bytes.
- */
- TEST_ASSERT(IS_ALIGNED(stack_vaddr, PAGE_SIZE),
- "__vm_vaddr_alloc() did not provide a page-aligned address");
- stack_vaddr -= 8;
-
- vcpu = __vm_vcpu_add(vm, vcpu_id);
- vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
- vcpu_init_sregs(vm, vcpu);
- vcpu_init_xcrs(vm, vcpu);
-
- /* Setup guest general purpose registers */
- vcpu_regs_get(vcpu, &regs);
- regs.rflags = regs.rflags | 0x2;
- regs.rsp = stack_vaddr;
- vcpu_regs_set(vcpu, &regs);
-
- /* Setup the MP state */
- mp_state.mp_state = 0;
- vcpu_mp_state_set(vcpu, &mp_state);
-
- /*
- * Refresh CPUID after setting SREGS and XCR0, so that KVM's "runtime"
- * updates to guest CPUID, e.g. for OSXSAVE and XSAVE state size, are
- * reflected into selftests' vCPU CPUID cache, i.e. so that the cache
- * is consistent with vCPU state.
- */
- vcpu_get_cpuid(vcpu);
- return vcpu;
-}
-
-struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id)
-{
- struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
-
- vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
-
- return vcpu;
-}
-
-void vcpu_arch_free(struct kvm_vcpu *vcpu)
-{
- if (vcpu->cpuid)
- free(vcpu->cpuid);
-}
-
-/* Do not use kvm_supported_cpuid directly except for validity checks. */
-static void *kvm_supported_cpuid;
-
-const struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
-{
- int kvm_fd;
-
- if (kvm_supported_cpuid)
- return kvm_supported_cpuid;
-
- kvm_supported_cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
- kvm_fd = open_kvm_dev_path_or_exit();
-
- kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID,
- (struct kvm_cpuid2 *)kvm_supported_cpuid);
-
- close(kvm_fd);
- return kvm_supported_cpuid;
-}
-
-static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid,
- uint32_t function, uint32_t index,
- uint8_t reg, uint8_t lo, uint8_t hi)
-{
- const struct kvm_cpuid_entry2 *entry;
- int i;
-
- for (i = 0; i < cpuid->nent; i++) {
- entry = &cpuid->entries[i];
-
- /*
- * The output registers in kvm_cpuid_entry2 are in alphabetical
- * order, but kvm_x86_cpu_feature matches that mess, so yay
- * pointer shenanigans!
- */
- if (entry->function == function && entry->index == index)
- return ((&entry->eax)[reg] & GENMASK(hi, lo)) >> lo;
- }
-
- return 0;
-}
-
-bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
- struct kvm_x86_cpu_feature feature)
-{
- return __kvm_cpu_has(cpuid, feature.function, feature.index,
- feature.reg, feature.bit, feature.bit);
-}
-
-uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
- struct kvm_x86_cpu_property property)
-{
- return __kvm_cpu_has(cpuid, property.function, property.index,
- property.reg, property.lo_bit, property.hi_bit);
-}
-
-uint64_t kvm_get_feature_msr(uint64_t msr_index)
-{
- struct {
- struct kvm_msrs header;
- struct kvm_msr_entry entry;
- } buffer = {};
- int r, kvm_fd;
-
- buffer.header.nmsrs = 1;
- buffer.entry.index = msr_index;
- kvm_fd = open_kvm_dev_path_or_exit();
-
- r = __kvm_ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
- TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_GET_MSRS, r));
-
- close(kvm_fd);
- return buffer.entry.data;
-}
-
-void __vm_xsave_require_permission(uint64_t xfeature, const char *name)
-{
- int kvm_fd;
- u64 bitmask;
- long rc;
- struct kvm_device_attr attr = {
- .group = 0,
- .attr = KVM_X86_XCOMP_GUEST_SUPP,
- .addr = (unsigned long) &bitmask,
- };
-
- TEST_ASSERT(!kvm_supported_cpuid,
- "kvm_get_supported_cpuid() cannot be used before ARCH_REQ_XCOMP_GUEST_PERM");
-
- TEST_ASSERT(is_power_of_2(xfeature),
- "Dynamic XFeatures must be enabled one at a time");
-
- kvm_fd = open_kvm_dev_path_or_exit();
- rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
- close(kvm_fd);
-
- if (rc == -1 && (errno == ENXIO || errno == EINVAL))
- __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported");
-
- TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);
-
- __TEST_REQUIRE(bitmask & xfeature,
- "Required XSAVE feature '%s' not supported", name);
-
- TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, ilog2(xfeature)));
-
- rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
- TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
- TEST_ASSERT(bitmask & xfeature,
- "'%s' (0x%lx) not permitted after prctl(ARCH_REQ_XCOMP_GUEST_PERM) permitted=0x%lx",
- name, xfeature, bitmask);
-}
-
-void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid)
-{
- TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID");
-
- /* Allow overriding the default CPUID. */
- if (vcpu->cpuid && vcpu->cpuid->nent < cpuid->nent) {
- free(vcpu->cpuid);
- vcpu->cpuid = NULL;
- }
-
- if (!vcpu->cpuid)
- vcpu->cpuid = allocate_kvm_cpuid2(cpuid->nent);
-
- memcpy(vcpu->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent));
- vcpu_set_cpuid(vcpu);
-}
-
-void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
- struct kvm_x86_cpu_property property,
- uint32_t value)
-{
- struct kvm_cpuid_entry2 *entry;
-
- entry = __vcpu_get_cpuid_entry(vcpu, property.function, property.index);
-
- (&entry->eax)[property.reg] &= ~GENMASK(property.hi_bit, property.lo_bit);
- (&entry->eax)[property.reg] |= value << property.lo_bit;
-
- vcpu_set_cpuid(vcpu);
-
- /* Sanity check that @value doesn't exceed the bounds in any way. */
- TEST_ASSERT_EQ(kvm_cpuid_property(vcpu->cpuid, property), value);
-}
-
-void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function)
-{
- struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function);
-
- entry->eax = 0;
- entry->ebx = 0;
- entry->ecx = 0;
- entry->edx = 0;
- vcpu_set_cpuid(vcpu);
-}
-
-void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
- struct kvm_x86_cpu_feature feature,
- bool set)
-{
- struct kvm_cpuid_entry2 *entry;
- u32 *reg;
-
- entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
- reg = (&entry->eax) + feature.reg;
-
- if (set)
- *reg |= BIT(feature.bit);
- else
- *reg &= ~BIT(feature.bit);
-
- vcpu_set_cpuid(vcpu);
-}
-
-uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index)
-{
- struct {
- struct kvm_msrs header;
- struct kvm_msr_entry entry;
- } buffer = {};
-
- buffer.header.nmsrs = 1;
- buffer.entry.index = msr_index;
-
- vcpu_msrs_get(vcpu, &buffer.header);
-
- return buffer.entry.data;
-}
-
-int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value)
-{
- struct {
- struct kvm_msrs header;
- struct kvm_msr_entry entry;
- } buffer = {};
-
- memset(&buffer, 0, sizeof(buffer));
- buffer.header.nmsrs = 1;
- buffer.entry.index = msr_index;
- buffer.entry.data = msr_value;
-
- return __vcpu_ioctl(vcpu, KVM_SET_MSRS, &buffer.header);
-}
-
-void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
-{
- va_list ap;
- struct kvm_regs regs;
-
- TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
- " num: %u",
- num);
-
- va_start(ap, num);
- vcpu_regs_get(vcpu, &regs);
-
- if (num >= 1)
- regs.rdi = va_arg(ap, uint64_t);
-
- if (num >= 2)
- regs.rsi = va_arg(ap, uint64_t);
-
- if (num >= 3)
- regs.rdx = va_arg(ap, uint64_t);
-
- if (num >= 4)
- regs.rcx = va_arg(ap, uint64_t);
-
- if (num >= 5)
- regs.r8 = va_arg(ap, uint64_t);
-
- if (num >= 6)
- regs.r9 = va_arg(ap, uint64_t);
-
- vcpu_regs_set(vcpu, &regs);
- va_end(ap);
-}
-
-void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
-{
- struct kvm_regs regs;
- struct kvm_sregs sregs;
-
- fprintf(stream, "%*svCPU ID: %u\n", indent, "", vcpu->id);
-
- fprintf(stream, "%*sregs:\n", indent + 2, "");
- vcpu_regs_get(vcpu, &regs);
- regs_dump(stream, &regs, indent + 4);
-
- fprintf(stream, "%*ssregs:\n", indent + 2, "");
- vcpu_sregs_get(vcpu, &sregs);
- sregs_dump(stream, &sregs, indent + 4);
-}
-
-static struct kvm_msr_list *__kvm_get_msr_index_list(bool feature_msrs)
-{
- struct kvm_msr_list *list;
- struct kvm_msr_list nmsrs;
- int kvm_fd, r;
-
- kvm_fd = open_kvm_dev_path_or_exit();
-
- nmsrs.nmsrs = 0;
- if (!feature_msrs)
- r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
- else
- r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, &nmsrs);
-
- TEST_ASSERT(r == -1 && errno == E2BIG,
- "Expected -E2BIG, got rc: %i errno: %i (%s)",
- r, errno, strerror(errno));
-
- list = malloc(sizeof(*list) + nmsrs.nmsrs * sizeof(list->indices[0]));
- TEST_ASSERT(list, "-ENOMEM when allocating MSR index list");
- list->nmsrs = nmsrs.nmsrs;
-
- if (!feature_msrs)
- kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
- else
- kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
- close(kvm_fd);
-
- TEST_ASSERT(list->nmsrs == nmsrs.nmsrs,
- "Number of MSRs in list changed, was %d, now %d",
- nmsrs.nmsrs, list->nmsrs);
- return list;
-}
-
-const struct kvm_msr_list *kvm_get_msr_index_list(void)
-{
- static const struct kvm_msr_list *list;
-
- if (!list)
- list = __kvm_get_msr_index_list(false);
- return list;
-}
-
-
-const struct kvm_msr_list *kvm_get_feature_msr_index_list(void)
-{
- static const struct kvm_msr_list *list;
-
- if (!list)
- list = __kvm_get_msr_index_list(true);
- return list;
-}
-
-bool kvm_msr_is_in_save_restore_list(uint32_t msr_index)
-{
- const struct kvm_msr_list *list = kvm_get_msr_index_list();
- int i;
-
- for (i = 0; i < list->nmsrs; ++i) {
- if (list->indices[i] == msr_index)
- return true;
- }
-
- return false;
-}
-
-static void vcpu_save_xsave_state(struct kvm_vcpu *vcpu,
- struct kvm_x86_state *state)
-{
- int size = vm_check_cap(vcpu->vm, KVM_CAP_XSAVE2);
-
- if (size) {
- state->xsave = malloc(size);
- vcpu_xsave2_get(vcpu, state->xsave);
- } else {
- state->xsave = malloc(sizeof(struct kvm_xsave));
- vcpu_xsave_get(vcpu, state->xsave);
- }
-}
-
-struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu)
-{
- const struct kvm_msr_list *msr_list = kvm_get_msr_index_list();
- struct kvm_x86_state *state;
- int i;
-
- static int nested_size = -1;
-
- if (nested_size == -1) {
- nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
- TEST_ASSERT(nested_size <= sizeof(state->nested_),
- "Nested state size too big, %i > %zi",
- nested_size, sizeof(state->nested_));
- }
-
- /*
- * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
- * guest state is consistent only after userspace re-enters the
- * kernel with KVM_RUN. Complete IO prior to migrating state
- * to a new VM.
- */
- vcpu_run_complete_io(vcpu);
-
- state = malloc(sizeof(*state) + msr_list->nmsrs * sizeof(state->msrs.entries[0]));
- TEST_ASSERT(state, "-ENOMEM when allocating kvm state");
-
- vcpu_events_get(vcpu, &state->events);
- vcpu_mp_state_get(vcpu, &state->mp_state);
- vcpu_regs_get(vcpu, &state->regs);
- vcpu_save_xsave_state(vcpu, state);
-
- if (kvm_has_cap(KVM_CAP_XCRS))
- vcpu_xcrs_get(vcpu, &state->xcrs);
-
- vcpu_sregs_get(vcpu, &state->sregs);
-
- if (nested_size) {
- state->nested.size = sizeof(state->nested_);
-
- vcpu_nested_state_get(vcpu, &state->nested);
- TEST_ASSERT(state->nested.size <= nested_size,
- "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
- state->nested.size, nested_size);
- } else {
- state->nested.size = 0;
- }
-
- state->msrs.nmsrs = msr_list->nmsrs;
- for (i = 0; i < msr_list->nmsrs; i++)
- state->msrs.entries[i].index = msr_list->indices[i];
- vcpu_msrs_get(vcpu, &state->msrs);
-
- vcpu_debugregs_get(vcpu, &state->debugregs);
-
- return state;
-}
-
-void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state)
-{
- vcpu_sregs_set(vcpu, &state->sregs);
- vcpu_msrs_set(vcpu, &state->msrs);
-
- if (kvm_has_cap(KVM_CAP_XCRS))
- vcpu_xcrs_set(vcpu, &state->xcrs);
-
- vcpu_xsave_set(vcpu, state->xsave);
- vcpu_events_set(vcpu, &state->events);
- vcpu_mp_state_set(vcpu, &state->mp_state);
- vcpu_debugregs_set(vcpu, &state->debugregs);
- vcpu_regs_set(vcpu, &state->regs);
-
- if (state->nested.size)
- vcpu_nested_state_set(vcpu, &state->nested);
-}
-
-void kvm_x86_state_cleanup(struct kvm_x86_state *state)
-{
- free(state->xsave);
- free(state);
-}
-
-void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
-{
- if (!kvm_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) {
- *pa_bits = kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32;
- *va_bits = 32;
- } else {
- *pa_bits = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
- *va_bits = kvm_cpu_property(X86_PROPERTY_MAX_VIRT_ADDR);
- }
-}
-
-void kvm_init_vm_address_properties(struct kvm_vm *vm)
-{
- if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
- vm->arch.sev_fd = open_sev_dev_path_or_exit();
- vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT));
- vm->gpa_tag_mask = vm->arch.c_bit;
- } else {
- vm->arch.sev_fd = -1;
- }
-}
-
-const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
- uint32_t function, uint32_t index)
-{
- int i;
-
- for (i = 0; i < cpuid->nent; i++) {
- if (cpuid->entries[i].function == function &&
- cpuid->entries[i].index == index)
- return &cpuid->entries[i];
- }
-
- TEST_FAIL("CPUID function 0x%x index 0x%x not found ", function, index);
-
- return NULL;
-}
-
-#define X86_HYPERCALL(inputs...) \
-({ \
- uint64_t r; \
- \
- asm volatile("test %[use_vmmcall], %[use_vmmcall]\n\t" \
- "jnz 1f\n\t" \
- "vmcall\n\t" \
- "jmp 2f\n\t" \
- "1: vmmcall\n\t" \
- "2:" \
- : "=a"(r) \
- : [use_vmmcall] "r" (host_cpu_is_amd), inputs); \
- \
- r; \
-})
-
-uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
- uint64_t a3)
-{
- return X86_HYPERCALL("a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3));
-}
-
-uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
-{
- return X86_HYPERCALL("a"(nr), "D"(a0), "S"(a1));
-}
-
-void xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
-{
- GUEST_ASSERT(!__xen_hypercall(nr, a0, a1));
-}
-
-unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
-{
- const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
- unsigned long ht_gfn, max_gfn, max_pfn;
- uint8_t maxphyaddr, guest_maxphyaddr;
-
- /*
- * Use "guest MAXPHYADDR" from KVM if it's available. Guest MAXPHYADDR
- * enumerates the max _mappable_ GPA, which can be less than the raw
- * MAXPHYADDR, e.g. if MAXPHYADDR=52, KVM is using TDP, and the CPU
- * doesn't support 5-level TDP.
- */
- guest_maxphyaddr = kvm_cpu_property(X86_PROPERTY_GUEST_MAX_PHY_ADDR);
- guest_maxphyaddr = guest_maxphyaddr ?: vm->pa_bits;
- TEST_ASSERT(guest_maxphyaddr <= vm->pa_bits,
- "Guest MAXPHYADDR should never be greater than raw MAXPHYADDR");
-
- max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1;
-
- /* Avoid reserved HyperTransport region on AMD processors. */
- if (!host_cpu_is_amd)
- return max_gfn;
-
- /* On parts with <40 physical address bits, the area is fully hidden */
- if (vm->pa_bits < 40)
- return max_gfn;
-
- /* Before family 17h, the HyperTransport area is just below 1T. */
- ht_gfn = (1 << 28) - num_ht_pages;
- if (this_cpu_family() < 0x17)
- goto done;
-
- /*
- * Otherwise it's at the top of the physical address space, possibly
- * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use
- * the old conservative value if MAXPHYADDR is not enumerated.
- */
- if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR))
- goto done;
-
- maxphyaddr = this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
- max_pfn = (1ULL << (maxphyaddr - vm->page_shift)) - 1;
-
- if (this_cpu_has_p(X86_PROPERTY_PHYS_ADDR_REDUCTION))
- max_pfn >>= this_cpu_property(X86_PROPERTY_PHYS_ADDR_REDUCTION);
-
- ht_gfn = max_pfn - num_ht_pages;
-done:
- return min(max_gfn, ht_gfn - 1);
-}
-
-/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */
-bool vm_is_unrestricted_guest(struct kvm_vm *vm)
-{
- /* Ensure that a KVM vendor-specific module is loaded. */
- if (vm == NULL)
- close(open_kvm_dev_path_or_exit());
-
- return get_kvm_intel_param_bool("unrestricted_guest");
-}
-
-void kvm_selftest_arch_init(void)
-{
- host_cpu_is_intel = this_cpu_is_intel();
- host_cpu_is_amd = this_cpu_is_amd();
- is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
-}
-
-bool sys_clocksource_is_based_on_tsc(void)
-{
- char *clk_name = sys_get_cur_clocksource();
- bool ret = !strcmp(clk_name, "tsc\n") ||
- !strcmp(clk_name, "hyperv_clocksource_tsc_page\n");
-
- free(clk_name);
-
- return ret;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-#include <stdint.h>
-#include <stdbool.h>
-
-#include "sev.h"
-
-/*
- * sparsebit_next_clear() can return 0 if [x, 2**64-1] are all set, and the
- * -1 would then cause an underflow back to 2**64 - 1. This is expected and
- * correct.
- *
- * If the last range in the sparsebit is [x, y] and we try to iterate,
- * sparsebit_next_set() will return 0, and sparsebit_next_clear() will try
- * and find the first range, but that's correct because the condition
- * expression would cause us to quit the loop.
- */
-static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region)
-{
- const struct sparsebit *protected_phy_pages = region->protected_phy_pages;
- const vm_paddr_t gpa_base = region->region.guest_phys_addr;
- const sparsebit_idx_t lowest_page_in_region = gpa_base >> vm->page_shift;
- sparsebit_idx_t i, j;
-
- if (!sparsebit_any_set(protected_phy_pages))
- return;
-
- sev_register_encrypted_memory(vm, region);
-
- sparsebit_for_each_set_range(protected_phy_pages, i, j) {
- const uint64_t size = (j - i + 1) * vm->page_size;
- const uint64_t offset = (i - lowest_page_in_region) * vm->page_size;
-
- sev_launch_update_data(vm, gpa_base + offset, size);
- }
-}
-
-void sev_vm_init(struct kvm_vm *vm)
-{
- if (vm->type == KVM_X86_DEFAULT_VM) {
- assert(vm->arch.sev_fd == -1);
- vm->arch.sev_fd = open_sev_dev_path_or_exit();
- vm_sev_ioctl(vm, KVM_SEV_INIT, NULL);
- } else {
- struct kvm_sev_init init = { 0 };
- assert(vm->type == KVM_X86_SEV_VM);
- vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
- }
-}
-
-void sev_es_vm_init(struct kvm_vm *vm)
-{
- if (vm->type == KVM_X86_DEFAULT_VM) {
- assert(vm->arch.sev_fd == -1);
- vm->arch.sev_fd = open_sev_dev_path_or_exit();
- vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL);
- } else {
- struct kvm_sev_init init = { 0 };
- assert(vm->type == KVM_X86_SEV_ES_VM);
- vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
- }
-}
-
-void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
-{
- struct kvm_sev_launch_start launch_start = {
- .policy = policy,
- };
- struct userspace_mem_region *region;
- struct kvm_sev_guest_status status;
- int ctr;
-
- vm_sev_ioctl(vm, KVM_SEV_LAUNCH_START, &launch_start);
- vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
-
- TEST_ASSERT_EQ(status.policy, policy);
- TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE);
-
- hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
- encrypt_region(vm, region);
-
- if (policy & SEV_POLICY_ES)
- vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
-
- vm->arch.is_pt_protected = true;
-}
-
-void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement)
-{
- struct kvm_sev_launch_measure launch_measure;
- struct kvm_sev_guest_status guest_status;
-
- launch_measure.len = 256;
- launch_measure.uaddr = (__u64)measurement;
- vm_sev_ioctl(vm, KVM_SEV_LAUNCH_MEASURE, &launch_measure);
-
- vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &guest_status);
- TEST_ASSERT_EQ(guest_status.state, SEV_GUEST_STATE_LAUNCH_SECRET);
-}
-
-void sev_vm_launch_finish(struct kvm_vm *vm)
-{
- struct kvm_sev_guest_status status;
-
- vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
- TEST_ASSERT(status.state == SEV_GUEST_STATE_LAUNCH_UPDATE ||
- status.state == SEV_GUEST_STATE_LAUNCH_SECRET,
- "Unexpected guest state: %d", status.state);
-
- vm_sev_ioctl(vm, KVM_SEV_LAUNCH_FINISH, NULL);
-
- vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
- TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING);
-}
-
-struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
- struct kvm_vcpu **cpu)
-{
- struct vm_shape shape = {
- .mode = VM_MODE_DEFAULT,
- .type = type,
- };
- struct kvm_vm *vm;
- struct kvm_vcpu *cpus[1];
-
- vm = __vm_create_with_vcpus(shape, 1, 0, guest_code, cpus);
- *cpu = cpus[0];
-
- return vm;
-}
-
-void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement)
-{
- sev_vm_launch(vm, policy);
-
- if (!measurement)
- measurement = alloca(256);
-
- sev_vm_launch_measure(vm, measurement);
-
- sev_vm_launch_finish(vm);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * tools/testing/selftests/kvm/lib/x86_64/svm.c
- * Helpers used for nested SVM testing
- * Largely inspired from KVM unit test svm.c
- *
- * Copyright (C) 2020, Red Hat, Inc.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-
-#define SEV_DEV_PATH "/dev/sev"
-
-struct gpr64_regs guest_regs;
-u64 rflags;
-
-/* Allocate memory regions for nested SVM tests.
- *
- * Input Args:
- * vm - The VM to allocate guest-virtual addresses in.
- *
- * Output Args:
- * p_svm_gva - The guest virtual address for the struct svm_test_data.
- *
- * Return:
- * Pointer to structure with the addresses of the SVM areas.
- */
-struct svm_test_data *
-vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva)
-{
- vm_vaddr_t svm_gva = vm_vaddr_alloc_page(vm);
- struct svm_test_data *svm = addr_gva2hva(vm, svm_gva);
-
- svm->vmcb = (void *)vm_vaddr_alloc_page(vm);
- svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb);
- svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb);
-
- svm->save_area = (void *)vm_vaddr_alloc_page(vm);
- svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area);
- svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area);
-
- svm->msr = (void *)vm_vaddr_alloc_page(vm);
- svm->msr_hva = addr_gva2hva(vm, (uintptr_t)svm->msr);
- svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr);
- memset(svm->msr_hva, 0, getpagesize());
-
- *p_svm_gva = svm_gva;
- return svm;
-}
-
-static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
- u64 base, u32 limit, u32 attr)
-{
- seg->selector = selector;
- seg->attrib = attr;
- seg->limit = limit;
- seg->base = base;
-}
-
-void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp)
-{
- struct vmcb *vmcb = svm->vmcb;
- uint64_t vmcb_gpa = svm->vmcb_gpa;
- struct vmcb_save_area *save = &vmcb->save;
- struct vmcb_control_area *ctrl = &vmcb->control;
- u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
- | SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK;
- u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
- | SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK;
- uint64_t efer;
-
- efer = rdmsr(MSR_EFER);
- wrmsr(MSR_EFER, efer | EFER_SVME);
- wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa);
-
- memset(vmcb, 0, sizeof(*vmcb));
- asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory");
- vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr);
- vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr);
- vmcb_set_seg(&save->ss, get_ss(), 0, -1U, data_seg_attr);
- vmcb_set_seg(&save->ds, get_ds(), 0, -1U, data_seg_attr);
- vmcb_set_seg(&save->gdtr, 0, get_gdt().address, get_gdt().size, 0);
- vmcb_set_seg(&save->idtr, 0, get_idt().address, get_idt().size, 0);
-
- ctrl->asid = 1;
- save->cpl = 0;
- save->efer = rdmsr(MSR_EFER);
- asm volatile ("mov %%cr4, %0" : "=r"(save->cr4) : : "memory");
- asm volatile ("mov %%cr3, %0" : "=r"(save->cr3) : : "memory");
- asm volatile ("mov %%cr0, %0" : "=r"(save->cr0) : : "memory");
- asm volatile ("mov %%dr7, %0" : "=r"(save->dr7) : : "memory");
- asm volatile ("mov %%dr6, %0" : "=r"(save->dr6) : : "memory");
- asm volatile ("mov %%cr2, %0" : "=r"(save->cr2) : : "memory");
- save->g_pat = rdmsr(MSR_IA32_CR_PAT);
- save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
- ctrl->intercept = (1ULL << INTERCEPT_VMRUN) |
- (1ULL << INTERCEPT_VMMCALL);
- ctrl->msrpm_base_pa = svm->msr_gpa;
-
- vmcb->save.rip = (u64)guest_rip;
- vmcb->save.rsp = (u64)guest_rsp;
- guest_regs.rdi = (u64)svm;
-}
-
-/*
- * save/restore 64-bit general registers except rax, rip, rsp
- * which are directly handed through the VMCB guest processor state
- */
-#define SAVE_GPR_C \
- "xchg %%rbx, guest_regs+0x20\n\t" \
- "xchg %%rcx, guest_regs+0x10\n\t" \
- "xchg %%rdx, guest_regs+0x18\n\t" \
- "xchg %%rbp, guest_regs+0x30\n\t" \
- "xchg %%rsi, guest_regs+0x38\n\t" \
- "xchg %%rdi, guest_regs+0x40\n\t" \
- "xchg %%r8, guest_regs+0x48\n\t" \
- "xchg %%r9, guest_regs+0x50\n\t" \
- "xchg %%r10, guest_regs+0x58\n\t" \
- "xchg %%r11, guest_regs+0x60\n\t" \
- "xchg %%r12, guest_regs+0x68\n\t" \
- "xchg %%r13, guest_regs+0x70\n\t" \
- "xchg %%r14, guest_regs+0x78\n\t" \
- "xchg %%r15, guest_regs+0x80\n\t"
-
-#define LOAD_GPR_C SAVE_GPR_C
-
-/*
- * selftests do not use interrupts so we dropped clgi/sti/cli/stgi
- * for now. registers involved in LOAD/SAVE_GPR_C are eventually
- * unmodified so they do not need to be in the clobber list.
- */
-void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa)
-{
- asm volatile (
- "vmload %[vmcb_gpa]\n\t"
- "mov rflags, %%r15\n\t" // rflags
- "mov %%r15, 0x170(%[vmcb])\n\t"
- "mov guest_regs, %%r15\n\t" // rax
- "mov %%r15, 0x1f8(%[vmcb])\n\t"
- LOAD_GPR_C
- "vmrun %[vmcb_gpa]\n\t"
- SAVE_GPR_C
- "mov 0x170(%[vmcb]), %%r15\n\t" // rflags
- "mov %%r15, rflags\n\t"
- "mov 0x1f8(%[vmcb]), %%r15\n\t" // rax
- "mov %%r15, guest_regs\n\t"
- "vmsave %[vmcb_gpa]\n\t"
- : : [vmcb] "r" (vmcb), [vmcb_gpa] "a" (vmcb_gpa)
- : "r15", "memory");
-}
-
-/*
- * Open SEV_DEV_PATH if available, otherwise exit the entire program.
- *
- * Return:
- * The opened file descriptor of /dev/sev.
- */
-int open_sev_dev_path_or_exit(void)
-{
- return open_path_or_exit(SEV_DEV_PATH, 0);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ucall support. A ucall is a "hypercall to userspace".
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-#include "kvm_util.h"
-
-#define UCALL_PIO_PORT ((uint16_t)0x1000)
-
-void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
- /*
- * FIXME: Revert this hack (the entire commit that added it) once nVMX
- * preserves L2 GPRs across a nested VM-Exit. If a ucall from L2, e.g.
- * to do a GUEST_SYNC(), lands the vCPU in L1, any and all GPRs can be
- * clobbered by L1. Save and restore non-volatile GPRs (clobbering RBP
- * in particular is problematic) along with RDX and RDI (which are
- * inputs), and clobber volatile GPRs. *sigh*
- */
-#define HORRIFIC_L2_UCALL_CLOBBER_HACK \
- "rcx", "rsi", "r8", "r9", "r10", "r11"
-
- asm volatile("push %%rbp\n\t"
- "push %%r15\n\t"
- "push %%r14\n\t"
- "push %%r13\n\t"
- "push %%r12\n\t"
- "push %%rbx\n\t"
- "push %%rdx\n\t"
- "push %%rdi\n\t"
- "in %[port], %%al\n\t"
- "pop %%rdi\n\t"
- "pop %%rdx\n\t"
- "pop %%rbx\n\t"
- "pop %%r12\n\t"
- "pop %%r13\n\t"
- "pop %%r14\n\t"
- "pop %%r15\n\t"
- "pop %%rbp\n\t"
- : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory",
- HORRIFIC_L2_UCALL_CLOBBER_HACK);
-}
-
-void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
-{
- struct kvm_run *run = vcpu->run;
-
- if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) {
- struct kvm_regs regs;
-
- vcpu_regs_get(vcpu, &regs);
- return (void *)regs.rdi;
- }
- return NULL;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * tools/testing/selftests/kvm/lib/x86_64/vmx.c
- *
- * Copyright (C) 2018, Google LLC.
- */
-
-#include <asm/msr-index.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#define PAGE_SHIFT_4K 12
-
-#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000
-
-bool enable_evmcs;
-
-struct hv_enlightened_vmcs *current_evmcs;
-struct hv_vp_assist_page *current_vp_assist;
-
-struct eptPageTableEntry {
- uint64_t readable:1;
- uint64_t writable:1;
- uint64_t executable:1;
- uint64_t memory_type:3;
- uint64_t ignore_pat:1;
- uint64_t page_size:1;
- uint64_t accessed:1;
- uint64_t dirty:1;
- uint64_t ignored_11_10:2;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t suppress_ve:1;
-};
-
-struct eptPageTablePointer {
- uint64_t memory_type:3;
- uint64_t page_walk_length:3;
- uint64_t ad_enabled:1;
- uint64_t reserved_11_07:5;
- uint64_t address:40;
- uint64_t reserved_63_52:12;
-};
-int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
-{
- uint16_t evmcs_ver;
-
- vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
- (unsigned long)&evmcs_ver);
-
- /* KVM should return supported EVMCS version range */
- TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) &&
- (evmcs_ver & 0xff) > 0,
- "Incorrect EVMCS version range: %x:%x",
- evmcs_ver & 0xff, evmcs_ver >> 8);
-
- return evmcs_ver;
-}
-
-/* Allocate memory regions for nested VMX tests.
- *
- * Input Args:
- * vm - The VM to allocate guest-virtual addresses in.
- *
- * Output Args:
- * p_vmx_gva - The guest virtual address for the struct vmx_pages.
- *
- * Return:
- * Pointer to structure with the addresses of the VMX areas.
- */
-struct vmx_pages *
-vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
-{
- vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm);
- struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);
-
- /* Setup of a region of guest memory for the vmxon region. */
- vmx->vmxon = (void *)vm_vaddr_alloc_page(vm);
- vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
- vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);
-
- /* Setup of a region of guest memory for a vmcs. */
- vmx->vmcs = (void *)vm_vaddr_alloc_page(vm);
- vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
- vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);
-
- /* Setup of a region of guest memory for the MSR bitmap. */
- vmx->msr = (void *)vm_vaddr_alloc_page(vm);
- vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
- vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
- memset(vmx->msr_hva, 0, getpagesize());
-
- /* Setup of a region of guest memory for the shadow VMCS. */
- vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm);
- vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
- vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);
-
- /* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
- vmx->vmread = (void *)vm_vaddr_alloc_page(vm);
- vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
- vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
- memset(vmx->vmread_hva, 0, getpagesize());
-
- vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm);
- vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
- vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
- memset(vmx->vmwrite_hva, 0, getpagesize());
-
- *p_vmx_gva = vmx_gva;
- return vmx;
-}
-
-bool prepare_for_vmx_operation(struct vmx_pages *vmx)
-{
- uint64_t feature_control;
- uint64_t required;
- unsigned long cr0;
- unsigned long cr4;
-
- /*
- * Ensure bits in CR0 and CR4 are valid in VMX operation:
- * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx.
- * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx.
- */
- __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory");
- cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1);
- cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0);
- __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory");
-
- __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory");
- cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1);
- cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0);
- /* Enable VMX operation */
- cr4 |= X86_CR4_VMXE;
- __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory");
-
- /*
- * Configure IA32_FEATURE_CONTROL MSR to allow VMXON:
- * Bit 0: Lock bit. If clear, VMXON causes a #GP.
- * Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON
- * outside of SMX causes a #GP.
- */
- required = FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
- required |= FEAT_CTL_LOCKED;
- feature_control = rdmsr(MSR_IA32_FEAT_CTL);
- if ((feature_control & required) != required)
- wrmsr(MSR_IA32_FEAT_CTL, feature_control | required);
-
- /* Enter VMX root operation. */
- *(uint32_t *)(vmx->vmxon) = vmcs_revision();
- if (vmxon(vmx->vmxon_gpa))
- return false;
-
- return true;
-}
-
-bool load_vmcs(struct vmx_pages *vmx)
-{
- /* Load a VMCS. */
- *(uint32_t *)(vmx->vmcs) = vmcs_revision();
- if (vmclear(vmx->vmcs_gpa))
- return false;
-
- if (vmptrld(vmx->vmcs_gpa))
- return false;
-
- /* Setup shadow VMCS, do not load it yet. */
- *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
- if (vmclear(vmx->shadow_vmcs_gpa))
- return false;
-
- return true;
-}
-
-static bool ept_vpid_cap_supported(uint64_t mask)
-{
- return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask;
-}
-
-bool ept_1g_pages_supported(void)
-{
- return ept_vpid_cap_supported(VMX_EPT_VPID_CAP_1G_PAGES);
-}
-
-/*
- * Initialize the control fields to the most basic settings possible.
- */
-static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
-{
- uint32_t sec_exec_ctl = 0;
-
- vmwrite(VIRTUAL_PROCESSOR_ID, 0);
- vmwrite(POSTED_INTR_NV, 0);
-
- vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS));
-
- if (vmx->eptp_gpa) {
- uint64_t ept_paddr;
- struct eptPageTablePointer eptp = {
- .memory_type = X86_MEMTYPE_WB,
- .page_walk_length = 3, /* + 1 */
- .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
- .address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
- };
-
- memcpy(&ept_paddr, &eptp, sizeof(ept_paddr));
- vmwrite(EPT_POINTER, ept_paddr);
- sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT;
- }
-
- if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, sec_exec_ctl))
- vmwrite(CPU_BASED_VM_EXEC_CONTROL,
- rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
- else {
- vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS));
- GUEST_ASSERT(!sec_exec_ctl);
- }
-
- vmwrite(EXCEPTION_BITMAP, 0);
- vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
- vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */
- vmwrite(CR3_TARGET_COUNT, 0);
- vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) |
- VM_EXIT_HOST_ADDR_SPACE_SIZE); /* 64-bit host */
- vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
- vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
- vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) |
- VM_ENTRY_IA32E_MODE); /* 64-bit guest */
- vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
- vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
- vmwrite(TPR_THRESHOLD, 0);
-
- vmwrite(CR0_GUEST_HOST_MASK, 0);
- vmwrite(CR4_GUEST_HOST_MASK, 0);
- vmwrite(CR0_READ_SHADOW, get_cr0());
- vmwrite(CR4_READ_SHADOW, get_cr4());
-
- vmwrite(MSR_BITMAP, vmx->msr_gpa);
- vmwrite(VMREAD_BITMAP, vmx->vmread_gpa);
- vmwrite(VMWRITE_BITMAP, vmx->vmwrite_gpa);
-}
-
-/*
- * Initialize the host state fields based on the current host state, with
- * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch
- * or vmresume.
- */
-static inline void init_vmcs_host_state(void)
-{
- uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS);
-
- vmwrite(HOST_ES_SELECTOR, get_es());
- vmwrite(HOST_CS_SELECTOR, get_cs());
- vmwrite(HOST_SS_SELECTOR, get_ss());
- vmwrite(HOST_DS_SELECTOR, get_ds());
- vmwrite(HOST_FS_SELECTOR, get_fs());
- vmwrite(HOST_GS_SELECTOR, get_gs());
- vmwrite(HOST_TR_SELECTOR, get_tr());
-
- if (exit_controls & VM_EXIT_LOAD_IA32_PAT)
- vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT));
- if (exit_controls & VM_EXIT_LOAD_IA32_EFER)
- vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER));
- if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
- vmwrite(HOST_IA32_PERF_GLOBAL_CTRL,
- rdmsr(MSR_CORE_PERF_GLOBAL_CTRL));
-
- vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS));
-
- vmwrite(HOST_CR0, get_cr0());
- vmwrite(HOST_CR3, get_cr3());
- vmwrite(HOST_CR4, get_cr4());
- vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE));
- vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE));
- vmwrite(HOST_TR_BASE,
- get_desc64_base((struct desc64 *)(get_gdt().address + get_tr())));
- vmwrite(HOST_GDTR_BASE, get_gdt().address);
- vmwrite(HOST_IDTR_BASE, get_idt().address);
- vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP));
- vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP));
-}
-
-/*
- * Initialize the guest state fields essentially as a clone of
- * the host state fields. Some host state fields have fixed
- * values, and we set the corresponding guest state fields accordingly.
- */
-static inline void init_vmcs_guest_state(void *rip, void *rsp)
-{
- vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR));
- vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR));
- vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR));
- vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR));
- vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR));
- vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR));
- vmwrite(GUEST_LDTR_SELECTOR, 0);
- vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR));
- vmwrite(GUEST_INTR_STATUS, 0);
- vmwrite(GUEST_PML_INDEX, 0);
-
- vmwrite(VMCS_LINK_POINTER, -1ll);
- vmwrite(GUEST_IA32_DEBUGCTL, 0);
- vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT));
- vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER));
- vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL,
- vmreadz(HOST_IA32_PERF_GLOBAL_CTRL));
-
- vmwrite(GUEST_ES_LIMIT, -1);
- vmwrite(GUEST_CS_LIMIT, -1);
- vmwrite(GUEST_SS_LIMIT, -1);
- vmwrite(GUEST_DS_LIMIT, -1);
- vmwrite(GUEST_FS_LIMIT, -1);
- vmwrite(GUEST_GS_LIMIT, -1);
- vmwrite(GUEST_LDTR_LIMIT, -1);
- vmwrite(GUEST_TR_LIMIT, 0x67);
- vmwrite(GUEST_GDTR_LIMIT, 0xffff);
- vmwrite(GUEST_IDTR_LIMIT, 0xffff);
- vmwrite(GUEST_ES_AR_BYTES,
- vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093);
- vmwrite(GUEST_CS_AR_BYTES, 0xa09b);
- vmwrite(GUEST_SS_AR_BYTES, 0xc093);
- vmwrite(GUEST_DS_AR_BYTES,
- vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093);
- vmwrite(GUEST_FS_AR_BYTES,
- vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093);
- vmwrite(GUEST_GS_AR_BYTES,
- vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093);
- vmwrite(GUEST_LDTR_AR_BYTES, 0x10000);
- vmwrite(GUEST_TR_AR_BYTES, 0x8b);
- vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
- vmwrite(GUEST_ACTIVITY_STATE, 0);
- vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS));
- vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0);
-
- vmwrite(GUEST_CR0, vmreadz(HOST_CR0));
- vmwrite(GUEST_CR3, vmreadz(HOST_CR3));
- vmwrite(GUEST_CR4, vmreadz(HOST_CR4));
- vmwrite(GUEST_ES_BASE, 0);
- vmwrite(GUEST_CS_BASE, 0);
- vmwrite(GUEST_SS_BASE, 0);
- vmwrite(GUEST_DS_BASE, 0);
- vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE));
- vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE));
- vmwrite(GUEST_LDTR_BASE, 0);
- vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE));
- vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE));
- vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE));
- vmwrite(GUEST_DR7, 0x400);
- vmwrite(GUEST_RSP, (uint64_t)rsp);
- vmwrite(GUEST_RIP, (uint64_t)rip);
- vmwrite(GUEST_RFLAGS, 2);
- vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0);
- vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP));
- vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP));
-}
-
-void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
-{
- init_vmcs_control_fields(vmx);
- init_vmcs_host_state();
- init_vmcs_guest_state(guest_rip, guest_rsp);
-}
-
-static void nested_create_pte(struct kvm_vm *vm,
- struct eptPageTableEntry *pte,
- uint64_t nested_paddr,
- uint64_t paddr,
- int current_level,
- int target_level)
-{
- if (!pte->readable) {
- pte->writable = true;
- pte->readable = true;
- pte->executable = true;
- pte->page_size = (current_level == target_level);
- if (pte->page_size)
- pte->address = paddr >> vm->page_shift;
- else
- pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
- } else {
- /*
- * Entry already present. Assert that the caller doesn't want
- * a hugepage at this level, and that there isn't a hugepage at
- * this level.
- */
- TEST_ASSERT(current_level != target_level,
- "Cannot create hugepage at level: %u, nested_paddr: 0x%lx",
- current_level, nested_paddr);
- TEST_ASSERT(!pte->page_size,
- "Cannot create page table at level: %u, nested_paddr: 0x%lx",
- current_level, nested_paddr);
- }
-}
-
-
-void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, int target_level)
-{
- const uint64_t page_size = PG_LEVEL_SIZE(target_level);
- struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
- uint16_t index;
-
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
- TEST_ASSERT((nested_paddr >> 48) == 0,
- "Nested physical address 0x%lx requires 5-level paging",
- nested_paddr);
- TEST_ASSERT((nested_paddr % page_size) == 0,
- "Nested physical address not on page boundary,\n"
- " nested_paddr: 0x%lx page_size: 0x%lx",
- nested_paddr, page_size);
- TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
- TEST_ASSERT((paddr % page_size) == 0,
- "Physical address not on page boundary,\n"
- " paddr: 0x%lx page_size: 0x%lx",
- paddr, page_size);
- TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
-
- for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
- index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
- pte = &pt[index];
-
- nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
-
- if (pte->page_size)
- break;
-
- pt = addr_gpa2hva(vm, pte->address * vm->page_size);
- }
-
- /*
- * For now mark these as accessed and dirty because the only
- * testcase we have needs that. Can be reconsidered later.
- */
- pte->accessed = true;
- pte->dirty = true;
-
-}
-
-void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr)
-{
- __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K);
-}
-
-/*
- * Map a range of EPT guest physical addresses to the VM's physical address
- *
- * Input Args:
- * vm - Virtual Machine
- * nested_paddr - Nested guest physical address to map
- * paddr - VM Physical Address
- * size - The size of the range to map
- * level - The level at which to map the range
- *
- * Output Args: None
- *
- * Return: None
- *
- * Within the VM given by vm, creates a nested guest translation for the
- * page range starting at nested_paddr to the page range starting at paddr.
- */
-void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size,
- int level)
-{
- size_t page_size = PG_LEVEL_SIZE(level);
- size_t npages = size / page_size;
-
- TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow");
- TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
-
- while (npages--) {
- __nested_pg_map(vmx, vm, nested_paddr, paddr, level);
- nested_paddr += page_size;
- paddr += page_size;
- }
-}
-
-void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size)
-{
- __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K);
-}
-
-/* Prepare an identity extended page table that maps all the
- * physical pages in VM.
- */
-void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t memslot)
-{
- sparsebit_idx_t i, last;
- struct userspace_mem_region *region =
- memslot2region(vm, memslot);
-
- i = (region->region.guest_phys_addr >> vm->page_shift) - 1;
- last = i + (region->region.memory_size >> vm->page_shift);
- for (;;) {
- i = sparsebit_next_clear(region->unused_phy_pages, i);
- if (i > last)
- break;
-
- nested_map(vmx, vm,
- (uint64_t)i << vm->page_shift,
- (uint64_t)i << vm->page_shift,
- 1 << vm->page_shift);
- }
-}
-
-/* Identity map a region with 1GiB Pages. */
-void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t addr, uint64_t size)
-{
- __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
-}
-
-bool kvm_cpu_has_ept(void)
-{
- uint64_t ctrl;
-
- ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32;
- if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
- return false;
-
- ctrl = kvm_get_feature_msr(MSR_IA32_VMX_PROCBASED_CTLS2) >> 32;
- return ctrl & SECONDARY_EXEC_ENABLE_EPT;
-}
-
-void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot)
-{
- TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
-
- vmx->eptp = (void *)vm_vaddr_alloc_page(vm);
- vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
- vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
-}
-
-void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm)
-{
- vmx->apic_access = (void *)vm_vaddr_alloc_page(vm);
- vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access);
- vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access);
-}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for s390x CMMA migration
+ *
+ * Copyright IBM Corp. 2023
+ *
+ * Authors:
+ * Nico Boehr <nrb@linux.ibm.com>
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+#include "processor.h"
+
+#define MAIN_PAGE_COUNT 512
+
+#define TEST_DATA_PAGE_COUNT 512
+#define TEST_DATA_MEMSLOT 1
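+/* The TEST_DATA memslot starts at GFN 4096; PAGE_SIZE is reused only as a convenient constant */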
+#define TEST_DATA_START_GFN PAGE_SIZE
+
+#define TEST_DATA_TWO_PAGE_COUNT 256
+#define TEST_DATA_TWO_MEMSLOT 2
+#define TEST_DATA_TWO_START_GFN (2 * PAGE_SIZE)
+
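+/* One CMMA state byte per page, sized to cover the MAIN and TEST_DATA memslots */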
+static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT];
+
+/**
+ * Dirty the CMMA attributes of exactly one page in the TEST_DATA memslot,
+ * so that use_cmma gets enabled and the CMMA-related ioctls do something.
+ */
+static void guest_do_one_essa(void)
+{
+ asm volatile(
+ /* load TEST_DATA_START_GFN into r1 */
+ " llilf 1,%[start_gfn]\n"
+ /* calculate the address from the gfn */
+ " sllg 1,1,12(0)\n"
+ /* set the first page in TEST_DATA memslot to STABLE */
+ " .insn rrf,0xb9ab0000,2,1,1,0\n"
+ /* hypercall */
+ " diag 0,0,0x501\n"
+ "0: j 0b"
+ :
+ : [start_gfn] "L"(TEST_DATA_START_GFN)
+ : "r1", "r2", "memory", "cc"
+ );
+}
+
+/**
+ * Touch CMMA attributes of all pages in TEST_DATA memslot. Set them to stable
+ * state.
+ */
+static void guest_dirty_test_data(void)
+{
+ asm volatile(
+ /* r1 = TEST_DATA_START_GFN */
+ " xgr 1,1\n"
+ " llilf 1,%[start_gfn]\n"
+ /* r5 = TEST_DATA_PAGE_COUNT */
+ " lghi 5,%[page_count]\n"
+ /* r5 += r1 */
+ "2: agfr 5,1\n"
+ /* r2 = r1 << PAGE_SHIFT */
+ "1: sllg 2,1,12(0)\n"
+ /* essa(r4, r2, SET_STABLE) */
+ " .insn rrf,0xb9ab0000,4,2,1,0\n"
+ /* i++ */
+ " agfi 1,1\n"
+ /* if r1 < r5 goto 1 */
+ " cgrjl 1,5,1b\n"
+ /* hypercall */
+ " diag 0,0,0x501\n"
+ "0: j 0b"
+ :
+ : [start_gfn] "L"(TEST_DATA_START_GFN),
+ [page_count] "L"(TEST_DATA_PAGE_COUNT)
+ :
+ /* the counter in our loop over the pages */
+ "r1",
+ /* the calculated page physical address */
+ "r2",
+ /* ESSA output register */
+ "r4",
+ /* last page */
+ "r5",
+ "cc", "memory"
+ );
+}
+
+static void create_main_memslot(struct kvm_vm *vm)
+{
+ int i;
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, MAIN_PAGE_COUNT, 0);
+ /* set the array of memslots to zero like __vm_create does */
+ for (i = 0; i < NR_MEM_REGIONS; i++)
+ vm->memslots[i] = 0;
+}
+
+static void create_test_memslot(struct kvm_vm *vm)
+{
+ vm_userspace_mem_region_add(vm,
+ VM_MEM_SRC_ANONYMOUS,
+ TEST_DATA_START_GFN << vm->page_shift,
+ TEST_DATA_MEMSLOT,
+ TEST_DATA_PAGE_COUNT,
+ 0
+ );
+ vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
+}
+
+static void create_memslots(struct kvm_vm *vm)
+{
+ /*
+ * Our VM has the following memory layout:
+ * +------+---------------------------+
+ * | GFN | Memslot |
+ * +------+---------------------------+
+ * | 0 | |
+ * | ... | MAIN (Code, Stack, ...) |
+ * | 511 | |
+ * +------+---------------------------+
+ * | 4096 | |
+ * | ... | TEST_DATA |
+ * | 4607 | |
+ * +------+---------------------------+
+ */
+ create_main_memslot(vm);
+ create_test_memslot(vm);
+}
+
+static void finish_vm_setup(struct kvm_vm *vm)
+{
+ struct userspace_mem_region *slot0;
+
+ kvm_vm_elf_load(vm, program_invocation_name);
+
+ slot0 = memslot2region(vm, 0);
+ ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
+
+ kvm_arch_vm_post_create(vm);
+}
+
+static struct kvm_vm *create_vm_two_memslots(void)
+{
+ struct kvm_vm *vm;
+
+ vm = vm_create_barebones();
+
+ create_memslots(vm);
+
+ finish_vm_setup(vm);
+
+ return vm;
+}
+
+static void enable_cmma(struct kvm_vm *vm)
+{
+ int r;
+
+ r = __kvm_device_attr_set(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA, NULL);
+ TEST_ASSERT(!r, "enabling cmma failed r=%d errno=%d", r, errno);
+}
+
+static void enable_dirty_tracking(struct kvm_vm *vm)
+{
+ vm_mem_region_set_flags(vm, 0, KVM_MEM_LOG_DIRTY_PAGES);
+ vm_mem_region_set_flags(vm, TEST_DATA_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
+}
+
+static int __enable_migration_mode(struct kvm_vm *vm)
+{
+ return __kvm_device_attr_set(vm->fd,
+ KVM_S390_VM_MIGRATION,
+ KVM_S390_VM_MIGRATION_START,
+ NULL
+ );
+}
+
+static void enable_migration_mode(struct kvm_vm *vm)
+{
+ int r = __enable_migration_mode(vm);
+
+ TEST_ASSERT(!r, "enabling migration mode failed r=%d errno=%d", r, errno);
+}
+
+static bool is_migration_mode_on(struct kvm_vm *vm)
+{
+ u64 out;
+ int r;
+
+ r = __kvm_device_attr_get(vm->fd,
+ KVM_S390_VM_MIGRATION,
+ KVM_S390_VM_MIGRATION_STATUS,
+ &out
+ );
+ TEST_ASSERT(!r, "getting migration mode status failed r=%d errno=%d", r, errno);
+ return out;
+}
+
+static int vm_get_cmma_bits(struct kvm_vm *vm, u64 flags, int *errno_out)
+{
+ struct kvm_s390_cmma_log args;
+ int rc;
+
+ errno = 0;
+
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = 0,
+ .count = sizeof(cmma_value_buf),
+ .flags = flags,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ rc = __vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+
+ *errno_out = errno;
+ return rc;
+}
+
+static void test_get_cmma_basic(void)
+{
+ struct kvm_vm *vm = create_vm_two_memslots();
+ struct kvm_vcpu *vcpu;
+ int rc, errno_out;
+
+ /* GET_CMMA_BITS without CMMA enabled should fail */
+ rc = vm_get_cmma_bits(vm, 0, &errno_out);
+ TEST_ASSERT_EQ(rc, -1);
+ TEST_ASSERT_EQ(errno_out, ENXIO);
+
+ enable_cmma(vm);
+ vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+
+ vcpu_run(vcpu);
+
+ /* GET_CMMA_BITS without migration mode and without peeking should fail */
+ rc = vm_get_cmma_bits(vm, 0, &errno_out);
+ TEST_ASSERT_EQ(rc, -1);
+ TEST_ASSERT_EQ(errno_out, EINVAL);
+
+ /* GET_CMMA_BITS without migration mode and with peeking should work */
+ rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out);
+ TEST_ASSERT_EQ(rc, 0);
+ TEST_ASSERT_EQ(errno_out, 0);
+
+ enable_dirty_tracking(vm);
+ enable_migration_mode(vm);
+
+ /* GET_CMMA_BITS with invalid flags */
+ rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out);
+ TEST_ASSERT_EQ(rc, -1);
+ TEST_ASSERT_EQ(errno_out, EINVAL);
+
+ kvm_vm_free(vm);
+}
+
+static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu)
+{
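+	/*
+	 * The guest's "diag 0x501" hypercall surfaces as a SIE instruction
+	 * interception: exit_reason 13 is KVM_EXIT_S390_SIEIC, icptcode 4 is
+	 * an instruction interception, ipa 0x8300 is the DIAG opcode and
+	 * ipb 0x5010000 carries the 0x501 function code.
+	 */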
+ TEST_ASSERT_EQ(vcpu->run->exit_reason, 13);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000);
+}
+
+static void test_migration_mode(void)
+{
+ struct kvm_vm *vm = vm_create_barebones();
+ struct kvm_vcpu *vcpu;
+ u64 orig_psw;
+ int rc;
+
+ /* enabling migration mode on a VM without memory should fail */
+ rc = __enable_migration_mode(vm);
+ TEST_ASSERT_EQ(rc, -1);
+ TEST_ASSERT_EQ(errno, EINVAL);
+ TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
+ errno = 0;
+
+ create_memslots(vm);
+ finish_vm_setup(vm);
+
+ enable_cmma(vm);
+ vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+ orig_psw = vcpu->run->psw_addr;
+
+ /*
+ * Execute one essa instruction in the guest. Otherwise the guest will
+ * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+ */
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ /* migration mode when memslots have dirty tracking off should fail */
+ rc = __enable_migration_mode(vm);
+ TEST_ASSERT_EQ(rc, -1);
+ TEST_ASSERT_EQ(errno, EINVAL);
+ TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
+ errno = 0;
+
+ /* enable dirty tracking */
+ enable_dirty_tracking(vm);
+
+ /* enabling migration mode should work now */
+ rc = __enable_migration_mode(vm);
+ TEST_ASSERT_EQ(rc, 0);
+ TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+ errno = 0;
+
+ /* execute another ESSA instruction to see this goes fine */
+ vcpu->run->psw_addr = orig_psw;
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ /*
+ * With migration mode on, create a new memslot with dirty tracking off.
+ * This should turn off migration mode.
+ */
+ TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+ vm_userspace_mem_region_add(vm,
+ VM_MEM_SRC_ANONYMOUS,
+ TEST_DATA_TWO_START_GFN << vm->page_shift,
+ TEST_DATA_TWO_MEMSLOT,
+ TEST_DATA_TWO_PAGE_COUNT,
+ 0
+ );
+ TEST_ASSERT(!is_migration_mode_on(vm),
+ "creating memslot without dirty tracking turns off migration mode"
+ );
+
+ /* ESSA instructions should still execute fine */
+ vcpu->run->psw_addr = orig_psw;
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ /*
+ * Turn on dirty tracking on the new memslot.
+ * It should be possible to turn migration mode back on again.
+ */
+ vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
+ rc = __enable_migration_mode(vm);
+ TEST_ASSERT_EQ(rc, 0);
+ TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+ errno = 0;
+
+ /*
+ * Turn off dirty tracking again, this time with just a flag change.
+ * Again, migration mode should turn off.
+ */
+ TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+ vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, 0);
+ TEST_ASSERT(!is_migration_mode_on(vm),
+ "disabling dirty tracking should turn off migration mode"
+ );
+
+ /* ESSA instructions should still execute fine */
+ vcpu->run->psw_addr = orig_psw;
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+/**
+ * Given a VM with the MAIN and TEST_DATA memslots, assert that querying the
+ * CMMA attributes reports every page in both memslots as dirty and nothing else.
+ * This has the useful side effect of ensuring nothing is CMMA dirty after this
+ * function.
+ */
+static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
+{
+ struct kvm_s390_cmma_log args;
+
+ /*
+ * First iteration - everything should be dirty.
+ * Start at the main memslot...
+ */
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = 0,
+ .count = sizeof(cmma_value_buf),
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+ TEST_ASSERT_EQ(args.count, MAIN_PAGE_COUNT);
+ TEST_ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT);
+ TEST_ASSERT_EQ(args.start_gfn, 0);
+
+ /* ...and then - after a hole - the TEST_DATA memslot should follow */
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = MAIN_PAGE_COUNT,
+ .count = sizeof(cmma_value_buf),
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+ TEST_ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT);
+ TEST_ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN);
+ TEST_ASSERT_EQ(args.remaining, 0);
+
+ /* ...and nothing else should be there */
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT,
+ .count = sizeof(cmma_value_buf),
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+ TEST_ASSERT_EQ(args.count, 0);
+ TEST_ASSERT_EQ(args.start_gfn, 0);
+ TEST_ASSERT_EQ(args.remaining, 0);
+}
+
+/**
+ * Given a VM, assert no pages are CMMA dirty.
+ */
+static void assert_no_pages_cmma_dirty(struct kvm_vm *vm)
+{
+ struct kvm_s390_cmma_log args;
+
+ /* If we start from GFN 0 again, nothing should be dirty. */
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = 0,
+ .count = sizeof(cmma_value_buf),
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+ if (args.count || args.remaining || args.start_gfn)
+ TEST_FAIL("pages are still dirty start_gfn=0x%llx count=%u remaining=%llu",
+ args.start_gfn,
+ args.count,
+ args.remaining
+ );
+}
+
+static void test_get_initial_dirty(void)
+{
+ struct kvm_vm *vm = create_vm_two_memslots();
+ struct kvm_vcpu *vcpu;
+
+ enable_cmma(vm);
+ vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+
+ /*
+ * Execute one essa instruction in the guest. Otherwise the guest will
+ * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+ */
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ enable_dirty_tracking(vm);
+ enable_migration_mode(vm);
+
+ assert_all_slots_cmma_dirty(vm);
+
+ /* Start from the beginning again and make sure nothing else is dirty */
+ assert_no_pages_cmma_dirty(vm);
+
+ kvm_vm_free(vm);
+}
+
+static void query_cmma_range(struct kvm_vm *vm,
+ u64 start_gfn, u64 gfn_count,
+ struct kvm_s390_cmma_log *res_out)
+{
+ *res_out = (struct kvm_s390_cmma_log){
+ .start_gfn = start_gfn,
+ .count = gfn_count,
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, res_out);
+}
+
+/**
+ * Assert the given cmma_log struct that was executed by query_cmma_range()
+ * indicates the first dirty gfn is at first_dirty_gfn and contains exactly
+ * dirty_gfn_count CMMA values.
+ */
+static void assert_cmma_dirty(u64 first_dirty_gfn,
+ u64 dirty_gfn_count,
+ const struct kvm_s390_cmma_log *res)
+{
+ TEST_ASSERT_EQ(res->start_gfn, first_dirty_gfn);
+ TEST_ASSERT_EQ(res->count, dirty_gfn_count);
+ for (size_t i = 0; i < dirty_gfn_count; i++)
+		TEST_ASSERT_EQ(cmma_value_buf[i], 0x0);	/* stable state */
+ TEST_ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */
+}
+
+static void test_get_skip_holes(void)
+{
+ size_t gfn_offset;
+ struct kvm_vm *vm = create_vm_two_memslots();
+ struct kvm_s390_cmma_log log;
+ struct kvm_vcpu *vcpu;
+ u64 orig_psw;
+
+ enable_cmma(vm);
+ vcpu = vm_vcpu_add(vm, 1, guest_dirty_test_data);
+
+ orig_psw = vcpu->run->psw_addr;
+
+ /*
+ * Execute some essa instructions in the guest. Otherwise the guest will
+ * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+ */
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ enable_dirty_tracking(vm);
+ enable_migration_mode(vm);
+
+ /* un-dirty all pages */
+ assert_all_slots_cmma_dirty(vm);
+
+ /* Then, dirty just the TEST_DATA memslot */
+ vcpu->run->psw_addr = orig_psw;
+ vcpu_run(vcpu);
+
+ gfn_offset = TEST_DATA_START_GFN;
+ /**
+ * Query CMMA attributes of one page, starting at page 0. Since the
+ * main memslot was not touched by the VM, this should yield the first
+ * page of the TEST_DATA memslot.
+ * The dirty bitmap should now look like this:
+ * 0: not dirty
+ * [0x1, 0x200): dirty
+ */
+ query_cmma_range(vm, 0, 1, &log);
+ assert_cmma_dirty(gfn_offset, 1, &log);
+ gfn_offset++;
+
+ /**
+ * Query CMMA attributes of 32 (0x20) pages past the end of the TEST_DATA
+ * memslot. This should wrap back to the beginning of the TEST_DATA
+ * memslot, page 1.
+ * The dirty bitmap should now look like this:
+ * [0, 0x21): not dirty
+ * [0x21, 0x200): dirty
+ */
+ query_cmma_range(vm, TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, 0x20, &log);
+ assert_cmma_dirty(gfn_offset, 0x20, &log);
+ gfn_offset += 0x20;
+
+ /* Skip 32 pages */
+ gfn_offset += 0x20;
+
+ /**
+ * After skipping 32 pages, query the next 32 (0x20) pages.
+ * The dirty bitmap should now look like this:
+ * [0, 0x21): not dirty
+ * [0x21, 0x41): dirty
+ * [0x41, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ query_cmma_range(vm, gfn_offset, 0x20, &log);
+ assert_cmma_dirty(gfn_offset, 0x20, &log);
+ gfn_offset += 0x20;
+
+ /**
+ * Query 1 page from the beginning of the TEST_DATA memslot. This should
+ * yield page 0x21.
+ * The dirty bitmap should now look like this:
+ * [0, 0x22): not dirty
+ * [0x22, 0x41): dirty
+ * [0x41, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ query_cmma_range(vm, TEST_DATA_START_GFN, 1, &log);
+ assert_cmma_dirty(TEST_DATA_START_GFN + 0x21, 1, &log);
+ gfn_offset++;
+
+ /**
+ * Query 15 (0xF) pages from page 0x23 in TEST_DATA memslot.
+	 * This should yield pages [0x23, 0x32).
+ * The dirty bitmap should now look like this:
+ * [0, 0x22): not dirty
+ * 0x22: dirty
+	 * [0x23, 0x32): not dirty
+	 * [0x32, 0x41): dirty
+ * [0x41, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ gfn_offset = TEST_DATA_START_GFN + 0x23;
+ query_cmma_range(vm, gfn_offset, 15, &log);
+ assert_cmma_dirty(gfn_offset, 15, &log);
+
+ /**
+ * Query 17 (0x11) pages from page 0x22 in TEST_DATA memslot.
+	 * This should yield pages [0x22, 0x33).
+ * The dirty bitmap should now look like this:
+ * [0, 0x33): not dirty
+ * [0x33, 0x41): dirty
+ * [0x41, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ gfn_offset = TEST_DATA_START_GFN + 0x22;
+ query_cmma_range(vm, gfn_offset, 17, &log);
+ assert_cmma_dirty(gfn_offset, 17, &log);
+
+ /**
+ * Query 25 (0x19) pages from page 0x40 in TEST_DATA memslot.
+ * This should yield page 0x40 and nothing more, since there are more
+ * than 16 non-dirty pages after page 0x40.
+ * The dirty bitmap should now look like this:
+ * [0, 0x33): not dirty
+ * [0x33, 0x40): dirty
+ * [0x40, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ gfn_offset = TEST_DATA_START_GFN + 0x40;
+ query_cmma_range(vm, gfn_offset, 25, &log);
+ assert_cmma_dirty(gfn_offset, 1, &log);
+
+ /**
+ * Query pages [0x33, 0x40).
+ * The dirty bitmap should now look like this:
+ * [0, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ gfn_offset = TEST_DATA_START_GFN + 0x33;
+ query_cmma_range(vm, gfn_offset, 0x40 - 0x33, &log);
+ assert_cmma_dirty(gfn_offset, 0x40 - 0x33, &log);
+
+ /**
+ * Query the remaining pages [0x61, 0x200).
+ */
+ gfn_offset = TEST_DATA_START_GFN;
+ query_cmma_range(vm, gfn_offset, TEST_DATA_PAGE_COUNT - 0x61, &log);
+ assert_cmma_dirty(TEST_DATA_START_GFN + 0x61, TEST_DATA_PAGE_COUNT - 0x61, &log);
+
+ assert_no_pages_cmma_dirty(vm);
+}
+
+struct testdef {
+ const char *name;
+ void (*test)(void);
+} testlist[] = {
+ { "migration mode and dirty tracking", test_migration_mode },
+ { "GET_CMMA_BITS: basic calls", test_get_cmma_basic },
+	{ "GET_CMMA_BITS: all pages are dirty initially", test_get_initial_dirty },
+ { "GET_CMMA_BITS: holes are skipped", test_get_skip_holes },
+};
+
+/**
+ * The kernel may support CMMA, but the machine may not (i.e. if running as
+ * guest-3).
+ *
+ * In this case, the CMMA capabilities are all there, but the CMMA-related
+ * ioctls fail. To find out whether the machine supports CMMA, create a
+ * temporary VM and then query the CMMA feature of the VM.
+ */
+static int machine_has_cmma(void)
+{
+ struct kvm_vm *vm = vm_create_barebones();
+ int r;
+
+ r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA);
+ kvm_vm_free(vm);
+
+ return r;
+}
+
+int main(int argc, char *argv[])
+{
+ int idx;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_CMMA_MIGRATION));
+ TEST_REQUIRE(machine_has_cmma());
+
+ ksft_print_header();
+
+ ksft_set_plan(ARRAY_SIZE(testlist));
+
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ }
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
--- /dev/null
+CONFIG_KVM=y
+CONFIG_KVM_S390_UCONTROL=y
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ * Hariharan Mari <hari55@linux.ibm.com>
+ *
+ * The tests compare the result of the KVM ioctl for obtaining CPU subfunction data with those
+ * from an ASM block performing the same CPU subfunction. Currently KVM doesn't mask instruction
+ * query data reported via the CPU Model, allowing us to directly compare it with the data
+ * acquired through executing the queries in the test.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include "facility.h"
+
+#include "kvm_util.h"
+
+#define PLO_FUNCTION_MAX 256
+
+/* Query available CPU subfunctions */
+struct kvm_s390_vm_cpu_subfunc cpu_subfunc;
+
+static void get_cpu_machine_subfunctions(struct kvm_vm *vm,
+ struct kvm_s390_vm_cpu_subfunc *cpu_subfunc)
+{
+ int r;
+
+ r = __kvm_device_attr_get(vm->fd, KVM_S390_VM_CPU_MODEL,
+ KVM_S390_VM_CPU_MACHINE_SUBFUNC, cpu_subfunc);
+
+ TEST_ASSERT(!r, "Get cpu subfunctions failed r=%d errno=%d", r, errno);
+}
+
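+/*
+ * Execute PLO with the test bit set in GR0 (function number | 0x100) and
+ * report whether function number nr is installed (condition code 0).
+ */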
+static inline int plo_test_bit(unsigned char nr)
+{
+ unsigned long function = nr | 0x100;
+ int cc;
+
+ asm volatile(" lgr 0,%[function]\n"
+ /* Parameter registers are ignored for "test bit" */
+ " plo 0,0,0,0(0)\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc)
+ : [function] "d" (function)
+ : "cc", "0");
+ return cc == 0;
+}
+
+/* Testing Perform Locked Operation (PLO) CPU subfunction's ASM block */
+static void test_plo_asm_block(u8 (*query)[32])
+{
+ for (int i = 0; i < PLO_FUNCTION_MAX; ++i) {
+ if (plo_test_bit(i))
+ (*query)[i >> 3] |= 0x80 >> (i & 7);
+ }
+}
+
+/* Testing Crypto Compute Message Authentication Code (KMAC) CPU subfunction's ASM block */
+static void test_kmac_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,0xb91e0000,0,2\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message with Chaining (KMC) CPU subfunction's ASM block */
+static void test_kmc_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,0xb92f0000,2,4\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message (KM) CPU subfunction's ASM block */
+static void test_km_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,0xb92e0000,2,4\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Compute Intermediate Message Digest (KIMD) CPU subfunction's ASM block */
+static void test_kimd_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,0xb93e0000,0,2\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Compute Last Message Digest (KLMD) CPU subfunction's ASM block */
+static void test_klmd_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,0xb93f0000,0,2\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message with Counter (KMCTR) CPU subfunction's ASM block */
+static void test_kmctr_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rrf,0xb92d0000,2,4,6,0\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message with Cipher Feedback (KMF) CPU subfunction's ASM block */
+static void test_kmf_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,0xb92a0000,2,4\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message with Output Feedback (KMO) CPU subfunction's ASM block */
+static void test_kmo_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,0xb92b0000,2,4\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Perform Cryptographic Computation (PCC) CPU subfunction's ASM block */
+static void test_pcc_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,0xb92c0000,0,0\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Perform Random Number Operation (PRNO) CPU subfunction's ASM block */
+static void test_prno_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,0xb93c0000,2,4\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message with Authentication (KMA) CPU subfunction's ASM block */
+static void test_kma_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rrf,0xb9290000,2,4,6,0\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Compute Digital Signature Authentication (KDSA) CPU subfunction's ASM block */
+static void test_kdsa_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,0xb93a0000,0,2\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Sort Lists (SORTL) CPU subfunction's ASM block */
+static void test_sortl_asm_block(u8 (*query)[32])
+{
+ asm volatile(" lghi 0,0\n"
+ " la 1,%[query]\n"
+ " .insn rre,0xb9380000,2,4\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "0", "1");
+}
+
+/* Testing Deflate Conversion Call (DFLTCC) CPU subfunction's ASM block */
+static void test_dfltcc_asm_block(u8 (*query)[32])
+{
+ asm volatile(" lghi 0,0\n"
+ " la 1,%[query]\n"
+ " .insn rrf,0xb9390000,2,4,6,0\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "0", "1");
+}
+
+/*
+ * Testing Perform Function with Concurrent Results (PFCR)
+ * CPU subfunction's ASM block
+ */
+static void test_pfcr_asm_block(u8 (*query)[16])
+{
+ asm volatile(" lghi 0,0\n"
+ " .insn rsy,0xeb0000000016,0,0,%[query]\n"
+ : [query] "=QS" (*query)
+ :
+ : "cc", "0");
+}
+
+typedef void (*testfunc_t)(u8 (*array)[]);
+
+struct testdef {
+ const char *subfunc_name;
+ u8 *subfunc_array;
+ size_t array_size;
+ testfunc_t test;
+ int facility_bit;
+} testlist[] = {
+ /*
+ * PLO was introduced in the very first 64-bit machine generation.
+ * Hence it is assumed PLO is always installed in Z Arch.
+ */
+ { "PLO", cpu_subfunc.plo, sizeof(cpu_subfunc.plo), test_plo_asm_block, 1 },
+ /* MSA - Facility bit 17 */
+ { "KMAC", cpu_subfunc.kmac, sizeof(cpu_subfunc.kmac), test_kmac_asm_block, 17 },
+ { "KMC", cpu_subfunc.kmc, sizeof(cpu_subfunc.kmc), test_kmc_asm_block, 17 },
+ { "KM", cpu_subfunc.km, sizeof(cpu_subfunc.km), test_km_asm_block, 17 },
+ { "KIMD", cpu_subfunc.kimd, sizeof(cpu_subfunc.kimd), test_kimd_asm_block, 17 },
+ { "KLMD", cpu_subfunc.klmd, sizeof(cpu_subfunc.klmd), test_klmd_asm_block, 17 },
+ /* MSA - Facility bit 77 */
+ { "KMCTR", cpu_subfunc.kmctr, sizeof(cpu_subfunc.kmctr), test_kmctr_asm_block, 77 },
+ { "KMF", cpu_subfunc.kmf, sizeof(cpu_subfunc.kmf), test_kmf_asm_block, 77 },
+ { "KMO", cpu_subfunc.kmo, sizeof(cpu_subfunc.kmo), test_kmo_asm_block, 77 },
+ { "PCC", cpu_subfunc.pcc, sizeof(cpu_subfunc.pcc), test_pcc_asm_block, 77 },
+ /* MSA5 - Facility bit 57 */
+ { "PPNO", cpu_subfunc.ppno, sizeof(cpu_subfunc.ppno), test_prno_asm_block, 57 },
+ /* MSA8 - Facility bit 146 */
+ { "KMA", cpu_subfunc.kma, sizeof(cpu_subfunc.kma), test_kma_asm_block, 146 },
+ /* MSA9 - Facility bit 155 */
+ { "KDSA", cpu_subfunc.kdsa, sizeof(cpu_subfunc.kdsa), test_kdsa_asm_block, 155 },
+ /* SORTL - Facility bit 150 */
+ { "SORTL", cpu_subfunc.sortl, sizeof(cpu_subfunc.sortl), test_sortl_asm_block, 150 },
+ /* DFLTCC - Facility bit 151 */
+ { "DFLTCC", cpu_subfunc.dfltcc, sizeof(cpu_subfunc.dfltcc), test_dfltcc_asm_block, 151 },
+ /* Concurrent-function facility - Facility bit 201 */
+ { "PFCR", cpu_subfunc.pfcr, sizeof(cpu_subfunc.pfcr), test_pfcr_asm_block, 201 },
+};
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ int idx;
+
+ ksft_print_header();
+
+ vm = vm_create(1);
+
+ memset(&cpu_subfunc, 0, sizeof(cpu_subfunc));
+	get_cpu_machine_subfunctions(vm, &cpu_subfunc);
+
+ ksft_set_plan(ARRAY_SIZE(testlist));
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ if (test_facility(testlist[idx].facility_bit)) {
+			/* Zero-init: the PLO query only ORs in bits for installed functions. */
+			u8 *array = calloc(1, testlist[idx].array_size);
+
+ testlist[idx].test((u8 (*)[testlist[idx].array_size])array);
+
+ TEST_ASSERT_EQ(memcmp(testlist[idx].subfunc_array,
+ array, testlist[idx].array_size), 0);
+
+ ksft_test_result_pass("%s\n", testlist[idx].subfunc_name);
+ free(array);
+ } else {
+			ksft_test_result_skip("%s feature is not available\n",
+ testlist[idx].subfunc_name);
+ }
+ }
+
+ kvm_vm_free(vm);
+ ksft_finished();
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test KVM debugging features. */
+#include "kvm_util.h"
+#include "test_util.h"
+#include "sie.h"
+
+#include <linux/kvm.h>
+
+#define __LC_SVC_NEW_PSW 0x1c0
+#define __LC_PGM_NEW_PSW 0x1d0
+#define IPA0_DIAG 0x8300
+#define PGM_SPECIFICATION 0x06
+
+/* Common code for testing single-stepping interruptions. */
+extern char int_handler[];
+asm("int_handler:\n"
+ "j .\n");
+
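+/*
+ * Create a VM whose interruption new PSW at new_psw_off in the lowcore points
+ * at int_handler, enable single-stepping and run the vCPU once.  new_psw[]
+ * receives the PSW that the interruption is expected to load.
+ */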
+static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code,
+ size_t new_psw_off, uint64_t *new_psw)
+{
+ struct kvm_guest_debug debug = {};
+ struct kvm_regs regs;
+ struct kvm_vm *vm;
+ char *lowcore;
+
+ vm = vm_create_with_one_vcpu(vcpu, guest_code);
+ lowcore = addr_gpa2hva(vm, 0);
+ new_psw[0] = (*vcpu)->run->psw_mask;
+ new_psw[1] = (uint64_t)int_handler;
+ memcpy(lowcore + new_psw_off, new_psw, 16);
+	vcpu_regs_get(*vcpu, &regs);
+	regs.gprs[2] = -1;
+	vcpu_regs_set(*vcpu, &regs);
+ debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+ vcpu_guest_debug_set(*vcpu, &debug);
+ vcpu_run(*vcpu);
+
+ return vm;
+}
+
+static void test_step_int(void *guest_code, size_t new_psw_off)
+{
+ struct kvm_vcpu *vcpu;
+ uint64_t new_psw[2];
+ struct kvm_vm *vm;
+
+ vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+ TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
+ TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
+ kvm_vm_free(vm);
+}
+
+/* Test single-stepping "boring" program interruptions. */
+extern char test_step_pgm_guest_code[];
+asm("test_step_pgm_guest_code:\n"
+ ".insn rr,0x1d00,%r1,%r0 /* dr %r1,%r0 */\n"
+ "j .\n");
+
+static void test_step_pgm(void)
+{
+ test_step_int(test_step_pgm_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/*
+ * Test single-stepping program interruptions caused by DIAG.
+ * Userspace emulation must not interfere with single-stepping.
+ */
+extern char test_step_pgm_diag_guest_code[];
+asm("test_step_pgm_diag_guest_code:\n"
+ "diag %r0,%r0,0\n"
+ "j .\n");
+
+static void test_step_pgm_diag(void)
+{
+ struct kvm_s390_irq irq = {
+ .type = KVM_S390_PROGRAM_INT,
+ .u.pgm.code = PGM_SPECIFICATION,
+ };
+ struct kvm_vcpu *vcpu;
+ uint64_t new_psw[2];
+ struct kvm_vm *vm;
+
+ vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code,
+ __LC_PGM_NEW_PSW, new_psw);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INST);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG);
+ vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+ TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
+ TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
+ kvm_vm_free(vm);
+}
+
+/*
+ * Test single-stepping program interruptions caused by ISKE.
+ * CPUSTAT_KSS handling must not interfere with single-stepping.
+ */
+extern char test_step_pgm_iske_guest_code[];
+asm("test_step_pgm_iske_guest_code:\n"
+ "iske %r2,%r2\n"
+ "j .\n");
+
+static void test_step_pgm_iske(void)
+{
+ test_step_int(test_step_pgm_iske_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/*
+ * Test single-stepping program interruptions caused by LCTL.
+ * KVM emulation must not interfere with single-stepping.
+ */
+extern char test_step_pgm_lctl_guest_code[];
+asm("test_step_pgm_lctl_guest_code:\n"
+ "lctl %c0,%c0,1\n"
+ "j .\n");
+
+static void test_step_pgm_lctl(void)
+{
+ test_step_int(test_step_pgm_lctl_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/* Test single-stepping supervisor-call interruptions. */
+extern char test_step_svc_guest_code[];
+asm("test_step_svc_guest_code:\n"
+ "svc 0\n"
+ "j .\n");
+
+static void test_step_svc(void)
+{
+ test_step_int(test_step_svc_guest_code, __LC_SVC_NEW_PSW);
+}
+
+/* Run all tests above. */
+static struct testdef {
+ const char *name;
+ void (*test)(void);
+} testlist[] = {
+ { "single-step pgm", test_step_pgm },
+ { "single-step pgm caused by diag", test_step_pgm_diag },
+ { "single-step pgm caused by iske", test_step_pgm_iske },
+ { "single-step pgm caused by lctl", test_step_pgm_lctl },
+ { "single-step svc", test_step_svc },
+};
+
+int main(int argc, char *argv[])
+{
+ int idx;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(testlist));
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ }
+ ksft_finished();
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test for s390x KVM_S390_MEM_OP
+ *
+ * Copyright (C) 2019, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <pthread.h>
+
+#include <linux/bits.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+#include "processor.h"
+
+enum mop_target {
+ LOGICAL,
+ SIDA,
+ ABSOLUTE,
+ INVALID,
+};
+
+enum mop_access_mode {
+ READ,
+ WRITE,
+ CMPXCHG,
+};
+
+struct mop_desc {
+ uintptr_t gaddr;
+ uintptr_t gaddr_v;
+ uint64_t set_flags;
+ unsigned int f_check : 1;
+ unsigned int f_inject : 1;
+ unsigned int f_key : 1;
+ unsigned int _gaddr_v : 1;
+ unsigned int _set_flags : 1;
+ unsigned int _sida_offset : 1;
+ unsigned int _ar : 1;
+ uint32_t size;
+ enum mop_target target;
+ enum mop_access_mode mode;
+ void *buf;
+ uint32_t sida_offset;
+ void *old;
+ uint8_t old_value[16];
+ bool *cmpxchg_success;
+ uint8_t ar;
+ uint8_t key;
+};
+
+const uint8_t NO_KEY = 0xff;
+
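+/* Translate a test-local mop_desc into the kvm_s390_mem_op passed to the ioctl. */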
+static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc *desc)
+{
+ struct kvm_s390_mem_op ksmo = {
+ .gaddr = (uintptr_t)desc->gaddr,
+ .size = desc->size,
+ .buf = ((uintptr_t)desc->buf),
+ .reserved = "ignored_ignored_ignored_ignored"
+ };
+
+ switch (desc->target) {
+ case LOGICAL:
+ if (desc->mode == READ)
+ ksmo.op = KVM_S390_MEMOP_LOGICAL_READ;
+ if (desc->mode == WRITE)
+ ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
+ break;
+ case SIDA:
+ if (desc->mode == READ)
+ ksmo.op = KVM_S390_MEMOP_SIDA_READ;
+ if (desc->mode == WRITE)
+ ksmo.op = KVM_S390_MEMOP_SIDA_WRITE;
+ break;
+ case ABSOLUTE:
+ if (desc->mode == READ)
+ ksmo.op = KVM_S390_MEMOP_ABSOLUTE_READ;
+ if (desc->mode == WRITE)
+ ksmo.op = KVM_S390_MEMOP_ABSOLUTE_WRITE;
+ if (desc->mode == CMPXCHG) {
+ ksmo.op = KVM_S390_MEMOP_ABSOLUTE_CMPXCHG;
+ ksmo.old_addr = (uint64_t)desc->old;
+ memcpy(desc->old_value, desc->old, desc->size);
+ }
+ break;
+ case INVALID:
+ ksmo.op = -1;
+ }
+ if (desc->f_check)
+ ksmo.flags |= KVM_S390_MEMOP_F_CHECK_ONLY;
+ if (desc->f_inject)
+ ksmo.flags |= KVM_S390_MEMOP_F_INJECT_EXCEPTION;
+ if (desc->_set_flags)
+ ksmo.flags = desc->set_flags;
+ if (desc->f_key && desc->key != NO_KEY) {
+ ksmo.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION;
+ ksmo.key = desc->key;
+ }
+ if (desc->_ar)
+ ksmo.ar = desc->ar;
+ else
+ ksmo.ar = 0;
+ if (desc->_sida_offset)
+ ksmo.sida_offset = desc->sida_offset;
+
+ return ksmo;
+}
+
+struct test_info {
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+};
+
+#define PRINT_MEMOP false
+static void print_memop(struct kvm_vcpu *vcpu, const struct kvm_s390_mem_op *ksmo)
+{
+ if (!PRINT_MEMOP)
+ return;
+
+ if (!vcpu)
+ printf("vm memop(");
+ else
+ printf("vcpu memop(");
+ switch (ksmo->op) {
+ case KVM_S390_MEMOP_LOGICAL_READ:
+ printf("LOGICAL, READ, ");
+ break;
+ case KVM_S390_MEMOP_LOGICAL_WRITE:
+ printf("LOGICAL, WRITE, ");
+ break;
+ case KVM_S390_MEMOP_SIDA_READ:
+ printf("SIDA, READ, ");
+ break;
+ case KVM_S390_MEMOP_SIDA_WRITE:
+ printf("SIDA, WRITE, ");
+ break;
+ case KVM_S390_MEMOP_ABSOLUTE_READ:
+ printf("ABSOLUTE, READ, ");
+ break;
+ case KVM_S390_MEMOP_ABSOLUTE_WRITE:
+ printf("ABSOLUTE, WRITE, ");
+ break;
+ case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
+ printf("ABSOLUTE, CMPXCHG, ");
+ break;
+ }
+ printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u, old_addr=%llx",
+ ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key,
+ ksmo->old_addr);
+ if (ksmo->flags & KVM_S390_MEMOP_F_CHECK_ONLY)
+ printf(", CHECK_ONLY");
+ if (ksmo->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION)
+ printf(", INJECT_EXCEPTION");
+ if (ksmo->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION)
+ printf(", SKEY_PROTECTION");
+ puts(")");
+}
+
+static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
+ struct mop_desc *desc)
+{
+ struct kvm_vcpu *vcpu = info.vcpu;
+
+ if (!vcpu)
+ return __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo);
+ else
+ return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo);
+}
+
+static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
+ struct mop_desc *desc)
+{
+ int r;
+
+ r = err_memop_ioctl(info, ksmo, desc);
+ if (ksmo->op == KVM_S390_MEMOP_ABSOLUTE_CMPXCHG) {
+ if (desc->cmpxchg_success) {
+ int diff = memcmp(desc->old_value, desc->old, desc->size);
+ *desc->cmpxchg_success = !diff;
+ }
+ }
+ TEST_ASSERT(!r, __KVM_IOCTL_ERROR("KVM_S390_MEM_OP", r));
+}
+
+#define MEMOP(err, info_p, mop_target_p, access_mode_p, buf_p, size_p, ...) \
+({ \
+ struct test_info __info = (info_p); \
+ struct mop_desc __desc = { \
+ .target = (mop_target_p), \
+ .mode = (access_mode_p), \
+ .buf = (buf_p), \
+ .size = (size_p), \
+ __VA_ARGS__ \
+ }; \
+ struct kvm_s390_mem_op __ksmo; \
+ \
+ if (__desc._gaddr_v) { \
+ if (__desc.target == ABSOLUTE) \
+ __desc.gaddr = addr_gva2gpa(__info.vm, __desc.gaddr_v); \
+ else \
+ __desc.gaddr = __desc.gaddr_v; \
+ } \
+ __ksmo = ksmo_from_desc(&__desc); \
+ print_memop(__info.vcpu, &__ksmo); \
+ err##memop_ioctl(__info, &__ksmo, &__desc); \
+})
+
+#define MOP(...) MEMOP(, __VA_ARGS__)
+#define ERR_MOP(...) MEMOP(err_, __VA_ARGS__)
+
+#define GADDR(a) .gaddr = ((uintptr_t)a)
+#define GADDR_V(v) ._gaddr_v = 1, .gaddr_v = ((uintptr_t)v)
+#define CHECK_ONLY .f_check = 1
+#define SET_FLAGS(f) ._set_flags = 1, .set_flags = (f)
+#define SIDA_OFFSET(o) ._sida_offset = 1, .sida_offset = (o)
+#define AR(a) ._ar = 1, .ar = (a)
+#define KEY(a) .f_key = 1, .key = (a)
+#define INJECT .f_inject = 1
+#define CMPXCHG_OLD(o) .old = (o)
+#define CMPXCHG_SUCCESS(s) .cmpxchg_success = (s)
+
+#define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); })
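+/*
+ * Example: writing SIZE bytes from buf to guest logical address mem1 via the
+ * vCPU ioctl, protected by storage key 9, looks like
+ *	MOP(t.vcpu, LOGICAL, WRITE, buf, SIZE, GADDR_V(mem1), KEY(9));
+ * ERR_MOP() returns the ioctl result instead of asserting success.
+ */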
+
+#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38))
+#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39))
+
+static uint8_t __aligned(PAGE_SIZE) mem1[65536];
+static uint8_t __aligned(PAGE_SIZE) mem2[65536];
+
+struct test_default {
+ struct kvm_vm *kvm_vm;
+ struct test_info vm;
+ struct test_info vcpu;
+ struct kvm_run *run;
+ int size;
+};
+
+static struct test_default test_default_init(void *guest_code)
+{
+ struct kvm_vcpu *vcpu;
+ struct test_default t;
+
+ t.size = min((size_t)kvm_check_cap(KVM_CAP_S390_MEM_OP), sizeof(mem1));
+ t.kvm_vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ t.vm = (struct test_info) { t.kvm_vm, NULL };
+ t.vcpu = (struct test_info) { t.kvm_vm, vcpu };
+ t.run = vcpu->run;
+ return t;
+}
+
+enum stage {
+ /* Synced state set by host, e.g. DAT */
+ STAGE_INITED,
+ /* Guest did nothing */
+ STAGE_IDLED,
+ /* Guest set storage keys (specifics up to test case) */
+ STAGE_SKEYS_SET,
+ /* Guest copied memory (locations up to test case) */
+ STAGE_COPIED,
+ /* End of guest code reached */
+ STAGE_DONE,
+};
+
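+/* Run the vCPU until its next ucall and assert it synced at the given stage. */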
+#define HOST_SYNC(info_p, stage) \
+({ \
+ struct test_info __info = (info_p); \
+ struct kvm_vcpu *__vcpu = __info.vcpu; \
+ struct ucall uc; \
+ int __stage = (stage); \
+ \
+ vcpu_run(__vcpu); \
+ get_ucall(__vcpu, &uc); \
+ if (uc.cmd == UCALL_ABORT) { \
+ REPORT_GUEST_ASSERT(uc); \
+ } \
+ TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC); \
+ TEST_ASSERT_EQ(uc.args[1], __stage); \
+}) \
+
+static void prepare_mem12(void)
+{
+ int i;
+
+ for (i = 0; i < sizeof(mem1); i++)
+ mem1[i] = rand();
+ memset(mem2, 0xaa, sizeof(mem2));
+}
+
+#define ASSERT_MEM_EQ(p1, p2, size) \
+ TEST_ASSERT(!memcmp(p1, p2, size), "Memory contents do not match!")
+
+static void default_write_read(struct test_info copy_cpu, struct test_info mop_cpu,
+ enum mop_target mop_target, uint32_t size, uint8_t key)
+{
+ prepare_mem12();
+ CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size,
+ GADDR_V(mem1), KEY(key));
+ HOST_SYNC(copy_cpu, STAGE_COPIED);
+ CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size,
+ GADDR_V(mem2), KEY(key));
+ ASSERT_MEM_EQ(mem1, mem2, size);
+}
+
+static void default_read(struct test_info copy_cpu, struct test_info mop_cpu,
+ enum mop_target mop_target, uint32_t size, uint8_t key)
+{
+ prepare_mem12();
+ CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, GADDR_V(mem1));
+ HOST_SYNC(copy_cpu, STAGE_COPIED);
+ CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size,
+ GADDR_V(mem2), KEY(key));
+ ASSERT_MEM_EQ(mem1, mem2, size);
+}
+
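+/*
+ * Exercise ABSOLUTE CMPXCHG for every power-of-two size at every aligned
+ * offset within a 16 byte block: once expecting success and once expecting
+ * failure after perturbing the expected old value.
+ */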
+static void default_cmpxchg(struct test_default *test, uint8_t key)
+{
+ for (int size = 1; size <= 16; size *= 2) {
+ for (int offset = 0; offset < 16; offset += size) {
+ uint8_t __aligned(16) new[16] = {};
+ uint8_t __aligned(16) old[16];
+ bool succ;
+
+ prepare_mem12();
+ default_write_read(test->vcpu, test->vcpu, LOGICAL, 16, NO_KEY);
+
+ memcpy(&old, mem1, 16);
+ MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset,
+ size, GADDR_V(mem1 + offset),
+ CMPXCHG_OLD(old + offset),
+ CMPXCHG_SUCCESS(&succ), KEY(key));
+ HOST_SYNC(test->vcpu, STAGE_COPIED);
+ MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2));
+ TEST_ASSERT(succ, "exchange of values should succeed");
+ memcpy(mem1 + offset, new + offset, size);
+ ASSERT_MEM_EQ(mem1, mem2, 16);
+
+ memcpy(&old, mem1, 16);
+ new[offset]++;
+ old[offset]++;
+ MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset,
+ size, GADDR_V(mem1 + offset),
+ CMPXCHG_OLD(old + offset),
+ CMPXCHG_SUCCESS(&succ), KEY(key));
+ HOST_SYNC(test->vcpu, STAGE_COPIED);
+ MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2));
+ TEST_ASSERT(!succ, "exchange of values should not succeed");
+ ASSERT_MEM_EQ(mem1, mem2, 16);
+ ASSERT_MEM_EQ(&old, mem1, 16);
+ }
+ }
+}
+
+static void guest_copy(void)
+{
+ GUEST_SYNC(STAGE_INITED);
+ memcpy(&mem2, &mem1, sizeof(mem2));
+ GUEST_SYNC(STAGE_COPIED);
+}
+
+static void test_copy(void)
+{
+ struct test_default t = test_default_init(guest_copy);
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+
+ default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, NO_KEY);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_copy_access_register(void)
+{
+ struct test_default t = test_default_init(guest_copy);
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+
+ prepare_mem12();
+ t.run->psw_mask &= ~(3UL << (63 - 17));
+ t.run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */
+
+ /*
+ * Primary address space gets used if an access register
+	 * contains zero. The host makes use of AR[1] so it is a good
+ * candidate to ensure the guest AR (of zero) is used.
+ */
+ CHECK_N_DO(MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size,
+ GADDR_V(mem1), AR(1));
+ HOST_SYNC(t.vcpu, STAGE_COPIED);
+
+ CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, t.size,
+ GADDR_V(mem2), AR(1));
+ ASSERT_MEM_EQ(mem1, mem2, t.size);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
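+/*
+ * Guest helper: set the storage key of every page in [addr, addr + len).
+ * LRA translates the address and SSKE applies the key; the guest asserts
+ * that each page is mapped.
+ */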
+static void set_storage_key_range(void *addr, size_t len, uint8_t key)
+{
+ uintptr_t _addr, abs, i;
+ int not_mapped = 0;
+
+ _addr = (uintptr_t)addr;
+ for (i = _addr & PAGE_MASK; i < _addr + len; i += PAGE_SIZE) {
+ abs = i;
+ asm volatile (
+ "lra %[abs], 0(0,%[abs])\n"
+ " jz 0f\n"
+ " llill %[not_mapped],1\n"
+ " j 1f\n"
+ "0: sske %[key], %[abs]\n"
+ "1:"
+ : [abs] "+&a" (abs), [not_mapped] "+r" (not_mapped)
+ : [key] "r" (key)
+ : "cc"
+ );
+ GUEST_ASSERT_EQ(not_mapped, 0);
+ }
+}
+
+static void guest_copy_key(void)
+{
+ set_storage_key_range(mem1, sizeof(mem1), 0x90);
+ set_storage_key_range(mem2, sizeof(mem2), 0x90);
+ GUEST_SYNC(STAGE_SKEYS_SET);
+
+ for (;;) {
+ memcpy(&mem2, &mem1, sizeof(mem2));
+ GUEST_SYNC(STAGE_COPIED);
+ }
+}
+
+static void test_copy_key(void)
+{
+ struct test_default t = test_default_init(guest_copy_key);
+
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vm, no key */
+ default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, NO_KEY);
+
+	/* vm/vcpu, matching key or key 0 */
+ default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 0);
+ default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9);
+ default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 0);
+ default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9);
+ /*
+ * There used to be different code paths for key handling depending on
+ * if the region crossed a page boundary.
+ * There currently are not, but the more tests the merrier.
+ */
+ default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 0);
+ default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 9);
+ default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 0);
+ default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 9);
+
+ /* vm/vcpu, mismatching keys on read, but no fetch protection */
+ default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2);
+ default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 2);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_cmpxchg_key(void)
+{
+ struct test_default t = test_default_init(guest_copy_key);
+
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ default_cmpxchg(&t, NO_KEY);
+ default_cmpxchg(&t, 0);
+ default_cmpxchg(&t, 9);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static __uint128_t cut_to_size(int size, __uint128_t val)
+{
+ switch (size) {
+ case 1:
+ return (uint8_t)val;
+ case 2:
+ return (uint16_t)val;
+ case 4:
+ return (uint32_t)val;
+ case 8:
+ return (uint64_t)val;
+ case 16:
+ return val;
+ }
+ GUEST_FAIL("Invalid size = %u", size);
+ return 0;
+}
+
+static bool popcount_eq(__uint128_t a, __uint128_t b)
+{
+ unsigned int count_a, count_b;
+
+ count_a = __builtin_popcountl((uint64_t)(a >> 64)) +
+ __builtin_popcountl((uint64_t)a);
+ count_b = __builtin_popcountl((uint64_t)(b >> 64)) +
+ __builtin_popcountl((uint64_t)b);
+ return count_a == count_b;
+}
+
+static __uint128_t rotate(int size, __uint128_t val, int amount)
+{
+ unsigned int bits = size * 8;
+
+ amount = (amount + bits) % bits;
+ val = cut_to_size(size, val);
+ if (!amount)
+ return val;
+ return (val << (bits - amount)) | (val >> amount);
+}
+
+const unsigned int max_block = 16;
+
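+/*
+ * Derive a power-of-two block size and an aligned offset from the iteration
+ * index; guest and host use different multiplier/increment constants so that
+ * their access patterns differ while staying deterministic.
+ */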
+static void choose_block(bool guest, int i, int *size, int *offset)
+{
+ unsigned int rand;
+
+ rand = i;
+ if (guest) {
+ rand = rand * 19 + 11;
+ *size = 1 << ((rand % 3) + 2);
+ rand = rand * 19 + 11;
+ *offset = (rand % max_block) & ~(*size - 1);
+ } else {
+ rand = rand * 17 + 5;
+ *size = 1 << (rand % 5);
+ rand = rand * 17 + 5;
+ *offset = (rand % max_block) & ~(*size - 1);
+ }
+}
+
+static __uint128_t permutate_bits(bool guest, int i, int size, __uint128_t old)
+{
+ unsigned int rand;
+ int amount;
+ bool swap;
+
+ rand = i;
+ rand = rand * 3 + 1;
+ if (guest)
+ rand = rand * 3 + 1;
+ swap = rand % 2 == 0;
+ if (swap) {
+ int i, j;
+ __uint128_t new;
+ uint8_t byte0, byte1;
+
+ rand = rand * 3 + 1;
+ i = rand % size;
+ rand = rand * 3 + 1;
+ j = rand % size;
+ if (i == j)
+ return old;
+ new = rotate(16, old, i * 8);
+ byte0 = new & 0xff;
+ new &= ~0xff;
+ new = rotate(16, new, -i * 8);
+ new = rotate(16, new, j * 8);
+ byte1 = new & 0xff;
+ new = (new & ~0xff) | byte0;
+ new = rotate(16, new, -j * 8);
+ new = rotate(16, new, i * 8);
+ new = new | byte1;
+ new = rotate(16, new, -i * 8);
+ return new;
+ }
+ rand = rand * 3 + 1;
+ amount = rand % (size * 8);
+ return rotate(size, old, amount);
+}
+
+static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t new)
+{
+ bool ret;
+
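+	/*
+	 * Guest-side compare-and-swap using cs/csg/cdsg. On a mismatch the
+	 * current memory contents are written back to *old_addr, mirroring
+	 * the old-value buffer of the MEM_OP cmpxchg used by the host.
+	 */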
+ switch (size) {
+ case 4: {
+ uint32_t old = *old_addr;
+
+ asm volatile ("cs %[old],%[new],%[address]"
+ : [old] "+d" (old),
+ [address] "+Q" (*(uint32_t *)(target))
+ : [new] "d" ((uint32_t)new)
+ : "cc"
+ );
+ ret = old == (uint32_t)*old_addr;
+ *old_addr = old;
+ return ret;
+ }
+ case 8: {
+ uint64_t old = *old_addr;
+
+ asm volatile ("csg %[old],%[new],%[address]"
+ : [old] "+d" (old),
+ [address] "+Q" (*(uint64_t *)(target))
+ : [new] "d" ((uint64_t)new)
+ : "cc"
+ );
+ ret = old == (uint64_t)*old_addr;
+ *old_addr = old;
+ return ret;
+ }
+ case 16: {
+ __uint128_t old = *old_addr;
+
+ asm volatile ("cdsg %[old],%[new],%[address]"
+ : [old] "+d" (old),
+ [address] "+Q" (*(__uint128_t *)(target))
+ : [new] "d" (new)
+ : "cc"
+ );
+ ret = old == *old_addr;
+ *old_addr = old;
+ return ret;
+ }
+ }
+ GUEST_FAIL("Invalid size = %u", size);
+ return 0;
+}
+
+const unsigned int cmpxchg_iter_outer = 100, cmpxchg_iter_inner = 10000;
+
+static void guest_cmpxchg_key(void)
+{
+ int size, offset;
+ __uint128_t old, new;
+
+ set_storage_key_range(mem1, max_block, 0x10);
+ set_storage_key_range(mem2, max_block, 0x10);
+ GUEST_SYNC(STAGE_SKEYS_SET);
+
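+	/*
+	 * mem1 serves as a ping-pong handshake with the host: the host flips
+	 * it from 0 to 1, the guest flips it back from 1 to 0, pacing the
+	 * outer iterations while both sides cmpxchg concurrently on mem2.
+	 */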
+ for (int i = 0; i < cmpxchg_iter_outer; i++) {
+ do {
+ old = 1;
+ } while (!_cmpxchg(16, mem1, &old, 0));
+ for (int j = 0; j < cmpxchg_iter_inner; j++) {
+ choose_block(true, i + j, &size, &offset);
+ do {
+ new = permutate_bits(true, i + j, size, old);
+ } while (!_cmpxchg(size, mem2 + offset, &old, new));
+ }
+ }
+
+ GUEST_SYNC(STAGE_DONE);
+}
+
+static void *run_guest(void *data)
+{
+ struct test_info *info = data;
+
+ HOST_SYNC(*info, STAGE_DONE);
+ return NULL;
+}
+
+static char *quad_to_char(__uint128_t *quad, int size)
+{
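+	/* s390 is big-endian: the low-order size bytes sit at the end of the quad */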
+ return ((char *)quad) + (sizeof(*quad) - size);
+}
+
+static void test_cmpxchg_key_concurrent(void)
+{
+ struct test_default t = test_default_init(guest_cmpxchg_key);
+ int size, offset;
+ __uint128_t old, new;
+ bool success;
+ pthread_t thread;
+
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+ prepare_mem12();
+ MOP(t.vcpu, LOGICAL, WRITE, mem1, max_block, GADDR_V(mem2));
+ pthread_create(&thread, NULL, run_guest, &t.vcpu);
+
+ for (int i = 0; i < cmpxchg_iter_outer; i++) {
+ do {
+ old = 0;
+ new = 1;
+ MOP(t.vm, ABSOLUTE, CMPXCHG, &new,
+ sizeof(new), GADDR_V(mem1),
+ CMPXCHG_OLD(&old),
+ CMPXCHG_SUCCESS(&success), KEY(1));
+ } while (!success);
+ for (int j = 0; j < cmpxchg_iter_inner; j++) {
+ choose_block(false, i + j, &size, &offset);
+ do {
+ new = permutate_bits(false, i + j, size, old);
+ MOP(t.vm, ABSOLUTE, CMPXCHG, quad_to_char(&new, size),
+ size, GADDR_V(mem2 + offset),
+ CMPXCHG_OLD(quad_to_char(&old, size)),
+ CMPXCHG_SUCCESS(&success), KEY(1));
+ } while (!success);
+ }
+ }
+
+ pthread_join(thread, NULL);
+
+ MOP(t.vcpu, LOGICAL, READ, mem2, max_block, GADDR_V(mem2));
+ TEST_ASSERT(popcount_eq(*(__uint128_t *)mem1, *(__uint128_t *)mem2),
+ "Must retain number of set bits");
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void guest_copy_key_fetch_prot(void)
+{
+ /*
+ * For some reason combining the first sync with override enablement
+ * results in an exception when calling HOST_SYNC.
+ */
+ GUEST_SYNC(STAGE_INITED);
+ /* Storage protection override applies to both store and fetch. */
+ set_storage_key_range(mem1, sizeof(mem1), 0x98);
+ set_storage_key_range(mem2, sizeof(mem2), 0x98);
+ GUEST_SYNC(STAGE_SKEYS_SET);
+
+ for (;;) {
+ memcpy(&mem2, &mem1, sizeof(mem2));
+ GUEST_SYNC(STAGE_COPIED);
+ }
+}
+
+static void test_copy_key_storage_prot_override(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
+ t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vcpu, mismatching keys, storage protection override in effect */
+ default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_copy_key_fetch_prot(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vm/vcpu, matching key, fetch protection in effect */
+ default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9);
+ default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+#define ERR_PROT_MOP(...) \
+({ \
+ int rv; \
+ \
+ rv = ERR_MOP(__VA_ARGS__); \
+ TEST_ASSERT(rv == 4, "Should result in protection exception"); \
+})
+
+static void guest_error_key(void)
+{
+ GUEST_SYNC(STAGE_INITED);
+ set_storage_key_range(mem1, PAGE_SIZE, 0x18);
+ set_storage_key_range(mem1 + PAGE_SIZE, sizeof(mem1) - PAGE_SIZE, 0x98);
+ GUEST_SYNC(STAGE_SKEYS_SET);
+ GUEST_SYNC(STAGE_IDLED);
+}
+
+static void test_errors_key(void)
+{
+ struct test_default t = test_default_init(guest_error_key);
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vm/vcpu, mismatching keys, fetch protection in effect */
+ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
+ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem1), KEY(2));
+ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
+ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem1), KEY(2));
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_cmpxchg_key(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+ int i;
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ for (i = 1; i <= 16; i *= 2) {
+ __uint128_t old = 0;
+
+ ERR_PROT_MOP(t.vm, ABSOLUTE, CMPXCHG, mem2, i, GADDR_V(mem2),
+ CMPXCHG_OLD(&old), KEY(2));
+ }
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_termination(void)
+{
+ struct test_default t = test_default_init(guest_error_key);
+ uint64_t prefix;
+ uint64_t teid;
+ uint64_t teid_mask = BIT(63 - 56) | BIT(63 - 60) | BIT(63 - 61);
+ uint64_t psw[2];
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vcpu, mismatching keys after first page */
+ ERR_PROT_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(1), INJECT);
+ /*
+ * The memop injected a program exception and the test needs to check the
+ * Translation-Exception Identification (TEID). It is necessary to run
+ * the guest in order to be able to read the TEID from guest memory.
+ * Set the guest program new PSW, so the guest state is not clobbered.
+ */
+ prefix = t.run->s.regs.prefix;
+ psw[0] = t.run->psw_mask;
+ psw[1] = t.run->psw_addr;
+ MOP(t.vm, ABSOLUTE, WRITE, psw, sizeof(psw), GADDR(prefix + 464));
+ HOST_SYNC(t.vcpu, STAGE_IDLED);
+ MOP(t.vm, ABSOLUTE, READ, &teid, sizeof(teid), GADDR(prefix + 168));
+ /* Bits 56, 60, 61 form a code, 0 being the only one allowing for termination */
+ TEST_ASSERT_EQ(teid & teid_mask, 0);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_key_storage_prot_override(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
+ t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vm, mismatching keys, storage protection override not applicable to vm */
+ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
+ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem2), KEY(2));
+
+ kvm_vm_free(t.kvm_vm);
+}
+
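+/* Last page of the 64-bit address space, used for the wraparound tests */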
+const uint64_t last_page_addr = -PAGE_SIZE;
+
+static void guest_copy_key_fetch_prot_override(void)
+{
+ int i;
+ char *page_0 = 0;
+
+ GUEST_SYNC(STAGE_INITED);
+ set_storage_key_range(0, PAGE_SIZE, 0x18);
+ set_storage_key_range((void *)last_page_addr, PAGE_SIZE, 0x0);
+ asm volatile ("sske %[key],%[addr]\n" :: [addr] "r"(0L), [key] "r"(0x18) : "cc");
+ GUEST_SYNC(STAGE_SKEYS_SET);
+
+ for (;;) {
+ for (i = 0; i < PAGE_SIZE; i++)
+ page_0[i] = mem1[i];
+ GUEST_SYNC(STAGE_COPIED);
+ }
+}
+
+static void test_copy_key_fetch_prot_override(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
+ vm_vaddr_t guest_0_page, guest_last_page;
+
+ guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
+ guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+ if (guest_0_page != 0 || guest_last_page != last_page_addr) {
+ print_skip("did not allocate guest pages at required positions");
+ goto out;
+ }
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
+ t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vcpu, mismatching keys on fetch, fetch protection override applies */
+ prepare_mem12();
+ MOP(t.vcpu, LOGICAL, WRITE, mem1, PAGE_SIZE, GADDR_V(mem1));
+ HOST_SYNC(t.vcpu, STAGE_COPIED);
+ CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2));
+ ASSERT_MEM_EQ(mem1, mem2, 2048);
+
+ /*
+ * vcpu, mismatching keys on fetch, fetch protection override applies,
+ * wraparound
+ */
+ prepare_mem12();
+ MOP(t.vcpu, LOGICAL, WRITE, mem1, 2 * PAGE_SIZE, GADDR_V(guest_last_page));
+ HOST_SYNC(t.vcpu, STAGE_COPIED);
+ CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048,
+ GADDR_V(guest_last_page), KEY(2));
+ ASSERT_MEM_EQ(mem1, mem2, 2048);
+
+out:
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_key_fetch_prot_override_not_enabled(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
+ vm_vaddr_t guest_0_page, guest_last_page;
+
+ guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
+ guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+ if (guest_0_page != 0 || guest_last_page != last_page_addr) {
+ print_skip("did not allocate guest pages at required positions");
+ goto out;
+ }
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vcpu, mismatching keys on fetch, fetch protection override not enabled */
+ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(0), KEY(2));
+
+out:
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_key_fetch_prot_override_enabled(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
+ vm_vaddr_t guest_0_page, guest_last_page;
+
+ guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
+ guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+ if (guest_0_page != 0 || guest_last_page != last_page_addr) {
+ print_skip("did not allocate guest pages at required positions");
+ goto out;
+ }
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
+ t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /*
+ * vcpu, mismatching keys on fetch,
+	 * fetch protection override does not apply because the access exceeds
+	 * the 2048-byte override range
+ */
+ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048 + 1, GADDR_V(0), KEY(2));
+ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048 + 1,
+ GADDR_V(guest_last_page), KEY(2));
+	/* vm, fetch protection override does not apply */
+ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR(0), KEY(2));
+ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2));
+
+out:
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void guest_idle(void)
+{
+ GUEST_SYNC(STAGE_INITED); /* for consistency's sake */
+ for (;;)
+ GUEST_SYNC(STAGE_IDLED);
+}
+
+static void _test_errors_common(struct test_info info, enum mop_target target, int size)
+{
+ int rv;
+
+ /* Bad size: */
+ rv = ERR_MOP(info, target, WRITE, mem1, -1, GADDR_V(mem1));
+ TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes");
+
+ /* Zero size: */
+ rv = ERR_MOP(info, target, WRITE, mem1, 0, GADDR_V(mem1));
+ TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM),
+ "ioctl allows 0 as size");
+
+ /* Bad flags: */
+ rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), SET_FLAGS(-1));
+ TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags");
+
+ /* Bad guest address: */
+ rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL), CHECK_ONLY);
+ TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address with CHECK_ONLY");
+ rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL));
+ TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address on write");
+
+ /* Bad host address: */
+ rv = ERR_MOP(info, target, WRITE, 0, size, GADDR_V(mem1));
+ TEST_ASSERT(rv == -1 && errno == EFAULT,
+ "ioctl does not report bad host memory address");
+
+ /* Bad key: */
+ rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), KEY(17));
+ TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows invalid key");
+}
+
+static void test_errors(void)
+{
+ struct test_default t = test_default_init(guest_idle);
+ int rv;
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+
+ _test_errors_common(t.vcpu, LOGICAL, t.size);
+ _test_errors_common(t.vm, ABSOLUTE, t.size);
+
+ /* Bad operation: */
+ rv = ERR_MOP(t.vcpu, INVALID, WRITE, mem1, t.size, GADDR_V(mem1));
+ TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
+ /* virtual addresses are not translated when passing INVALID */
+ rv = ERR_MOP(t.vm, INVALID, WRITE, mem1, PAGE_SIZE, GADDR(0));
+ TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
+
+ /* Bad access register: */
+ t.run->psw_mask &= ~(3UL << (63 - 17));
+ t.run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */
+ HOST_SYNC(t.vcpu, STAGE_IDLED); /* To sync new state to SIE block */
+ rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), AR(17));
+ TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15");
+ t.run->psw_mask &= ~(3UL << (63 - 17)); /* Disable AR mode */
+ HOST_SYNC(t.vcpu, STAGE_IDLED); /* Run to sync new state */
+
+ /* Check that the SIDA calls are rejected for non-protected guests */
+ rv = ERR_MOP(t.vcpu, SIDA, READ, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0));
+ TEST_ASSERT(rv == -1 && errno == EINVAL,
+ "ioctl does not reject SIDA_READ in non-protected mode");
+ rv = ERR_MOP(t.vcpu, SIDA, WRITE, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0));
+ TEST_ASSERT(rv == -1 && errno == EINVAL,
+ "ioctl does not reject SIDA_WRITE in non-protected mode");
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_cmpxchg(void)
+{
+ struct test_default t = test_default_init(guest_idle);
+ __uint128_t old;
+ int rv, i, power = 1;
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+
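+	/* every size from 0 to 31 that is not a supported power of two must be rejected */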
+ for (i = 0; i < 32; i++) {
+ if (i == power) {
+ power *= 2;
+ continue;
+ }
+ rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1),
+ CMPXCHG_OLD(&old));
+ TEST_ASSERT(rv == -1 && errno == EINVAL,
+ "ioctl allows bad size for cmpxchg");
+ }
+ for (i = 1; i <= 16; i *= 2) {
+ rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR((void *)~0xfffUL),
+ CMPXCHG_OLD(&old));
+ TEST_ASSERT(rv > 0, "ioctl allows bad guest address for cmpxchg");
+ }
+ for (i = 2; i <= 16; i *= 2) {
+ rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1 + 1),
+ CMPXCHG_OLD(&old));
+ TEST_ASSERT(rv == -1 && errno == EINVAL,
+ "ioctl allows bad alignment for cmpxchg");
+ }
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+int main(int argc, char *argv[])
+{
+ int extension_cap, idx;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_MEM_OP));
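+	/*
+	 * The extension capability is a bit mask: a nonzero value indicates
+	 * storage key support for MEM_OP, bit 0x2 indicates cmpxchg support.
+	 */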
+ extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION);
+
+ struct testdef {
+ const char *name;
+ void (*test)(void);
+ bool requirements_met;
+ } testlist[] = {
+ {
+ .name = "simple copy",
+ .test = test_copy,
+ .requirements_met = true,
+ },
+ {
+ .name = "generic error checks",
+ .test = test_errors,
+ .requirements_met = true,
+ },
+ {
+ .name = "copy with storage keys",
+ .test = test_copy_key,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "cmpxchg with storage keys",
+ .test = test_cmpxchg_key,
+ .requirements_met = extension_cap & 0x2,
+ },
+ {
+ .name = "concurrently cmpxchg with storage keys",
+ .test = test_cmpxchg_key_concurrent,
+ .requirements_met = extension_cap & 0x2,
+ },
+ {
+ .name = "copy with key storage protection override",
+ .test = test_copy_key_storage_prot_override,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "copy with key fetch protection",
+ .test = test_copy_key_fetch_prot,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "copy with key fetch protection override",
+ .test = test_copy_key_fetch_prot_override,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "copy with access register mode",
+ .test = test_copy_access_register,
+ .requirements_met = true,
+ },
+ {
+ .name = "error checks with key",
+ .test = test_errors_key,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "error checks for cmpxchg with key",
+ .test = test_errors_cmpxchg_key,
+ .requirements_met = extension_cap & 0x2,
+ },
+ {
+ .name = "error checks for cmpxchg",
+ .test = test_errors_cmpxchg,
+ .requirements_met = extension_cap & 0x2,
+ },
+ {
+ .name = "termination",
+ .test = test_termination,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "error checks with key storage protection override",
+ .test = test_errors_key_storage_prot_override,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "error checks without key fetch prot override",
+ .test = test_errors_key_fetch_prot_override_not_enabled,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "error checks with key fetch prot override",
+ .test = test_errors_key_fetch_prot_override_enabled,
+ .requirements_met = extension_cap > 0,
+ },
+ };
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(testlist));
+
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ if (testlist[idx].requirements_met) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ } else {
+ ksft_test_result_skip("%s - requirements not met (kernel has extension cap %#x)\n",
+ testlist[idx].name, extension_cap);
+ }
+ }
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test for s390x CPU resets
+ *
+ * Copyright (C) 2020, IBM
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+
+#define LOCAL_IRQS 32
+
+#define ARBITRARY_NON_ZERO_VCPU_ID 3
+
+struct kvm_s390_irq buf[ARBITRARY_NON_ZERO_VCPU_ID + LOCAL_IRQS];
+
+static uint8_t regs_null[512];
+
+static void guest_code_initial(void)
+{
+ /* set several CRs to "safe" value */
+ unsigned long cr2_59 = 0x10; /* enable guarded storage */
+ unsigned long cr8_63 = 0x1; /* monitor mask = 1 */
+ unsigned long cr10 = 1; /* PER START */
+ unsigned long cr11 = -1; /* PER END */
+
+
+ /* Dirty registers */
+ asm volatile (
+ " lghi 2,0x11\n" /* Round toward 0 */
+ " sfpc 2\n" /* set fpc to !=0 */
+ " lctlg 2,2,%0\n"
+ " lctlg 8,8,%1\n"
+ " lctlg 10,10,%2\n"
+ " lctlg 11,11,%3\n"
+ /* now clobber some general purpose regs */
+ " llihh 0,0xffff\n"
+ " llihl 1,0x5555\n"
+ " llilh 2,0xaaaa\n"
+ " llill 3,0x0000\n"
+ /* now clobber a floating point reg */
+ " lghi 4,0x1\n"
+ " cdgbr 0,4\n"
+ /* now clobber an access reg */
+ " sar 9,4\n"
+ /* We embed diag 501 here to control register content */
+ " diag 0,0,0x501\n"
+ :
+ : "m" (cr2_59), "m" (cr8_63), "m" (cr10), "m" (cr11)
+ /* no clobber list as this should not return */
+ );
+}
+
+static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value)
+{
+ uint64_t eval_reg;
+
+ eval_reg = vcpu_get_reg(vcpu, id);
+ TEST_ASSERT(eval_reg == value, "value == 0x%lx", value);
+}
+
+static void assert_noirq(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_irq_state irq_state;
+ int irqs;
+
+ irq_state.len = sizeof(buf);
+ irq_state.buf = (unsigned long)buf;
+ irqs = __vcpu_ioctl(vcpu, KVM_S390_GET_IRQ_STATE, &irq_state);
+ /*
+ * irqs contains the number of retrieved interrupts. Any interrupt
+ * (notably, the emergency call interrupt we have injected) should
+ * be cleared by the resets, so this should be 0.
+ */
+ TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d", errno);
+ TEST_ASSERT(!irqs, "IRQ pending");
+}
+
+static void assert_clear(struct kvm_vcpu *vcpu)
+{
+ struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
+ struct kvm_sregs sregs;
+ struct kvm_regs regs;
+ struct kvm_fpu fpu;
+
+ vcpu_regs_get(vcpu, ®s);
+ TEST_ASSERT(!memcmp(®s.gprs, regs_null, sizeof(regs.gprs)), "grs == 0");
+
+ vcpu_sregs_get(vcpu, &sregs);
+ TEST_ASSERT(!memcmp(&sregs.acrs, regs_null, sizeof(sregs.acrs)), "acrs == 0");
+
+ vcpu_fpu_get(vcpu, &fpu);
+ TEST_ASSERT(!memcmp(&fpu.fprs, regs_null, sizeof(fpu.fprs)), "fprs == 0");
+
+ /* sync regs */
+ TEST_ASSERT(!memcmp(sync_regs->gprs, regs_null, sizeof(sync_regs->gprs)),
+ "gprs0-15 == 0 (sync_regs)");
+
+ TEST_ASSERT(!memcmp(sync_regs->acrs, regs_null, sizeof(sync_regs->acrs)),
+ "acrs0-15 == 0 (sync_regs)");
+
+ TEST_ASSERT(!memcmp(sync_regs->vrs, regs_null, sizeof(sync_regs->vrs)),
+ "vrs0-15 == 0 (sync_regs)");
+}
+
+static void assert_initial_noclear(struct kvm_vcpu *vcpu)
+{
+ struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
+
+ TEST_ASSERT(sync_regs->gprs[0] == 0xffff000000000000UL,
+ "gpr0 == 0xffff000000000000 (sync_regs)");
+ TEST_ASSERT(sync_regs->gprs[1] == 0x0000555500000000UL,
+ "gpr1 == 0x0000555500000000 (sync_regs)");
+ TEST_ASSERT(sync_regs->gprs[2] == 0x00000000aaaa0000UL,
+ "gpr2 == 0x00000000aaaa0000 (sync_regs)");
+ TEST_ASSERT(sync_regs->gprs[3] == 0x0000000000000000UL,
+ "gpr3 == 0x0000000000000000 (sync_regs)");
+ TEST_ASSERT(sync_regs->fprs[0] == 0x3ff0000000000000UL,
+		    "fpr0 == 1 (sync_regs)");
+ TEST_ASSERT(sync_regs->acrs[9] == 1, "ar9 == 1 (sync_regs)");
+}
+
+static void assert_initial(struct kvm_vcpu *vcpu)
+{
+ struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
+ struct kvm_sregs sregs;
+ struct kvm_fpu fpu;
+
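+	/* CR0 and CR14 must hold their architected values after an initial CPU reset */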
+ /* KVM_GET_SREGS */
+ vcpu_sregs_get(vcpu, &sregs);
+ TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0 (KVM_GET_SREGS)");
+ TEST_ASSERT(sregs.crs[14] == 0xC2000000UL,
+ "cr14 == 0xC2000000 (KVM_GET_SREGS)");
+ TEST_ASSERT(!memcmp(&sregs.crs[1], regs_null, sizeof(sregs.crs[1]) * 12),
+ "cr1-13 == 0 (KVM_GET_SREGS)");
+ TEST_ASSERT(sregs.crs[15] == 0, "cr15 == 0 (KVM_GET_SREGS)");
+
+ /* sync regs */
+ TEST_ASSERT(sync_regs->crs[0] == 0xE0UL, "cr0 == 0xE0 (sync_regs)");
+ TEST_ASSERT(sync_regs->crs[14] == 0xC2000000UL,
+ "cr14 == 0xC2000000 (sync_regs)");
+ TEST_ASSERT(!memcmp(&sync_regs->crs[1], regs_null, 8 * 12),
+ "cr1-13 == 0 (sync_regs)");
+ TEST_ASSERT(sync_regs->crs[15] == 0, "cr15 == 0 (sync_regs)");
+ TEST_ASSERT(sync_regs->fpc == 0, "fpc == 0 (sync_regs)");
+ TEST_ASSERT(sync_regs->todpr == 0, "todpr == 0 (sync_regs)");
+ TEST_ASSERT(sync_regs->cputm == 0, "cputm == 0 (sync_regs)");
+ TEST_ASSERT(sync_regs->ckc == 0, "ckc == 0 (sync_regs)");
+ TEST_ASSERT(sync_regs->pp == 0, "pp == 0 (sync_regs)");
+ TEST_ASSERT(sync_regs->gbea == 1, "gbea == 1 (sync_regs)");
+
+ /* kvm_run */
+ TEST_ASSERT(vcpu->run->psw_addr == 0, "psw_addr == 0 (kvm_run)");
+ TEST_ASSERT(vcpu->run->psw_mask == 0, "psw_mask == 0 (kvm_run)");
+
+ vcpu_fpu_get(vcpu, &fpu);
+ TEST_ASSERT(!fpu.fpc, "fpc == 0");
+
+ test_one_reg(vcpu, KVM_REG_S390_GBEA, 1);
+ test_one_reg(vcpu, KVM_REG_S390_PP, 0);
+ test_one_reg(vcpu, KVM_REG_S390_TODPR, 0);
+ test_one_reg(vcpu, KVM_REG_S390_CPU_TIMER, 0);
+ test_one_reg(vcpu, KVM_REG_S390_CLOCK_COMP, 0);
+}
+
+static void assert_normal_noclear(struct kvm_vcpu *vcpu)
+{
+ struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
+
+	TEST_ASSERT(sync_regs->crs[2] == 0x10, "cr2 == 0x10 (sync_regs)");
+	TEST_ASSERT(sync_regs->crs[8] == 1, "cr8 == 1 (sync_regs)");
+ TEST_ASSERT(sync_regs->crs[10] == 1, "cr10 == 1 (sync_regs)");
+ TEST_ASSERT(sync_regs->crs[11] == -1, "cr11 == -1 (sync_regs)");
+}
+
+static void assert_normal(struct kvm_vcpu *vcpu)
+{
+ test_one_reg(vcpu, KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID);
+ TEST_ASSERT(vcpu->run->s.regs.pft == KVM_S390_PFAULT_TOKEN_INVALID,
+ "pft == 0xff..... (sync_regs)");
+ assert_noirq(vcpu);
+}
+
+static void inject_irq(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_irq_state irq_state;
+ struct kvm_s390_irq *irq = &buf[0];
+ int irqs;
+
+ /* Inject IRQ */
+ irq_state.len = sizeof(struct kvm_s390_irq);
+ irq_state.buf = (unsigned long)buf;
+ irq->type = KVM_S390_INT_EMERGENCY;
+ irq->u.emerg.code = vcpu->id;
+ irqs = __vcpu_ioctl(vcpu, KVM_S390_SET_IRQ_STATE, &irq_state);
+ TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d", errno);
+}
+
+static struct kvm_vm *create_vm(struct kvm_vcpu **vcpu)
+{
+ struct kvm_vm *vm;
+
+ vm = vm_create(1);
+
+ *vcpu = vm_vcpu_add(vm, ARBITRARY_NON_ZERO_VCPU_ID, guest_code_initial);
+
+ return vm;
+}
+
+static void test_normal(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ ksft_print_msg("Testing normal reset\n");
+ vm = create_vm(&vcpu);
+
+ vcpu_run(vcpu);
+
+ inject_irq(vcpu);
+
+ vcpu_ioctl(vcpu, KVM_S390_NORMAL_RESET, NULL);
+
+	/* must be cleared */
+ assert_normal(vcpu);
+	/* must not be cleared */
+ assert_normal_noclear(vcpu);
+ assert_initial_noclear(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+static void test_initial(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ ksft_print_msg("Testing initial reset\n");
+ vm = create_vm(&vcpu);
+
+ vcpu_run(vcpu);
+
+ inject_irq(vcpu);
+
+ vcpu_ioctl(vcpu, KVM_S390_INITIAL_RESET, NULL);
+
+	/* must be cleared */
+ assert_normal(vcpu);
+ assert_initial(vcpu);
+	/* must not be cleared */
+ assert_initial_noclear(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+static void test_clear(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ ksft_print_msg("Testing clear reset\n");
+ vm = create_vm(&vcpu);
+
+ vcpu_run(vcpu);
+
+ inject_irq(vcpu);
+
+ vcpu_ioctl(vcpu, KVM_S390_CLEAR_RESET, NULL);
+
+	/* must be cleared */
+ assert_normal(vcpu);
+ assert_initial(vcpu);
+ assert_clear(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+struct testdef {
+ const char *name;
+ void (*test)(void);
+ bool needs_cap;
+} testlist[] = {
+ { "initial", test_initial, false },
+ { "normal", test_normal, true },
+ { "clear", test_clear, true },
+};
+
+int main(int argc, char *argv[])
+{
+ bool has_s390_vcpu_resets = kvm_check_cap(KVM_CAP_S390_VCPU_RESETS);
+ int idx;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(testlist));
+
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ if (!testlist[idx].needs_cap || has_s390_vcpu_resets) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ } else {
+ ksft_test_result_skip("%s - no VCPU_RESETS capability\n",
+ testlist[idx].name);
+ }
+ }
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test shared zeropage handling (with/without storage keys)
+ *
+ * Copyright (C) 2024, Red Hat, Inc.
+ */
+#include <sys/mman.h>
+
+#include <linux/fs.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+
+static void set_storage_key(void *addr, uint8_t skey)
+{
+ asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
+}
+
+static void guest_code(void)
+{
+ /* Issue some storage key instruction. */
+ set_storage_key((void *)0, 0x98);
+ GUEST_DONE();
+}
+
+/*
+ * Returns 1 if the shared zeropage is mapped, 0 if something else is mapped.
+ * Returns < 0 on error or if nothing is mapped.
+ */
+static int maps_shared_zeropage(int pagemap_fd, void *addr)
+{
+ struct page_region region;
+ struct pm_scan_arg arg = {
+ .start = (uintptr_t)addr,
+ .end = (uintptr_t)addr + 4096,
+ .vec = (uintptr_t)®ion,
+ .vec_len = 1,
+ .size = sizeof(struct pm_scan_arg),
+ .category_mask = PAGE_IS_PFNZERO,
+ .category_anyof_mask = PAGE_IS_PRESENT,
+ .return_mask = PAGE_IS_PFNZERO,
+ };
+ return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
+}
+
+int main(int argc, char *argv[])
+{
+ char *mem, *page0, *page1, *page2, tmp;
+ const size_t pagesize = getpagesize();
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int pagemap_fd;
+
+ ksft_print_header();
+ ksft_set_plan(3);
+
+ /*
+ * We'll use memory that is not mapped into the VM for simplicity.
+ * Shared zeropages are enabled/disabled per-process.
+ */
+ mem = mmap(0, 3 * pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
+ TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");
+
+ /* Disable THP. Ignore errors on older kernels. */
+ madvise(mem, 3 * pagesize, MADV_NOHUGEPAGE);
+
+ page0 = mem;
+ page1 = page0 + pagesize;
+ page2 = page1 + pagesize;
+
+ /* Can we even detect shared zeropages? */
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ TEST_REQUIRE(pagemap_fd >= 0);
+
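+	/* fault in the page read-only; the empty asm keeps the read from being optimized out */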
+ tmp = *page0;
+ asm volatile("" : "+r" (tmp));
+ TEST_REQUIRE(maps_shared_zeropage(pagemap_fd, page0) == 1);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ /* Verify that we get the shared zeropage after VM creation. */
+ tmp = *page1;
+ asm volatile("" : "+r" (tmp));
+ ksft_test_result(maps_shared_zeropage(pagemap_fd, page1) == 1,
+ "Shared zeropages should be enabled\n");
+
+ /*
+ * Let our VM execute a storage key instruction that should
+ * unshare all shared zeropages.
+ */
+ vcpu_run(vcpu);
+ get_ucall(vcpu, &uc);
+ TEST_ASSERT_EQ(uc.cmd, UCALL_DONE);
+
+ /* Verify that we don't have a shared zeropage anymore. */
+ ksft_test_result(!maps_shared_zeropage(pagemap_fd, page1),
+ "Shared zeropage should be gone\n");
+
+ /* Verify that we don't get any new shared zeropages. */
+ tmp = *page2;
+ asm volatile("" : "+r" (tmp));
+ ksft_test_result(!maps_shared_zeropage(pagemap_fd, page2),
+ "Shared zeropages should be disabled\n");
+
+ kvm_vm_free(vm);
+
+ ksft_finished();
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for s390x KVM_CAP_SYNC_REGS
+ *
+ * Based on the same test for x86:
+ * Copyright (C) 2018, Google LLC.
+ *
+ * Adaptions for s390x:
+ * Copyright (C) 2019, Red Hat, Inc.
+ *
+ * Test expected behavior of the KVM_CAP_SYNC_REGS functionality.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "diag318_test_handler.h"
+#include "kselftest.h"
+
+static void guest_code(void)
+{
+ /*
+	 * We embed diag 501 here instead of doing a ucall so that the compiler
+	 * cannot have clobbered r11 by the time the diag is intercepted.
+ */
+ asm volatile (
+ "0: diag 0,0,0x501\n"
+ " ahi 11,1\n"
+ " j 0b\n"
+ );
+}
+
+#define REG_COMPARE(reg) \
+ TEST_ASSERT(left->reg == right->reg, \
+ "Register " #reg \
+ " values did not match: 0x%llx, 0x%llx", \
+ left->reg, right->reg)
+
+#define REG_COMPARE32(reg) \
+ TEST_ASSERT(left->reg == right->reg, \
+ "Register " #reg \
+ " values did not match: 0x%x, 0x%x", \
+ left->reg, right->reg)
+
+
+static void compare_regs(struct kvm_regs *left, struct kvm_sync_regs *right)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ REG_COMPARE(gprs[i]);
+}
+
+static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ REG_COMPARE32(acrs[i]);
+
+ for (i = 0; i < 16; i++)
+ REG_COMPARE(crs[i]);
+}
+
+#undef REG_COMPARE
+
+#define TEST_SYNC_FIELDS (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS|KVM_SYNC_DIAG318)
+#define INVALID_SYNC_FIELD 0x80000000
+
+void test_read_invalid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ int rv;
+
+ /* Request reading invalid register set from VCPU. */
+ run->kvm_valid_regs = INVALID_SYNC_FIELD;
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
+ rv);
+ run->kvm_valid_regs = 0;
+
+ run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
+ rv);
+ run->kvm_valid_regs = 0;
+}
+
+void test_set_invalid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ int rv;
+
+ /* Request setting invalid register set into VCPU. */
+ run->kvm_dirty_regs = INVALID_SYNC_FIELD;
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
+ rv);
+ run->kvm_dirty_regs = 0;
+
+ run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
+ rv);
+ run->kvm_dirty_regs = 0;
+}
+
+void test_req_and_verify_all_valid_regs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_sregs sregs;
+ struct kvm_regs regs;
+ int rv;
+
+ /* Request and verify all valid register sets. */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
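+	/* the guest's diag 0x501 shows up as an instruction interception (code 4) */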
+ TEST_ASSERT(run->s390_sieic.icptcode == 4 &&
+ (run->s390_sieic.ipa >> 8) == 0x83 &&
+ (run->s390_sieic.ipb >> 16) == 0x501,
+ "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x",
+ run->s390_sieic.icptcode, run->s390_sieic.ipa,
+ run->s390_sieic.ipb);
+
+ vcpu_regs_get(vcpu, ®s);
+ compare_regs(®s, &run->s.regs);
+
+ vcpu_sregs_get(vcpu, &sregs);
+ compare_sregs(&sregs, &run->s.regs);
+}
+
+void test_set_and_verify_various_reg_values(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_sregs sregs;
+ struct kvm_regs regs;
+ int rv;
+
+ /* Set and verify various register values */
+ run->s.regs.gprs[11] = 0xBAD1DEA;
+ run->s.regs.acrs[0] = 1 << 11;
+
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS;
+
+ if (get_diag318_info() > 0) {
+ run->s.regs.diag318 = get_diag318_info();
+ run->kvm_dirty_regs |= KVM_SYNC_DIAG318;
+ }
+
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1,
+ "r11 sync regs value incorrect 0x%llx.",
+ run->s.regs.gprs[11]);
+ TEST_ASSERT(run->s.regs.acrs[0] == 1 << 11,
+ "acr0 sync regs value incorrect 0x%x.",
+ run->s.regs.acrs[0]);
+ TEST_ASSERT(run->s.regs.diag318 == get_diag318_info(),
+ "diag318 sync regs value incorrect 0x%llx.",
+ run->s.regs.diag318);
+
+ vcpu_regs_get(vcpu, ®s);
+ compare_regs(®s, &run->s.regs);
+
+ vcpu_sregs_get(vcpu, &sregs);
+ compare_sregs(&sregs, &run->s.regs);
+}
+
+void test_clear_kvm_dirty_regs_bits(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ int rv;
+
+ /* Clear kvm_dirty_regs bits, verify new s.regs values are
+ * overwritten with existing guest values.
+ */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ run->kvm_dirty_regs = 0;
+ run->s.regs.gprs[11] = 0xDEADBEEF;
+ run->s.regs.diag318 = 0x4B1D;
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF,
+ "r11 sync regs value incorrect 0x%llx.",
+ run->s.regs.gprs[11]);
+ TEST_ASSERT(run->s.regs.diag318 != 0x4B1D,
+ "diag318 sync regs value incorrect 0x%llx.",
+ run->s.regs.diag318);
+}
+
+struct testdef {
+ const char *name;
+ void (*test)(struct kvm_vcpu *vcpu);
+} testlist[] = {
+ { "read invalid", test_read_invalid },
+ { "set invalid", test_set_invalid },
+ { "request+verify all valid regs", test_req_and_verify_all_valid_regs },
+ { "set+verify various regs", test_set_and_verify_various_reg_values },
+ { "clear kvm_dirty_regs bits", test_clear_kvm_dirty_regs_bits },
+};
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int idx;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
+
+ ksft_print_header();
+
+ ksft_set_plan(ARRAY_SIZE(testlist));
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ testlist[idx].test(vcpu);
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ }
+
+ kvm_vm_free(vm);
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test TEST PROTECTION emulation.
+ *
+ * Copyright IBM Corp. 2021
+ */
+#include <sys/mman.h>
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+#include "processor.h"
+
+#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38))
+#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39))
+
+static __aligned(PAGE_SIZE) uint8_t pages[2][PAGE_SIZE];
+static uint8_t *const page_store_prot = pages[0];
+static uint8_t *const page_fetch_prot = pages[1];
+
+/* A nonzero return value indicates that the address is not mapped */
+static int set_storage_key(void *addr, uint8_t key)
+{
+ int not_mapped = 0;
+
+ asm volatile (
+ "lra %[addr], 0(0,%[addr])\n"
+ " jz 0f\n"
+ " llill %[not_mapped],1\n"
+ " j 1f\n"
+ "0: sske %[key], %[addr]\n"
+ "1:"
+ : [addr] "+&a" (addr), [not_mapped] "+r" (not_mapped)
+ : [key] "r" (key)
+ : "cc"
+ );
+ return -not_mapped;
+}
+
+enum permission {
+ READ_WRITE = 0,
+ READ = 1,
+ RW_PROTECTED = 2,
+ TRANSL_UNAVAIL = 3,
+};
+
+static enum permission test_protection(void *addr, uint8_t key)
+{
+ uint64_t mask;
+
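+	/* tprot sets the condition code; ipm captures it so the shift below can extract it */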
+ asm volatile (
+ "tprot %[addr], 0(%[key])\n"
+ " ipm %[mask]\n"
+ : [mask] "=r" (mask)
+ : [addr] "Q" (*(char *)addr),
+ [key] "a" (key)
+ : "cc"
+ );
+
+ return (enum permission)(mask >> 28);
+}
+
+enum stage {
+ STAGE_INIT_SIMPLE,
+ TEST_SIMPLE,
+ STAGE_INIT_FETCH_PROT_OVERRIDE,
+ TEST_FETCH_PROT_OVERRIDE,
+ TEST_STORAGE_PROT_OVERRIDE,
+ STAGE_END /* must be the last entry (it's the amount of tests) */
+};
+
+struct test {
+ enum stage stage;
+ void *addr;
+ uint8_t key;
+ enum permission expected;
+} tests[] = {
+ /*
+ * We perform each test in the array by executing TEST PROTECTION on
+ * the specified addr with the specified key and checking if the returned
+ * permissions match the expected value.
+ * Both guest and host cooperate to set up the required test conditions.
+ * A central condition is that the page targeted by addr has to be DAT
+ * protected in the host mappings, in order for KVM to emulate the
+ * TEST PROTECTION instruction.
+ * Since the page tables are shared, the host uses mprotect to achieve
+ * this.
+ *
+	 * Tests resulting in RW_PROTECTED/TRANSL_UNAVAIL will be interpreted
+	 * by SIE, not KVM, but there is no harm in testing them as well.
+ * See Enhanced Suppression-on-Protection Facilities in the
+ * Interpretive-Execution Mode
+ */
+ /*
+ * guest: set storage key of page_store_prot to 1
+ * storage key of page_fetch_prot to 9 and enable
+ * protection for it
+ * STAGE_INIT_SIMPLE
+ * host: write protect both via mprotect
+ */
+ /* access key 0 matches any storage key -> RW */
+ { TEST_SIMPLE, page_store_prot, 0x00, READ_WRITE },
+ /* access key matches storage key -> RW */
+ { TEST_SIMPLE, page_store_prot, 0x10, READ_WRITE },
+ /* mismatched keys, but no fetch protection -> RO */
+ { TEST_SIMPLE, page_store_prot, 0x20, READ },
+ /* access key 0 matches any storage key -> RW */
+ { TEST_SIMPLE, page_fetch_prot, 0x00, READ_WRITE },
+ /* access key matches storage key -> RW */
+ { TEST_SIMPLE, page_fetch_prot, 0x90, READ_WRITE },
+ /* mismatched keys, fetch protection -> inaccessible */
+ { TEST_SIMPLE, page_fetch_prot, 0x10, RW_PROTECTED },
+ /* page 0 not mapped yet -> translation not available */
+ { TEST_SIMPLE, (void *)0x00, 0x10, TRANSL_UNAVAIL },
+ /*
+ * host: try to map page 0
+ * guest: set storage key of page 0 to 9 and enable fetch protection
+ * STAGE_INIT_FETCH_PROT_OVERRIDE
+ * host: write protect page 0
+ * enable fetch protection override
+ */
+ /* mismatched keys, fetch protection, but override applies -> RO */
+ { TEST_FETCH_PROT_OVERRIDE, (void *)0x00, 0x10, READ },
+ /* mismatched keys, fetch protection, override applies to 0-2048 only -> inaccessible */
+ { TEST_FETCH_PROT_OVERRIDE, (void *)2049, 0x10, RW_PROTECTED },
+ /*
+ * host: enable storage protection override
+ */
+ /* mismatched keys, but override applies (storage key 9) -> RW */
+ { TEST_STORAGE_PROT_OVERRIDE, page_fetch_prot, 0x10, READ_WRITE },
+ /* mismatched keys, no fetch protection, override doesn't apply -> RO */
+ { TEST_STORAGE_PROT_OVERRIDE, page_store_prot, 0x20, READ },
+ /* mismatched keys, but override applies (storage key 9) -> RW */
+ { TEST_STORAGE_PROT_OVERRIDE, (void *)2049, 0x10, READ_WRITE },
+ /* end marker */
+ { STAGE_END, 0, 0, 0 },
+};
+
+static enum stage perform_next_stage(int *i, bool mapped_0)
+{
+ enum stage stage = tests[*i].stage;
+ enum permission result;
+ bool skip;
+
+ for (; tests[*i].stage == stage; (*i)++) {
+ /*
+ * Some fetch protection override tests require that page 0
+		 * be mapped; however, when the host tries to map that page via
+		 * vm_vaddr_alloc, it may happen that some other page gets mapped
+		 * instead.
+		 * In order to skip these tests we detect this inside the guest.
+ */
+ skip = tests[*i].addr < (void *)PAGE_SIZE &&
+ tests[*i].expected != TRANSL_UNAVAIL &&
+ !mapped_0;
+ if (!skip) {
+ result = test_protection(tests[*i].addr, tests[*i].key);
+ __GUEST_ASSERT(result == tests[*i].expected,
+ "Wanted %u, got %u, for i = %u",
+ tests[*i].expected, result, *i);
+ }
+ }
+ return stage;
+}
+
+static void guest_code(void)
+{
+ bool mapped_0;
+ int i = 0;
+
+ GUEST_ASSERT_EQ(set_storage_key(page_store_prot, 0x10), 0);
+ GUEST_ASSERT_EQ(set_storage_key(page_fetch_prot, 0x98), 0);
+ GUEST_SYNC(STAGE_INIT_SIMPLE);
+ GUEST_SYNC(perform_next_stage(&i, false));
+
+ /* Fetch-protection override */
+ mapped_0 = !set_storage_key((void *)0, 0x98);
+ GUEST_SYNC(STAGE_INIT_FETCH_PROT_OVERRIDE);
+ GUEST_SYNC(perform_next_stage(&i, mapped_0));
+
+ /* Storage-protection override */
+ GUEST_SYNC(perform_next_stage(&i, mapped_0));
+}
+
+#define HOST_SYNC_NO_TAP(vcpup, stage) \
+({ \
+ struct kvm_vcpu *__vcpu = (vcpup); \
+ struct ucall uc; \
+ int __stage = (stage); \
+ \
+ vcpu_run(__vcpu); \
+ get_ucall(__vcpu, &uc); \
+ if (uc.cmd == UCALL_ABORT) \
+ REPORT_GUEST_ASSERT(uc); \
+ TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC); \
+ TEST_ASSERT_EQ(uc.args[1], __stage); \
+})
+
+#define HOST_SYNC(vcpu, stage) \
+({ \
+ HOST_SYNC_NO_TAP(vcpu, stage); \
+ ksft_test_result_pass("" #stage "\n"); \
+})
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ vm_vaddr_t guest_0_page;
+
+ ksft_print_header();
+ ksft_set_plan(STAGE_END);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
+
+ HOST_SYNC(vcpu, STAGE_INIT_SIMPLE);
+ mprotect(addr_gva2hva(vm, (vm_vaddr_t)pages), PAGE_SIZE * 2, PROT_READ);
+ HOST_SYNC(vcpu, TEST_SIMPLE);
+
+ guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0);
+ if (guest_0_page != 0) {
+ /* Use NO_TAP so we don't get a PASS print */
+ HOST_SYNC_NO_TAP(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE);
+ ksft_test_result_skip("STAGE_INIT_FETCH_PROT_OVERRIDE - "
+ "Did not allocate page at 0\n");
+ } else {
+ HOST_SYNC(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE);
+ }
+ if (guest_0_page == 0)
+ mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ);
+ run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
+ run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(vcpu, TEST_FETCH_PROT_OVERRIDE);
+
+ run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
+ run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(vcpu, TEST_STORAGE_PROT_OVERRIDE);
+
+ kvm_vm_free(vm);
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test code for the s390x kvm ucontrol interface
+ *
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ * Christoph Schlameuss <schlameuss@linux.ibm.com>
+ */
+#include "debug_print.h"
+#include "kselftest_harness.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "sie.h"
+
+#include <linux/capability.h>
+#include <linux/sizes.h>
+
+#define PGM_SEGMENT_TRANSLATION 0x10
+
+#define VM_MEM_SIZE (4 * SZ_1M)
+#define VM_MEM_EXT_SIZE (2 * SZ_1M)
+#define VM_MEM_MAX_M ((VM_MEM_SIZE + VM_MEM_EXT_SIZE) / SZ_1M)
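+/* the base 4M is ucas-mapped during setup; the extra 2M is mapped on demand by the tests */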
+
+/* declare capget directly so that capabilities can be checked without libcap */
+int capget(cap_user_header_t header, cap_user_data_t data);
+
+/**
+ * In order to create user controlled virtual machines on S390,
+ * check KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL
+ * as a privileged user (CAP_SYS_ADMIN).
+ */
+void require_ucontrol_admin(void)
+{
+ struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3];
+ struct __user_cap_header_struct hdr = {
+ .version = _LINUX_CAPABILITY_VERSION_3,
+ };
+ int rc;
+
+ rc = capget(&hdr, data);
+ TEST_ASSERT_EQ(0, rc);
+ TEST_REQUIRE((data->effective & CAP_TO_MASK(CAP_SYS_ADMIN)) > 0);
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL));
+}
+
+/* Test program setting some registers and looping */
+extern char test_gprs_asm[];
+asm("test_gprs_asm:\n"
+ "xgr %r0, %r0\n"
+ "lgfi %r1,1\n"
+ "lgfi %r2,2\n"
+ "lgfi %r3,3\n"
+ "lgfi %r4,4\n"
+ "lgfi %r5,5\n"
+ "lgfi %r6,6\n"
+ "lgfi %r7,7\n"
+ "0:\n"
+ " diag 0,0,0x44\n"
+ " ahi %r0,1\n"
+ " j 0b\n"
+);
+
+/* Test program manipulating memory */
+extern char test_mem_asm[];
+asm("test_mem_asm:\n"
+ "xgr %r0, %r0\n"
+
+ "0:\n"
+ " ahi %r0,1\n"
+ " st %r1,0(%r5,%r6)\n"
+
+ " xgr %r1,%r1\n"
+ " l %r1,0(%r5,%r6)\n"
+ " ahi %r0,1\n"
+ " diag 0,0,0x44\n"
+
+ " j 0b\n"
+);
+
+/* Test program manipulating storage keys */
+extern char test_skey_asm[];
+asm("test_skey_asm:\n"
+ "xgr %r0, %r0\n"
+
+ "0:\n"
+ " ahi %r0,1\n"
+ " st %r1,0(%r5,%r6)\n"
+
+ " iske %r1,%r6\n"
+ " ahi %r0,1\n"
+ " diag 0,0,0x44\n"
+
+ " sske %r1,%r6\n"
+ " xgr %r1,%r1\n"
+ " iske %r1,%r6\n"
+ " ahi %r0,1\n"
+ " diag 0,0,0x44\n"
+
+ " rrbe %r1,%r6\n"
+ " iske %r1,%r6\n"
+ " ahi %r0,1\n"
+ " diag 0,0,0x44\n"
+
+ " j 0b\n"
+);
+
+FIXTURE(uc_kvm)
+{
+ struct kvm_s390_sie_block *sie_block;
+ struct kvm_run *run;
+ uintptr_t base_gpa;
+ uintptr_t code_gpa;
+ uintptr_t base_hva;
+ uintptr_t code_hva;
+ int kvm_run_size;
+ vm_paddr_t pgd;
+ void *vm_mem;
+ int vcpu_fd;
+ int kvm_fd;
+ int vm_fd;
+};
+
+/**
+ * Create a VM with a single vcpu; map kvm_run and the SIE control block for easy access.
+ */
+FIXTURE_SETUP(uc_kvm)
+{
+ struct kvm_s390_vm_cpu_processor info;
+ int rc;
+
+ require_ucontrol_admin();
+
+ self->kvm_fd = open_kvm_dev_path_or_exit();
+ self->vm_fd = ioctl(self->kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
+ ASSERT_GE(self->vm_fd, 0);
+
+ kvm_device_attr_get(self->vm_fd, KVM_S390_VM_CPU_MODEL,
+ KVM_S390_VM_CPU_PROCESSOR, &info);
+ TH_LOG("create VM 0x%llx", info.cpuid);
+
+ self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0);
+ ASSERT_GE(self->vcpu_fd, 0);
+
+ self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
+ ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run))
+ TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size));
+ self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size,
+ PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
+ ASSERT_NE(self->run, MAP_FAILED);
+ /**
+ * For virtual cpus that have been created with S390 user controlled
+ * virtual machines, the resulting vcpu fd can be memory mapped at page
+ * offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of
+ * the virtual cpu's hardware control block.
+ */
+ self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT);
+ ASSERT_NE(self->sie_block, MAP_FAILED);
+
+ TH_LOG("VM created %p %p", self->run, self->sie_block);
+
+ self->base_gpa = 0;
+ self->code_gpa = self->base_gpa + (3 * SZ_1M);
+
+ self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_MAX_M * SZ_1M);
+	ASSERT_NE(NULL, self->vm_mem) TH_LOG("aligned_alloc failed %u", errno);
+ self->base_hva = (uintptr_t)self->vm_mem;
+ self->code_hva = self->base_hva - self->base_gpa + self->code_gpa;
+ struct kvm_s390_ucas_mapping map = {
+ .user_addr = self->base_hva,
+ .vcpu_addr = self->base_gpa,
+ .length = VM_MEM_SIZE,
+ };
+ TH_LOG("ucas map %p %p 0x%llx",
+ (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
+ rc = ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map);
+ ASSERT_EQ(0, rc) TH_LOG("ucas map result %d not expected, %s",
+ rc, strerror(errno));
+
+ TH_LOG("page in %p", (void *)self->base_gpa);
+ rc = ioctl(self->vcpu_fd, KVM_S390_VCPU_FAULT, self->base_gpa);
+ ASSERT_EQ(0, rc) TH_LOG("vcpu fault (%p) result %d not expected, %s",
+ (void *)self->base_hva, rc, strerror(errno));
+
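+	/* the vcpu is created in the stopped state; clear it so it can run */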
+ self->sie_block->cpuflags &= ~CPUSTAT_STOPPED;
+}
+
+FIXTURE_TEARDOWN(uc_kvm)
+{
+ munmap(self->sie_block, PAGE_SIZE);
+ munmap(self->run, self->kvm_run_size);
+ close(self->vcpu_fd);
+ close(self->vm_fd);
+ close(self->kvm_fd);
+ free(self->vm_mem);
+}
+
+TEST_F(uc_kvm, uc_sie_assertions)
+{
+ /* assert interception of Code 08 (Program Interruption) is set */
+ EXPECT_EQ(0, self->sie_block->ecb & ECB_SPECI);
+}
+
+TEST_F(uc_kvm, uc_attr_mem_limit)
+{
+ u64 limit;
+ struct kvm_device_attr attr = {
+ .group = KVM_S390_VM_MEM_CTRL,
+ .attr = KVM_S390_VM_MEM_LIMIT_SIZE,
+ .addr = (unsigned long)&limit,
+ };
+ int rc;
+
+ rc = ioctl(self->vm_fd, KVM_GET_DEVICE_ATTR, &attr);
+ EXPECT_EQ(0, rc);
+ EXPECT_EQ(~0UL, limit);
+
+ /* assert set not supported */
+ rc = ioctl(self->vm_fd, KVM_SET_DEVICE_ATTR, &attr);
+ EXPECT_EQ(-1, rc);
+ EXPECT_EQ(EINVAL, errno);
+}
+
+TEST_F(uc_kvm, uc_no_dirty_log)
+{
+ struct kvm_dirty_log dlog;
+ int rc;
+
+ rc = ioctl(self->vm_fd, KVM_GET_DIRTY_LOG, &dlog);
+ EXPECT_EQ(-1, rc);
+ EXPECT_EQ(EINVAL, errno);
+}
+
+/**
+ * Assert HPAGE CAP cannot be enabled on UCONTROL VM
+ */
+TEST(uc_cap_hpage)
+{
+ int rc, kvm_fd, vm_fd, vcpu_fd;
+ struct kvm_enable_cap cap = {
+ .cap = KVM_CAP_S390_HPAGE_1M,
+ };
+
+ require_ucontrol_admin();
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
+ ASSERT_GE(vm_fd, 0);
+
+ /* assert hpages are not supported on ucontrol vm */
+ rc = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_HPAGE_1M);
+ EXPECT_EQ(0, rc);
+
+ /* Test that KVM_CAP_S390_HPAGE_1M can't be enabled for a ucontrol vm */
+ rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap);
+ EXPECT_EQ(-1, rc);
+ EXPECT_EQ(EINVAL, errno);
+
+ /* assert HPAGE CAP is rejected after vCPU creation */
+ vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
+ ASSERT_GE(vcpu_fd, 0);
+ rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap);
+ EXPECT_EQ(-1, rc);
+ EXPECT_EQ(EBUSY, errno);
+
+ close(vcpu_fd);
+ close(vm_fd);
+ close(kvm_fd);
+}
+
+/* calculate host virtual addr from guest physical addr */
+static void *gpa2hva(FIXTURE_DATA(uc_kvm) *self, u64 gpa)
+{
+ return (void *)(self->base_hva - self->base_gpa + gpa);
+}
+
+/* map / make additional memory available */
+static int uc_map_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length)
+{
+ struct kvm_s390_ucas_mapping map = {
+ .user_addr = (u64)gpa2hva(self, vcpu_addr),
+ .vcpu_addr = vcpu_addr,
+ .length = length,
+ };
+ pr_info("ucas map %p %p 0x%llx",
+ (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
+ return ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map);
+}
+
+/* unmap previously mapped memory */
+static int uc_unmap_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length)
+{
+ struct kvm_s390_ucas_mapping map = {
+ .user_addr = (u64)gpa2hva(self, vcpu_addr),
+ .vcpu_addr = vcpu_addr,
+ .length = length,
+ };
+ pr_info("ucas unmap %p %p 0x%llx",
+ (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
+ return ioctl(self->vcpu_fd, KVM_S390_UCAS_UNMAP, &map);
+}
+
+/* handle ucontrol exit by mapping the accessed segment */
+static void uc_handle_exit_ucontrol(FIXTURE_DATA(uc_kvm) *self)
+{
+ struct kvm_run *run = self->run;
+ u64 seg_addr;
+ int rc;
+
+ TEST_ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
+ switch (run->s390_ucontrol.pgm_code) {
+ case PGM_SEGMENT_TRANSLATION:
+ seg_addr = run->s390_ucontrol.trans_exc_code & ~(SZ_1M - 1);
+ pr_info("ucontrol pic segment translation 0x%llx, mapping segment 0x%lx\n",
+ run->s390_ucontrol.trans_exc_code, seg_addr);
+ /* map / make additional memory available */
+ rc = uc_map_ext(self, seg_addr, SZ_1M);
+ TEST_ASSERT_EQ(0, rc);
+ break;
+ default:
+ TEST_FAIL("UNEXPECTED PGM CODE %d", run->s390_ucontrol.pgm_code);
+ }
+}
+
+/*
+ * Enable storage key handling for the vcpu by leaving keyless-subset mode
+ * and clearing the storage key instruction intercepts (ISKE/SSKE/RRBE).
+ */
+static void uc_skey_enable(FIXTURE_DATA(uc_kvm) *self)
+{
+ struct kvm_s390_sie_block *sie_block = self->sie_block;
+
+ /* disable KSS */
+ sie_block->cpuflags &= ~CPUSTAT_KSS;
+ /* disable skey inst interception */
+ sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+}
+
+/*
+ * Handle an instruction interception.
+ * Returns true if the interception was handled and execution can continue.
+ */
+static bool uc_handle_insn_ic(FIXTURE_DATA(uc_kvm) *self)
+{
+ struct kvm_s390_sie_block *sie_block = self->sie_block;
+ int ilen = insn_length(sie_block->ipa >> 8);
+ struct kvm_run *run = self->run;
+
+ switch (run->s390_sieic.ipa) {
+ case 0xB229: /* ISKE */
+ case 0xB22b: /* SSKE */
+ case 0xB22a: /* RRBE */
+ uc_skey_enable(self);
+
+ /* rewind to reexecute intercepted instruction */
+ run->psw_addr = run->psw_addr - ilen;
+ pr_info("rewind guest addr to 0x%.16llx\n", run->psw_addr);
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*
+ * Handle the SIEIC exit; fail on interception codes not expected in the
+ * test cases.
+ * Returns true if the interception was handled and execution can continue.
+ */
+static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) *self)
+{
+ struct kvm_s390_sie_block *sie_block = self->sie_block;
+ struct kvm_run *run = self->run;
+
+ /* check SIE interception code */
+ pr_info("sieic: 0x%.2x 0x%.4x 0x%.8x\n",
+ run->s390_sieic.icptcode,
+ run->s390_sieic.ipa,
+ run->s390_sieic.ipb);
+ switch (run->s390_sieic.icptcode) {
+ case ICPT_INST:
+ /* end execution in caller on intercepted instruction */
+ pr_info("sie instruction interception\n");
+ return uc_handle_insn_ic(self);
+ case ICPT_KSS:
+ uc_skey_enable(self);
+ return true;
+ case ICPT_OPEREXC:
+ /* operation exception */
+ TEST_FAIL("sie exception on %.4x%.8x", sie_block->ipa, sie_block->ipb);
+ default:
+ TEST_FAIL("UNEXPECTED SIEIC CODE %d", run->s390_sieic.icptcode);
+ }
+ return true;
+}
+
+/* handle the VM exit; returns true if execution can continue */
+static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) *self)
+{
+ struct kvm_run *run = self->run;
+
+ switch (run->exit_reason) {
+ case KVM_EXIT_S390_UCONTROL:
+		/*
+		 * Check the program interruption code and handle the fault
+		 * by establishing a ucas mapping.
+		 */
+ uc_handle_exit_ucontrol(self);
+ break;
+ case KVM_EXIT_S390_SIEIC:
+ return uc_handle_sieic(self);
+ default:
+ pr_info("exit_reason %2d not handled\n", run->exit_reason);
+ }
+ return true;
+}
+
+/* run the VM until interrupted */
+static int uc_run_once(FIXTURE_DATA(uc_kvm) *self)
+{
+ int rc;
+
+ rc = ioctl(self->vcpu_fd, KVM_RUN, NULL);
+ print_run(self->run, self->sie_block);
+ print_regs(self->run);
+ pr_debug("run %d / %d %s\n", rc, errno, strerror(errno));
+ return rc;
+}
+
+static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) *self)
+{
+ struct kvm_s390_sie_block *sie_block = self->sie_block;
+
+ /* assert vm was interrupted by diag 0x0044 */
+ TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason);
+ TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode);
+ TEST_ASSERT_EQ(0x8300, sie_block->ipa);
+ TEST_ASSERT_EQ(0x440000, sie_block->ipb);
+}
+
+TEST_F(uc_kvm, uc_no_user_region)
+{
+ struct kvm_userspace_memory_region region = {
+ .slot = 1,
+ .guest_phys_addr = self->code_gpa,
+ .memory_size = VM_MEM_EXT_SIZE,
+ .userspace_addr = (uintptr_t)self->code_hva,
+ };
+ struct kvm_userspace_memory_region2 region2 = {
+ .slot = 1,
+ .guest_phys_addr = self->code_gpa,
+ .memory_size = VM_MEM_EXT_SIZE,
+ .userspace_addr = (uintptr_t)self->code_hva,
+ };
+
+ ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, ®ion));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION2, ®ion2));
+ ASSERT_EQ(EINVAL, errno);
+}
+
+TEST_F(uc_kvm, uc_map_unmap)
+{
+ struct kvm_sync_regs *sync_regs = &self->run->s.regs;
+ struct kvm_run *run = self->run;
+ const u64 disp = 1;
+ int rc;
+
+ /* copy test_mem_asm to code_hva / code_gpa */
+ TH_LOG("copy code %p to vm mapped memory %p / %p",
+ &test_mem_asm, (void *)self->code_hva, (void *)self->code_gpa);
+ memcpy((void *)self->code_hva, &test_mem_asm, PAGE_SIZE);
+
+ /* DAT disabled + 64 bit mode */
+ run->psw_mask = 0x0000000180000000ULL;
+ run->psw_addr = self->code_gpa;
+
+	/* set register content for test_mem_asm to access unmapped memory */
+ sync_regs->gprs[1] = 0x55;
+ sync_regs->gprs[5] = self->base_gpa;
+ sync_regs->gprs[6] = VM_MEM_SIZE + disp;
+ run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+
+ /* run and expect to fail with ucontrol pic segment translation */
+ ASSERT_EQ(0, uc_run_once(self));
+ ASSERT_EQ(1, sync_regs->gprs[0]);
+ ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
+
+ ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code);
+ ASSERT_EQ(self->base_gpa + VM_MEM_SIZE, run->s390_ucontrol.trans_exc_code);
+
+ /* fail to map memory at a non-segment-aligned address */
+ rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE + disp, VM_MEM_EXT_SIZE);
+ ASSERT_GT(0, rc)
+ TH_LOG("ucas map for non segment address should fail but didn't; "
+ "result %d not expected, %s", rc, strerror(errno));
+
+ /* map / make additional memory available */
+ rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE);
+ ASSERT_EQ(0, rc)
+ TH_LOG("ucas map result %d not expected, %s", rc, strerror(errno));
+ ASSERT_EQ(0, uc_run_once(self));
+ ASSERT_EQ(false, uc_handle_exit(self));
+ uc_assert_diag44(self);
+
+ /* assert registers and memory are in expected state */
+ ASSERT_EQ(2, sync_regs->gprs[0]);
+ ASSERT_EQ(0x55, sync_regs->gprs[1]);
+ ASSERT_EQ(0x55, *(u32 *)gpa2hva(self, self->base_gpa + VM_MEM_SIZE + disp));
+
+ /* unmap and run loop again */
+ rc = uc_unmap_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE);
+ ASSERT_EQ(0, rc)
+ TH_LOG("ucas unmap result %d not expected, %s", rc, strerror(errno));
+ ASSERT_EQ(0, uc_run_once(self));
+ ASSERT_EQ(3, sync_regs->gprs[0]);
+ ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
+ ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code);
+ /* handle ucontrol exit and remap memory after previous map and unmap */
+ ASSERT_EQ(true, uc_handle_exit(self));
+}
+
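+/*
+ * The gpr checks below rely on the sync_regs mechanism: setting values in
+ * run->s.regs and flagging them in run->kvm_dirty_regs (KVM_SYNC_GPRS) makes
+ * KVM load them into the vCPU before the next KVM_RUN, and KVM writes the
+ * current register state back into run->s.regs on exit, which is why both
+ * sync_regs and KVM_GET_REGS can be verified.
+ */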
+TEST_F(uc_kvm, uc_gprs)
+{
+ struct kvm_sync_regs *sync_regs = &self->run->s.regs;
+ struct kvm_run *run = self->run;
+ struct kvm_regs regs = {};
+
+ /* Set registers to values that are different from the ones that we expect below */
+ for (int i = 0; i < 8; i++)
+ sync_regs->gprs[i] = 8;
+ run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+
+ /* copy test_gprs_asm to code_hva / code_gpa */
+ TH_LOG("copy code %p to vm mapped memory %p / %p",
+ &test_gprs_asm, (void *)self->code_hva, (void *)self->code_gpa);
+ memcpy((void *)self->code_hva, &test_gprs_asm, PAGE_SIZE);
+
+ /* DAT disabled + 64 bit mode */
+ run->psw_mask = 0x0000000180000000ULL;
+ run->psw_addr = self->code_gpa;
+
+ /* run and expect interception of diag 44 */
+ ASSERT_EQ(0, uc_run_once(self));
+ ASSERT_EQ(false, uc_handle_exit(self));
+ uc_assert_diag44(self);
+
+ /* Retrieve and check guest register values */
+ ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, ®s));
+ for (int i = 0; i < 8; i++) {
+ ASSERT_EQ(i, regs.gprs[i]);
+ ASSERT_EQ(i, sync_regs->gprs[i]);
+ }
+
+ /* run and expect interception of diag 44 again */
+ ASSERT_EQ(0, uc_run_once(self));
+ ASSERT_EQ(false, uc_handle_exit(self));
+ uc_assert_diag44(self);
+
+ /* check continued increment of register 0 value */
+ ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, ®s));
+ ASSERT_EQ(1, regs.gprs[0]);
+ ASSERT_EQ(1, sync_regs->gprs[0]);
+}
+
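+/*
+ * Storage key layout used by the asserts below: bits 0-3 hold the access
+ * control key (mask 0xf0), 0x08 is fetch protection, 0x04 the reference bit
+ * and 0x02 the change bit. The initial key therefore reads 0x06 (R and C
+ * set), SSKE stores 0x34, and RRBE clears only the reference bit, leaving
+ * 0x34 & 0xfa == 0x30.
+ */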
+TEST_F(uc_kvm, uc_skey)
+{
+ struct kvm_s390_sie_block *sie_block = self->sie_block;
+ struct kvm_sync_regs *sync_regs = &self->run->s.regs;
+ u64 test_vaddr = VM_MEM_SIZE - (SZ_1M / 2);
+ struct kvm_run *run = self->run;
+ const u8 skeyvalue = 0x34;
+
+ /* copy test_skey_asm to code_hva / code_gpa */
+ TH_LOG("copy code %p to vm mapped memory %p / %p",
+ &test_skey_asm, (void *)self->code_hva, (void *)self->code_gpa);
+ memcpy((void *)self->code_hva, &test_skey_asm, PAGE_SIZE);
+
+ /* set register content for test_skey_asm to access the tested memory */
+ sync_regs->gprs[1] = skeyvalue;
+ sync_regs->gprs[5] = self->base_gpa;
+ sync_regs->gprs[6] = test_vaddr;
+ run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+
+ /* DAT disabled + 64 bit mode */
+ run->psw_mask = 0x0000000180000000ULL;
+ run->psw_addr = self->code_gpa;
+
+ ASSERT_EQ(0, uc_run_once(self));
+ ASSERT_EQ(true, uc_handle_exit(self));
+ ASSERT_EQ(1, sync_regs->gprs[0]);
+
+ /* ISKE */
+ ASSERT_EQ(0, uc_run_once(self));
+
+ /*
+ * Bail out and skip the test if ISKE is still intercepted even though
+ * uc_skey_enable() was executed. The instructions are not handled by
+ * the kernel in that case, so there is nothing to test here.
+ */
+ TEST_ASSERT_EQ(0, sie_block->cpuflags & CPUSTAT_KSS);
+ TEST_ASSERT_EQ(0, sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE));
+ TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason);
+ TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode);
+ TEST_REQUIRE(sie_block->ipa != 0xb229);
+
+ /* ISKE contd. */
+ ASSERT_EQ(false, uc_handle_exit(self));
+ ASSERT_EQ(2, sync_regs->gprs[0]);
+ /* assert initial skey (ACC = 0, R & C = 1) */
+ ASSERT_EQ(0x06, sync_regs->gprs[1]);
+ uc_assert_diag44(self);
+
+ /* SSKE + ISKE */
+ sync_regs->gprs[1] = skeyvalue;
+ run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+ ASSERT_EQ(0, uc_run_once(self));
+ ASSERT_EQ(false, uc_handle_exit(self));
+ ASSERT_EQ(3, sync_regs->gprs[0]);
+ ASSERT_EQ(skeyvalue, sync_regs->gprs[1]);
+ uc_assert_diag44(self);
+
+ /* RRBE + ISKE */
+ sync_regs->gprs[1] = skeyvalue;
+ run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+ ASSERT_EQ(0, uc_run_once(self));
+ ASSERT_EQ(false, uc_handle_exit(self));
+ ASSERT_EQ(4, sync_regs->gprs[0]);
+ /* assert R reset but rest of skey unchanged */
+ ASSERT_EQ(skeyvalue & 0xfa, sync_regs->gprs[1]);
+ ASSERT_EQ(0, sync_regs->gprs[1] & 0x04);
+ uc_assert_diag44(self);
+}
+
+TEST_HARNESS_MAIN
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Test for s390x CMMA migration
- *
- * Copyright IBM Corp. 2023
- *
- * Authors:
- * Nico Boehr <nrb@linux.ibm.com>
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "kselftest.h"
-#include "ucall_common.h"
-#include "processor.h"
-
-#define MAIN_PAGE_COUNT 512
-
-#define TEST_DATA_PAGE_COUNT 512
-#define TEST_DATA_MEMSLOT 1
-#define TEST_DATA_START_GFN PAGE_SIZE
-
-#define TEST_DATA_TWO_PAGE_COUNT 256
-#define TEST_DATA_TWO_MEMSLOT 2
-#define TEST_DATA_TWO_START_GFN (2 * PAGE_SIZE)
-
-static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT];
-
-/**
- * Dirty CMMA attributes of exactly one page in the TEST_DATA memslot,
- * so use_cmma goes on and the CMMA related ioctls do something.
- */
-static void guest_do_one_essa(void)
-{
- asm volatile(
- /* load TEST_DATA_START_GFN into r1 */
- " llilf 1,%[start_gfn]\n"
- /* calculate the address from the gfn */
- " sllg 1,1,12(0)\n"
- /* set the first page in TEST_DATA memslot to STABLE */
- " .insn rrf,0xb9ab0000,2,1,1,0\n"
- /* hypercall */
- " diag 0,0,0x501\n"
- "0: j 0b"
- :
- : [start_gfn] "L"(TEST_DATA_START_GFN)
- : "r1", "r2", "memory", "cc"
- );
-}
-
-/**
- * Touch CMMA attributes of all pages in TEST_DATA memslot. Set them to stable
- * state.
- */
-static void guest_dirty_test_data(void)
-{
- asm volatile(
- /* r1 = TEST_DATA_START_GFN */
- " xgr 1,1\n"
- " llilf 1,%[start_gfn]\n"
- /* r5 = TEST_DATA_PAGE_COUNT */
- " lghi 5,%[page_count]\n"
- /* r5 += r1 */
- "2: agfr 5,1\n"
- /* r2 = r1 << PAGE_SHIFT */
- "1: sllg 2,1,12(0)\n"
- /* essa(r4, r2, SET_STABLE) */
- " .insn rrf,0xb9ab0000,4,2,1,0\n"
- /* i++ */
- " agfi 1,1\n"
- /* if r1 < r5 goto 1 */
- " cgrjl 1,5,1b\n"
- /* hypercall */
- " diag 0,0,0x501\n"
- "0: j 0b"
- :
- : [start_gfn] "L"(TEST_DATA_START_GFN),
- [page_count] "L"(TEST_DATA_PAGE_COUNT)
- :
- /* the counter in our loop over the pages */
- "r1",
- /* the calculated page physical address */
- "r2",
- /* ESSA output register */
- "r4",
- /* last page */
- "r5",
- "cc", "memory"
- );
-}
-
-static void create_main_memslot(struct kvm_vm *vm)
-{
- int i;
-
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, MAIN_PAGE_COUNT, 0);
- /* set the array of memslots to zero like __vm_create does */
- for (i = 0; i < NR_MEM_REGIONS; i++)
- vm->memslots[i] = 0;
-}
-
-static void create_test_memslot(struct kvm_vm *vm)
-{
- vm_userspace_mem_region_add(vm,
- VM_MEM_SRC_ANONYMOUS,
- TEST_DATA_START_GFN << vm->page_shift,
- TEST_DATA_MEMSLOT,
- TEST_DATA_PAGE_COUNT,
- 0
- );
- vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
-}
-
-static void create_memslots(struct kvm_vm *vm)
-{
- /*
- * Our VM has the following memory layout:
- * +------+---------------------------+
- * | GFN | Memslot |
- * +------+---------------------------+
- * | 0 | |
- * | ... | MAIN (Code, Stack, ...) |
- * | 511 | |
- * +------+---------------------------+
- * | 4096 | |
- * | ... | TEST_DATA |
- * | 4607 | |
- * +------+---------------------------+
- */
- create_main_memslot(vm);
- create_test_memslot(vm);
-}
-
-static void finish_vm_setup(struct kvm_vm *vm)
-{
- struct userspace_mem_region *slot0;
-
- kvm_vm_elf_load(vm, program_invocation_name);
-
- slot0 = memslot2region(vm, 0);
- ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
-
- kvm_arch_vm_post_create(vm);
-}
-
-static struct kvm_vm *create_vm_two_memslots(void)
-{
- struct kvm_vm *vm;
-
- vm = vm_create_barebones();
-
- create_memslots(vm);
-
- finish_vm_setup(vm);
-
- return vm;
-}
-
-static void enable_cmma(struct kvm_vm *vm)
-{
- int r;
-
- r = __kvm_device_attr_set(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA, NULL);
- TEST_ASSERT(!r, "enabling cmma failed r=%d errno=%d", r, errno);
-}
-
-static void enable_dirty_tracking(struct kvm_vm *vm)
-{
- vm_mem_region_set_flags(vm, 0, KVM_MEM_LOG_DIRTY_PAGES);
- vm_mem_region_set_flags(vm, TEST_DATA_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
-}
-
-static int __enable_migration_mode(struct kvm_vm *vm)
-{
- return __kvm_device_attr_set(vm->fd,
- KVM_S390_VM_MIGRATION,
- KVM_S390_VM_MIGRATION_START,
- NULL
- );
-}
-
-static void enable_migration_mode(struct kvm_vm *vm)
-{
- int r = __enable_migration_mode(vm);
-
- TEST_ASSERT(!r, "enabling migration mode failed r=%d errno=%d", r, errno);
-}
-
-static bool is_migration_mode_on(struct kvm_vm *vm)
-{
- u64 out;
- int r;
-
- r = __kvm_device_attr_get(vm->fd,
- KVM_S390_VM_MIGRATION,
- KVM_S390_VM_MIGRATION_STATUS,
- &out
- );
- TEST_ASSERT(!r, "getting migration mode status failed r=%d errno=%d", r, errno);
- return out;
-}
-
-static int vm_get_cmma_bits(struct kvm_vm *vm, u64 flags, int *errno_out)
-{
- struct kvm_s390_cmma_log args;
- int rc;
-
- errno = 0;
-
- args = (struct kvm_s390_cmma_log){
- .start_gfn = 0,
- .count = sizeof(cmma_value_buf),
- .flags = flags,
- .values = (__u64)&cmma_value_buf[0]
- };
- rc = __vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
-
- *errno_out = errno;
- return rc;
-}
-
-static void test_get_cmma_basic(void)
-{
- struct kvm_vm *vm = create_vm_two_memslots();
- struct kvm_vcpu *vcpu;
- int rc, errno_out;
-
- /* GET_CMMA_BITS without CMMA enabled should fail */
- rc = vm_get_cmma_bits(vm, 0, &errno_out);
- TEST_ASSERT_EQ(rc, -1);
- TEST_ASSERT_EQ(errno_out, ENXIO);
-
- enable_cmma(vm);
- vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
-
- vcpu_run(vcpu);
-
- /* GET_CMMA_BITS without migration mode and without peeking should fail */
- rc = vm_get_cmma_bits(vm, 0, &errno_out);
- TEST_ASSERT_EQ(rc, -1);
- TEST_ASSERT_EQ(errno_out, EINVAL);
-
- /* GET_CMMA_BITS without migration mode and with peeking should work */
- rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out);
- TEST_ASSERT_EQ(rc, 0);
- TEST_ASSERT_EQ(errno_out, 0);
-
- enable_dirty_tracking(vm);
- enable_migration_mode(vm);
-
- /* GET_CMMA_BITS with invalid flags */
- rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out);
- TEST_ASSERT_EQ(rc, -1);
- TEST_ASSERT_EQ(errno_out, EINVAL);
-
- kvm_vm_free(vm);
-}
-
-static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu)
-{
- TEST_ASSERT_EQ(vcpu->run->exit_reason, 13);
- TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4);
- TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300);
- TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000);
-}
-
-static void test_migration_mode(void)
-{
- struct kvm_vm *vm = vm_create_barebones();
- struct kvm_vcpu *vcpu;
- u64 orig_psw;
- int rc;
-
- /* enabling migration mode on a VM without memory should fail */
- rc = __enable_migration_mode(vm);
- TEST_ASSERT_EQ(rc, -1);
- TEST_ASSERT_EQ(errno, EINVAL);
- TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
- errno = 0;
-
- create_memslots(vm);
- finish_vm_setup(vm);
-
- enable_cmma(vm);
- vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
- orig_psw = vcpu->run->psw_addr;
-
- /*
- * Execute one essa instruction in the guest. Otherwise the guest will
- * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
- */
- vcpu_run(vcpu);
- assert_exit_was_hypercall(vcpu);
-
- /* migration mode when memslots have dirty tracking off should fail */
- rc = __enable_migration_mode(vm);
- TEST_ASSERT_EQ(rc, -1);
- TEST_ASSERT_EQ(errno, EINVAL);
- TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
- errno = 0;
-
- /* enable dirty tracking */
- enable_dirty_tracking(vm);
-
- /* enabling migration mode should work now */
- rc = __enable_migration_mode(vm);
- TEST_ASSERT_EQ(rc, 0);
- TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
- errno = 0;
-
- /* execute another ESSA instruction to see this goes fine */
- vcpu->run->psw_addr = orig_psw;
- vcpu_run(vcpu);
- assert_exit_was_hypercall(vcpu);
-
- /*
- * With migration mode on, create a new memslot with dirty tracking off.
- * This should turn off migration mode.
- */
- TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
- vm_userspace_mem_region_add(vm,
- VM_MEM_SRC_ANONYMOUS,
- TEST_DATA_TWO_START_GFN << vm->page_shift,
- TEST_DATA_TWO_MEMSLOT,
- TEST_DATA_TWO_PAGE_COUNT,
- 0
- );
- TEST_ASSERT(!is_migration_mode_on(vm),
- "creating memslot without dirty tracking turns off migration mode"
- );
-
- /* ESSA instructions should still execute fine */
- vcpu->run->psw_addr = orig_psw;
- vcpu_run(vcpu);
- assert_exit_was_hypercall(vcpu);
-
- /*
- * Turn on dirty tracking on the new memslot.
- * It should be possible to turn migration mode back on again.
- */
- vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
- rc = __enable_migration_mode(vm);
- TEST_ASSERT_EQ(rc, 0);
- TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
- errno = 0;
-
- /*
- * Turn off dirty tracking again, this time with just a flag change.
- * Again, migration mode should turn off.
- */
- TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
- vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, 0);
- TEST_ASSERT(!is_migration_mode_on(vm),
- "disabling dirty tracking should turn off migration mode"
- );
-
- /* ESSA instructions should still execute fine */
- vcpu->run->psw_addr = orig_psw;
- vcpu_run(vcpu);
- assert_exit_was_hypercall(vcpu);
-
- kvm_vm_free(vm);
-}
-
-/**
- * Given a VM with the MAIN and TEST_DATA memslot, assert that both slots have
- * CMMA attributes of all pages in both memslots and nothing more dirty.
- * This has the useful side effect of ensuring nothing is CMMA dirty after this
- * function.
- */
-static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
-{
- struct kvm_s390_cmma_log args;
-
- /*
- * First iteration - everything should be dirty.
- * Start at the main memslot...
- */
- args = (struct kvm_s390_cmma_log){
- .start_gfn = 0,
- .count = sizeof(cmma_value_buf),
- .flags = 0,
- .values = (__u64)&cmma_value_buf[0]
- };
- memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
- vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
- TEST_ASSERT_EQ(args.count, MAIN_PAGE_COUNT);
- TEST_ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT);
- TEST_ASSERT_EQ(args.start_gfn, 0);
-
- /* ...and then - after a hole - the TEST_DATA memslot should follow */
- args = (struct kvm_s390_cmma_log){
- .start_gfn = MAIN_PAGE_COUNT,
- .count = sizeof(cmma_value_buf),
- .flags = 0,
- .values = (__u64)&cmma_value_buf[0]
- };
- memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
- vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
- TEST_ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT);
- TEST_ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN);
- TEST_ASSERT_EQ(args.remaining, 0);
-
- /* ...and nothing else should be there */
- args = (struct kvm_s390_cmma_log){
- .start_gfn = TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT,
- .count = sizeof(cmma_value_buf),
- .flags = 0,
- .values = (__u64)&cmma_value_buf[0]
- };
- memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
- vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
- TEST_ASSERT_EQ(args.count, 0);
- TEST_ASSERT_EQ(args.start_gfn, 0);
- TEST_ASSERT_EQ(args.remaining, 0);
-}
-
-/**
- * Given a VM, assert no pages are CMMA dirty.
- */
-static void assert_no_pages_cmma_dirty(struct kvm_vm *vm)
-{
- struct kvm_s390_cmma_log args;
-
- /* If we start from GFN 0 again, nothing should be dirty. */
- args = (struct kvm_s390_cmma_log){
- .start_gfn = 0,
- .count = sizeof(cmma_value_buf),
- .flags = 0,
- .values = (__u64)&cmma_value_buf[0]
- };
- memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
- vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
- if (args.count || args.remaining || args.start_gfn)
- TEST_FAIL("pages are still dirty start_gfn=0x%llx count=%u remaining=%llu",
- args.start_gfn,
- args.count,
- args.remaining
- );
-}
-
-static void test_get_inital_dirty(void)
-{
- struct kvm_vm *vm = create_vm_two_memslots();
- struct kvm_vcpu *vcpu;
-
- enable_cmma(vm);
- vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
-
- /*
- * Execute one essa instruction in the guest. Otherwise the guest will
- * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
- */
- vcpu_run(vcpu);
- assert_exit_was_hypercall(vcpu);
-
- enable_dirty_tracking(vm);
- enable_migration_mode(vm);
-
- assert_all_slots_cmma_dirty(vm);
-
- /* Start from the beginning again and make sure nothing else is dirty */
- assert_no_pages_cmma_dirty(vm);
-
- kvm_vm_free(vm);
-}
-
-static void query_cmma_range(struct kvm_vm *vm,
- u64 start_gfn, u64 gfn_count,
- struct kvm_s390_cmma_log *res_out)
-{
- *res_out = (struct kvm_s390_cmma_log){
- .start_gfn = start_gfn,
- .count = gfn_count,
- .flags = 0,
- .values = (__u64)&cmma_value_buf[0]
- };
- memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
- vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, res_out);
-}
-
-/**
- * Assert the given cmma_log struct that was executed by query_cmma_range()
- * indicates the first dirty gfn is at first_dirty_gfn and contains exactly
- * dirty_gfn_count CMMA values.
- */
-static void assert_cmma_dirty(u64 first_dirty_gfn,
- u64 dirty_gfn_count,
- const struct kvm_s390_cmma_log *res)
-{
- TEST_ASSERT_EQ(res->start_gfn, first_dirty_gfn);
- TEST_ASSERT_EQ(res->count, dirty_gfn_count);
- for (size_t i = 0; i < dirty_gfn_count; i++)
- TEST_ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */
- TEST_ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */
-}
-
-static void test_get_skip_holes(void)
-{
- size_t gfn_offset;
- struct kvm_vm *vm = create_vm_two_memslots();
- struct kvm_s390_cmma_log log;
- struct kvm_vcpu *vcpu;
- u64 orig_psw;
-
- enable_cmma(vm);
- vcpu = vm_vcpu_add(vm, 1, guest_dirty_test_data);
-
- orig_psw = vcpu->run->psw_addr;
-
- /*
- * Execute some essa instructions in the guest. Otherwise the guest will
- * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
- */
- vcpu_run(vcpu);
- assert_exit_was_hypercall(vcpu);
-
- enable_dirty_tracking(vm);
- enable_migration_mode(vm);
-
- /* un-dirty all pages */
- assert_all_slots_cmma_dirty(vm);
-
- /* Then, dirty just the TEST_DATA memslot */
- vcpu->run->psw_addr = orig_psw;
- vcpu_run(vcpu);
-
- gfn_offset = TEST_DATA_START_GFN;
- /**
- * Query CMMA attributes of one page, starting at page 0. Since the
- * main memslot was not touched by the VM, this should yield the first
- * page of the TEST_DATA memslot.
- * The dirty bitmap should now look like this:
- * 0: not dirty
- * [0x1, 0x200): dirty
- */
- query_cmma_range(vm, 0, 1, &log);
- assert_cmma_dirty(gfn_offset, 1, &log);
- gfn_offset++;
-
- /**
- * Query CMMA attributes of 32 (0x20) pages past the end of the TEST_DATA
- * memslot. This should wrap back to the beginning of the TEST_DATA
- * memslot, page 1.
- * The dirty bitmap should now look like this:
- * [0, 0x21): not dirty
- * [0x21, 0x200): dirty
- */
- query_cmma_range(vm, TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, 0x20, &log);
- assert_cmma_dirty(gfn_offset, 0x20, &log);
- gfn_offset += 0x20;
-
- /* Skip 32 pages */
- gfn_offset += 0x20;
-
- /**
- * After skipping 32 pages, query the next 32 (0x20) pages.
- * The dirty bitmap should now look like this:
- * [0, 0x21): not dirty
- * [0x21, 0x41): dirty
- * [0x41, 0x61): not dirty
- * [0x61, 0x200): dirty
- */
- query_cmma_range(vm, gfn_offset, 0x20, &log);
- assert_cmma_dirty(gfn_offset, 0x20, &log);
- gfn_offset += 0x20;
-
- /**
- * Query 1 page from the beginning of the TEST_DATA memslot. This should
- * yield page 0x21.
- * The dirty bitmap should now look like this:
- * [0, 0x22): not dirty
- * [0x22, 0x41): dirty
- * [0x41, 0x61): not dirty
- * [0x61, 0x200): dirty
- */
- query_cmma_range(vm, TEST_DATA_START_GFN, 1, &log);
- assert_cmma_dirty(TEST_DATA_START_GFN + 0x21, 1, &log);
- gfn_offset++;
-
- /**
- * Query 15 (0xF) pages from page 0x23 in TEST_DATA memslot.
- * This should yield pages [0x23, 0x33).
- * The dirty bitmap should now look like this:
- * [0, 0x22): not dirty
- * 0x22: dirty
- * [0x23, 0x33): not dirty
- * [0x33, 0x41): dirty
- * [0x41, 0x61): not dirty
- * [0x61, 0x200): dirty
- */
- gfn_offset = TEST_DATA_START_GFN + 0x23;
- query_cmma_range(vm, gfn_offset, 15, &log);
- assert_cmma_dirty(gfn_offset, 15, &log);
-
- /**
- * Query 17 (0x11) pages from page 0x22 in TEST_DATA memslot.
- * This should yield page [0x22, 0x33)
- * The dirty bitmap should now look like this:
- * [0, 0x33): not dirty
- * [0x33, 0x41): dirty
- * [0x41, 0x61): not dirty
- * [0x61, 0x200): dirty
- */
- gfn_offset = TEST_DATA_START_GFN + 0x22;
- query_cmma_range(vm, gfn_offset, 17, &log);
- assert_cmma_dirty(gfn_offset, 17, &log);
-
- /**
- * Query 25 (0x19) pages from page 0x40 in TEST_DATA memslot.
- * This should yield page 0x40 and nothing more, since there are more
- * than 16 non-dirty pages after page 0x40.
- * The dirty bitmap should now look like this:
- * [0, 0x33): not dirty
- * [0x33, 0x40): dirty
- * [0x40, 0x61): not dirty
- * [0x61, 0x200): dirty
- */
- gfn_offset = TEST_DATA_START_GFN + 0x40;
- query_cmma_range(vm, gfn_offset, 25, &log);
- assert_cmma_dirty(gfn_offset, 1, &log);
-
- /**
- * Query pages [0x33, 0x40).
- * The dirty bitmap should now look like this:
- * [0, 0x61): not dirty
- * [0x61, 0x200): dirty
- */
- gfn_offset = TEST_DATA_START_GFN + 0x33;
- query_cmma_range(vm, gfn_offset, 0x40 - 0x33, &log);
- assert_cmma_dirty(gfn_offset, 0x40 - 0x33, &log);
-
- /**
- * Query the remaining pages [0x61, 0x200).
- */
- gfn_offset = TEST_DATA_START_GFN;
- query_cmma_range(vm, gfn_offset, TEST_DATA_PAGE_COUNT - 0x61, &log);
- assert_cmma_dirty(TEST_DATA_START_GFN + 0x61, TEST_DATA_PAGE_COUNT - 0x61, &log);
-
- assert_no_pages_cmma_dirty(vm);
-}
-
-struct testdef {
- const char *name;
- void (*test)(void);
-} testlist[] = {
- { "migration mode and dirty tracking", test_migration_mode },
- { "GET_CMMA_BITS: basic calls", test_get_cmma_basic },
- { "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty },
- { "GET_CMMA_BITS: holes are skipped", test_get_skip_holes },
-};
-
-/**
- * The kernel may support CMMA, but the machine may not (i.e. if running as
- * guest-3).
- *
- * In this case, the CMMA capabilities are all there, but the CMMA-related
- * ioctls fail. To find out whether the machine supports CMMA, create a
- * temporary VM and then query the CMMA feature of the VM.
- */
-static int machine_has_cmma(void)
-{
- struct kvm_vm *vm = vm_create_barebones();
- int r;
-
- r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA);
- kvm_vm_free(vm);
-
- return r;
-}
-
-int main(int argc, char *argv[])
-{
- int idx;
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_CMMA_MIGRATION));
- TEST_REQUIRE(machine_has_cmma());
-
- ksft_print_header();
-
- ksft_set_plan(ARRAY_SIZE(testlist));
-
- for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
- testlist[idx].test();
- ksft_test_result_pass("%s\n", testlist[idx].name);
- }
-
- ksft_finished(); /* Print results and exit() accordingly */
-}
+++ /dev/null
-CONFIG_KVM=y
-CONFIG_KVM_S390_UCONTROL=y
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright IBM Corp. 2024
- *
- * Authors:
- * Hariharan Mari <hari55@linux.ibm.com>
- *
- * The tests compare the result of the KVM ioctl for obtaining CPU subfunction data with those
- * from an ASM block performing the same CPU subfunction. Currently KVM doesn't mask instruction
- * query data reported via the CPU Model, allowing us to directly compare it with the data
- * acquired through executing the queries in the test.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include "facility.h"
-
-#include "kvm_util.h"
-
-#define PLO_FUNCTION_MAX 256
-
-/* Query available CPU subfunctions */
-struct kvm_s390_vm_cpu_subfunc cpu_subfunc;
-
-static void get_cpu_machine_subfuntions(struct kvm_vm *vm,
- struct kvm_s390_vm_cpu_subfunc *cpu_subfunc)
-{
- int r;
-
- r = __kvm_device_attr_get(vm->fd, KVM_S390_VM_CPU_MODEL,
- KVM_S390_VM_CPU_MACHINE_SUBFUNC, cpu_subfunc);
-
- TEST_ASSERT(!r, "Get cpu subfunctions failed r=%d errno=%d", r, errno);
-}
-
-static inline int plo_test_bit(unsigned char nr)
-{
- unsigned long function = nr | 0x100;
- int cc;
-
- asm volatile(" lgr 0,%[function]\n"
- /* Parameter registers are ignored for "test bit" */
- " plo 0,0,0,0(0)\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (cc)
- : [function] "d" (function)
- : "cc", "0");
- return cc == 0;
-}
-
-/* Testing Perform Locked Operation (PLO) CPU subfunction's ASM block */
-static void test_plo_asm_block(u8 (*query)[32])
-{
- for (int i = 0; i < PLO_FUNCTION_MAX; ++i) {
- if (plo_test_bit(i))
- (*query)[i >> 3] |= 0x80 >> (i & 7);
- }
-}
-
-/* Testing Crypto Compute Message Authentication Code (KMAC) CPU subfunction's ASM block */
-static void test_kmac_asm_block(u8 (*query)[16])
-{
- asm volatile(" la %%r1,%[query]\n"
- " xgr %%r0,%%r0\n"
- " .insn rre,0xb91e0000,0,2\n"
- : [query] "=R" (*query)
- :
- : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Cipher Message with Chaining (KMC) CPU subfunction's ASM block */
-static void test_kmc_asm_block(u8 (*query)[16])
-{
- asm volatile(" la %%r1,%[query]\n"
- " xgr %%r0,%%r0\n"
- " .insn rre,0xb92f0000,2,4\n"
- : [query] "=R" (*query)
- :
- : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Cipher Message (KM) CPU subfunction's ASM block */
-static void test_km_asm_block(u8 (*query)[16])
-{
- asm volatile(" la %%r1,%[query]\n"
- " xgr %%r0,%%r0\n"
- " .insn rre,0xb92e0000,2,4\n"
- : [query] "=R" (*query)
- :
- : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Compute Intermediate Message Digest (KIMD) CPU subfunction's ASM block */
-static void test_kimd_asm_block(u8 (*query)[16])
-{
- asm volatile(" la %%r1,%[query]\n"
- " xgr %%r0,%%r0\n"
- " .insn rre,0xb93e0000,0,2\n"
- : [query] "=R" (*query)
- :
- : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Compute Last Message Digest (KLMD) CPU subfunction's ASM block */
-static void test_klmd_asm_block(u8 (*query)[16])
-{
- asm volatile(" la %%r1,%[query]\n"
- " xgr %%r0,%%r0\n"
- " .insn rre,0xb93f0000,0,2\n"
- : [query] "=R" (*query)
- :
- : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Cipher Message with Counter (KMCTR) CPU subfunction's ASM block */
-static void test_kmctr_asm_block(u8 (*query)[16])
-{
- asm volatile(" la %%r1,%[query]\n"
- " xgr %%r0,%%r0\n"
- " .insn rrf,0xb92d0000,2,4,6,0\n"
- : [query] "=R" (*query)
- :
- : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Cipher Message with Cipher Feedback (KMF) CPU subfunction's ASM block */
-static void test_kmf_asm_block(u8 (*query)[16])
-{
- asm volatile(" la %%r1,%[query]\n"
- " xgr %%r0,%%r0\n"
- " .insn rre,0xb92a0000,2,4\n"
- : [query] "=R" (*query)
- :
- : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Cipher Message with Output Feedback (KMO) CPU subfunction's ASM block */
-static void test_kmo_asm_block(u8 (*query)[16])
-{
- asm volatile(" la %%r1,%[query]\n"
- " xgr %%r0,%%r0\n"
- " .insn rre,0xb92b0000,2,4\n"
- : [query] "=R" (*query)
- :
- : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Perform Cryptographic Computation (PCC) CPU subfunction's ASM block */
-static void test_pcc_asm_block(u8 (*query)[16])
-{
- asm volatile(" la %%r1,%[query]\n"
- " xgr %%r0,%%r0\n"
- " .insn rre,0xb92c0000,0,0\n"
- : [query] "=R" (*query)
- :
- : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Perform Random Number Operation (PRNO) CPU subfunction's ASM block */
-static void test_prno_asm_block(u8 (*query)[16])
-{
- asm volatile(" la %%r1,%[query]\n"
- " xgr %%r0,%%r0\n"
- " .insn rre,0xb93c0000,2,4\n"
- : [query] "=R" (*query)
- :
- : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Cipher Message with Authentication (KMA) CPU subfunction's ASM block */
-static void test_kma_asm_block(u8 (*query)[16])
-{
- asm volatile(" la %%r1,%[query]\n"
- " xgr %%r0,%%r0\n"
- " .insn rrf,0xb9290000,2,4,6,0\n"
- : [query] "=R" (*query)
- :
- : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Compute Digital Signature Authentication (KDSA) CPU subfunction's ASM block */
-static void test_kdsa_asm_block(u8 (*query)[16])
-{
- asm volatile(" la %%r1,%[query]\n"
- " xgr %%r0,%%r0\n"
- " .insn rre,0xb93a0000,0,2\n"
- : [query] "=R" (*query)
- :
- : "cc", "r0", "r1");
-}
-
-/* Testing Sort Lists (SORTL) CPU subfunction's ASM block */
-static void test_sortl_asm_block(u8 (*query)[32])
-{
- asm volatile(" lghi 0,0\n"
- " la 1,%[query]\n"
- " .insn rre,0xb9380000,2,4\n"
- : [query] "=R" (*query)
- :
- : "cc", "0", "1");
-}
-
-/* Testing Deflate Conversion Call (DFLTCC) CPU subfunction's ASM block */
-static void test_dfltcc_asm_block(u8 (*query)[32])
-{
- asm volatile(" lghi 0,0\n"
- " la 1,%[query]\n"
- " .insn rrf,0xb9390000,2,4,6,0\n"
- : [query] "=R" (*query)
- :
- : "cc", "0", "1");
-}
-
-/*
- * Testing Perform Function with Concurrent Results (PFCR)
- * CPU subfunction's ASM block
- */
-static void test_pfcr_asm_block(u8 (*query)[16])
-{
- asm volatile(" lghi 0,0\n"
- " .insn rsy,0xeb0000000016,0,0,%[query]\n"
- : [query] "=QS" (*query)
- :
- : "cc", "0");
-}
-
-typedef void (*testfunc_t)(u8 (*array)[]);
-
-struct testdef {
- const char *subfunc_name;
- u8 *subfunc_array;
- size_t array_size;
- testfunc_t test;
- int facility_bit;
-} testlist[] = {
- /*
- * PLO was introduced in the very first 64-bit machine generation.
- * Hence it is assumed PLO is always installed in Z Arch.
- */
- { "PLO", cpu_subfunc.plo, sizeof(cpu_subfunc.plo), test_plo_asm_block, 1 },
- /* MSA - Facility bit 17 */
- { "KMAC", cpu_subfunc.kmac, sizeof(cpu_subfunc.kmac), test_kmac_asm_block, 17 },
- { "KMC", cpu_subfunc.kmc, sizeof(cpu_subfunc.kmc), test_kmc_asm_block, 17 },
- { "KM", cpu_subfunc.km, sizeof(cpu_subfunc.km), test_km_asm_block, 17 },
- { "KIMD", cpu_subfunc.kimd, sizeof(cpu_subfunc.kimd), test_kimd_asm_block, 17 },
- { "KLMD", cpu_subfunc.klmd, sizeof(cpu_subfunc.klmd), test_klmd_asm_block, 17 },
- /* MSA - Facility bit 77 */
- { "KMCTR", cpu_subfunc.kmctr, sizeof(cpu_subfunc.kmctr), test_kmctr_asm_block, 77 },
- { "KMF", cpu_subfunc.kmf, sizeof(cpu_subfunc.kmf), test_kmf_asm_block, 77 },
- { "KMO", cpu_subfunc.kmo, sizeof(cpu_subfunc.kmo), test_kmo_asm_block, 77 },
- { "PCC", cpu_subfunc.pcc, sizeof(cpu_subfunc.pcc), test_pcc_asm_block, 77 },
- /* MSA5 - Facility bit 57 */
- { "PPNO", cpu_subfunc.ppno, sizeof(cpu_subfunc.ppno), test_prno_asm_block, 57 },
- /* MSA8 - Facility bit 146 */
- { "KMA", cpu_subfunc.kma, sizeof(cpu_subfunc.kma), test_kma_asm_block, 146 },
- /* MSA9 - Facility bit 155 */
- { "KDSA", cpu_subfunc.kdsa, sizeof(cpu_subfunc.kdsa), test_kdsa_asm_block, 155 },
- /* SORTL - Facility bit 150 */
- { "SORTL", cpu_subfunc.sortl, sizeof(cpu_subfunc.sortl), test_sortl_asm_block, 150 },
- /* DFLTCC - Facility bit 151 */
- { "DFLTCC", cpu_subfunc.dfltcc, sizeof(cpu_subfunc.dfltcc), test_dfltcc_asm_block, 151 },
- /* Concurrent-function facility - Facility bit 201 */
- { "PFCR", cpu_subfunc.pfcr, sizeof(cpu_subfunc.pfcr), test_pfcr_asm_block, 201 },
-};
-
-int main(int argc, char *argv[])
-{
- struct kvm_vm *vm;
- int idx;
-
- ksft_print_header();
-
- vm = vm_create(1);
-
- memset(&cpu_subfunc, 0, sizeof(cpu_subfunc));
- get_cpu_machine_subfuntions(vm, &cpu_subfunc);
-
- ksft_set_plan(ARRAY_SIZE(testlist));
- for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
- if (test_facility(testlist[idx].facility_bit)) {
- u8 *array = malloc(testlist[idx].array_size);
-
- testlist[idx].test((u8 (*)[testlist[idx].array_size])array);
-
- TEST_ASSERT_EQ(memcmp(testlist[idx].subfunc_array,
- array, testlist[idx].array_size), 0);
-
- ksft_test_result_pass("%s\n", testlist[idx].subfunc_name);
- free(array);
- } else {
- ksft_test_result_skip("%s feature is not avaialable\n",
- testlist[idx].subfunc_name);
- }
- }
-
- kvm_vm_free(vm);
- ksft_finished();
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/* Test KVM debugging features. */
-#include "kvm_util.h"
-#include "test_util.h"
-#include "sie.h"
-
-#include <linux/kvm.h>
-
-#define __LC_SVC_NEW_PSW 0x1c0
-#define __LC_PGM_NEW_PSW 0x1d0
-#define IPA0_DIAG 0x8300
-#define PGM_SPECIFICATION 0x06
-
-/* Common code for testing single-stepping interruptions. */
-extern char int_handler[];
-asm("int_handler:\n"
- "j .\n");
-
-static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code,
- size_t new_psw_off, uint64_t *new_psw)
-{
- struct kvm_guest_debug debug = {};
- struct kvm_regs regs;
- struct kvm_vm *vm;
- char *lowcore;
-
- vm = vm_create_with_one_vcpu(vcpu, guest_code);
- lowcore = addr_gpa2hva(vm, 0);
- new_psw[0] = (*vcpu)->run->psw_mask;
- new_psw[1] = (uint64_t)int_handler;
- memcpy(lowcore + new_psw_off, new_psw, 16);
- vcpu_regs_get(*vcpu, ®s);
- regs.gprs[2] = -1;
- vcpu_regs_set(*vcpu, ®s);
- debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
- vcpu_guest_debug_set(*vcpu, &debug);
- vcpu_run(*vcpu);
-
- return vm;
-}
-
-static void test_step_int(void *guest_code, size_t new_psw_off)
-{
- struct kvm_vcpu *vcpu;
- uint64_t new_psw[2];
- struct kvm_vm *vm;
-
- vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
- TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
- TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
- kvm_vm_free(vm);
-}
-
-/* Test single-stepping "boring" program interruptions. */
-extern char test_step_pgm_guest_code[];
-asm("test_step_pgm_guest_code:\n"
- ".insn rr,0x1d00,%r1,%r0 /* dr %r1,%r0 */\n"
- "j .\n");
-
-static void test_step_pgm(void)
-{
- test_step_int(test_step_pgm_guest_code, __LC_PGM_NEW_PSW);
-}
-
-/*
- * Test single-stepping program interruptions caused by DIAG.
- * Userspace emulation must not interfere with single-stepping.
- */
-extern char test_step_pgm_diag_guest_code[];
-asm("test_step_pgm_diag_guest_code:\n"
- "diag %r0,%r0,0\n"
- "j .\n");
-
-static void test_step_pgm_diag(void)
-{
- struct kvm_s390_irq irq = {
- .type = KVM_S390_PROGRAM_INT,
- .u.pgm.code = PGM_SPECIFICATION,
- };
- struct kvm_vcpu *vcpu;
- uint64_t new_psw[2];
- struct kvm_vm *vm;
-
- vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code,
- __LC_PGM_NEW_PSW, new_psw);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
- TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INST);
- TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG);
- vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq);
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
- TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
- TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
- kvm_vm_free(vm);
-}
-
-/*
- * Test single-stepping program interruptions caused by ISKE.
- * CPUSTAT_KSS handling must not interfere with single-stepping.
- */
-extern char test_step_pgm_iske_guest_code[];
-asm("test_step_pgm_iske_guest_code:\n"
- "iske %r2,%r2\n"
- "j .\n");
-
-static void test_step_pgm_iske(void)
-{
- test_step_int(test_step_pgm_iske_guest_code, __LC_PGM_NEW_PSW);
-}
-
-/*
- * Test single-stepping program interruptions caused by LCTL.
- * KVM emulation must not interfere with single-stepping.
- */
-extern char test_step_pgm_lctl_guest_code[];
-asm("test_step_pgm_lctl_guest_code:\n"
- "lctl %c0,%c0,1\n"
- "j .\n");
-
-static void test_step_pgm_lctl(void)
-{
- test_step_int(test_step_pgm_lctl_guest_code, __LC_PGM_NEW_PSW);
-}
-
-/* Test single-stepping supervisor-call interruptions. */
-extern char test_step_svc_guest_code[];
-asm("test_step_svc_guest_code:\n"
- "svc 0\n"
- "j .\n");
-
-static void test_step_svc(void)
-{
- test_step_int(test_step_svc_guest_code, __LC_SVC_NEW_PSW);
-}
-
-/* Run all tests above. */
-static struct testdef {
- const char *name;
- void (*test)(void);
-} testlist[] = {
- { "single-step pgm", test_step_pgm },
- { "single-step pgm caused by diag", test_step_pgm_diag },
- { "single-step pgm caused by iske", test_step_pgm_iske },
- { "single-step pgm caused by lctl", test_step_pgm_lctl },
- { "single-step svc", test_step_svc },
-};
-
-int main(int argc, char *argv[])
-{
- int idx;
-
- ksft_print_header();
- ksft_set_plan(ARRAY_SIZE(testlist));
- for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
- testlist[idx].test();
- ksft_test_result_pass("%s\n", testlist[idx].name);
- }
- ksft_finished();
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Test for s390x KVM_S390_MEM_OP
- *
- * Copyright (C) 2019, Red Hat, Inc.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <pthread.h>
-
-#include <linux/bits.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "kselftest.h"
-#include "ucall_common.h"
-#include "processor.h"
-
-enum mop_target {
- LOGICAL,
- SIDA,
- ABSOLUTE,
- INVALID,
-};
-
-enum mop_access_mode {
- READ,
- WRITE,
- CMPXCHG,
-};
-
-struct mop_desc {
- uintptr_t gaddr;
- uintptr_t gaddr_v;
- uint64_t set_flags;
- unsigned int f_check : 1;
- unsigned int f_inject : 1;
- unsigned int f_key : 1;
- unsigned int _gaddr_v : 1;
- unsigned int _set_flags : 1;
- unsigned int _sida_offset : 1;
- unsigned int _ar : 1;
- uint32_t size;
- enum mop_target target;
- enum mop_access_mode mode;
- void *buf;
- uint32_t sida_offset;
- void *old;
- uint8_t old_value[16];
- bool *cmpxchg_success;
- uint8_t ar;
- uint8_t key;
-};
-
-const uint8_t NO_KEY = 0xff;
-
-static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc *desc)
-{
- struct kvm_s390_mem_op ksmo = {
- .gaddr = (uintptr_t)desc->gaddr,
- .size = desc->size,
- .buf = ((uintptr_t)desc->buf),
- .reserved = "ignored_ignored_ignored_ignored"
- };
-
- switch (desc->target) {
- case LOGICAL:
- if (desc->mode == READ)
- ksmo.op = KVM_S390_MEMOP_LOGICAL_READ;
- if (desc->mode == WRITE)
- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
- break;
- case SIDA:
- if (desc->mode == READ)
- ksmo.op = KVM_S390_MEMOP_SIDA_READ;
- if (desc->mode == WRITE)
- ksmo.op = KVM_S390_MEMOP_SIDA_WRITE;
- break;
- case ABSOLUTE:
- if (desc->mode == READ)
- ksmo.op = KVM_S390_MEMOP_ABSOLUTE_READ;
- if (desc->mode == WRITE)
- ksmo.op = KVM_S390_MEMOP_ABSOLUTE_WRITE;
- if (desc->mode == CMPXCHG) {
- ksmo.op = KVM_S390_MEMOP_ABSOLUTE_CMPXCHG;
- ksmo.old_addr = (uint64_t)desc->old;
- memcpy(desc->old_value, desc->old, desc->size);
- }
- break;
- case INVALID:
- ksmo.op = -1;
- }
- if (desc->f_check)
- ksmo.flags |= KVM_S390_MEMOP_F_CHECK_ONLY;
- if (desc->f_inject)
- ksmo.flags |= KVM_S390_MEMOP_F_INJECT_EXCEPTION;
- if (desc->_set_flags)
- ksmo.flags = desc->set_flags;
- if (desc->f_key && desc->key != NO_KEY) {
- ksmo.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION;
- ksmo.key = desc->key;
- }
- if (desc->_ar)
- ksmo.ar = desc->ar;
- else
- ksmo.ar = 0;
- if (desc->_sida_offset)
- ksmo.sida_offset = desc->sida_offset;
-
- return ksmo;
-}
-
-struct test_info {
- struct kvm_vm *vm;
- struct kvm_vcpu *vcpu;
-};
-
-#define PRINT_MEMOP false
-static void print_memop(struct kvm_vcpu *vcpu, const struct kvm_s390_mem_op *ksmo)
-{
- if (!PRINT_MEMOP)
- return;
-
- if (!vcpu)
- printf("vm memop(");
- else
- printf("vcpu memop(");
- switch (ksmo->op) {
- case KVM_S390_MEMOP_LOGICAL_READ:
- printf("LOGICAL, READ, ");
- break;
- case KVM_S390_MEMOP_LOGICAL_WRITE:
- printf("LOGICAL, WRITE, ");
- break;
- case KVM_S390_MEMOP_SIDA_READ:
- printf("SIDA, READ, ");
- break;
- case KVM_S390_MEMOP_SIDA_WRITE:
- printf("SIDA, WRITE, ");
- break;
- case KVM_S390_MEMOP_ABSOLUTE_READ:
- printf("ABSOLUTE, READ, ");
- break;
- case KVM_S390_MEMOP_ABSOLUTE_WRITE:
- printf("ABSOLUTE, WRITE, ");
- break;
- case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
- printf("ABSOLUTE, CMPXCHG, ");
- break;
- }
- printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u, old_addr=%llx",
- ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key,
- ksmo->old_addr);
- if (ksmo->flags & KVM_S390_MEMOP_F_CHECK_ONLY)
- printf(", CHECK_ONLY");
- if (ksmo->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION)
- printf(", INJECT_EXCEPTION");
- if (ksmo->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION)
- printf(", SKEY_PROTECTION");
- puts(")");
-}
-
-static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
- struct mop_desc *desc)
-{
- struct kvm_vcpu *vcpu = info.vcpu;
-
- if (!vcpu)
- return __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo);
- else
- return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo);
-}
-
-static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
- struct mop_desc *desc)
-{
- int r;
-
- r = err_memop_ioctl(info, ksmo, desc);
- if (ksmo->op == KVM_S390_MEMOP_ABSOLUTE_CMPXCHG) {
- if (desc->cmpxchg_success) {
- int diff = memcmp(desc->old_value, desc->old, desc->size);
- *desc->cmpxchg_success = !diff;
- }
- }
- TEST_ASSERT(!r, __KVM_IOCTL_ERROR("KVM_S390_MEM_OP", r));
-}
-
-#define MEMOP(err, info_p, mop_target_p, access_mode_p, buf_p, size_p, ...) \
-({ \
- struct test_info __info = (info_p); \
- struct mop_desc __desc = { \
- .target = (mop_target_p), \
- .mode = (access_mode_p), \
- .buf = (buf_p), \
- .size = (size_p), \
- __VA_ARGS__ \
- }; \
- struct kvm_s390_mem_op __ksmo; \
- \
- if (__desc._gaddr_v) { \
- if (__desc.target == ABSOLUTE) \
- __desc.gaddr = addr_gva2gpa(__info.vm, __desc.gaddr_v); \
- else \
- __desc.gaddr = __desc.gaddr_v; \
- } \
- __ksmo = ksmo_from_desc(&__desc); \
- print_memop(__info.vcpu, &__ksmo); \
- err##memop_ioctl(__info, &__ksmo, &__desc); \
-})
-
-#define MOP(...) MEMOP(, __VA_ARGS__)
-#define ERR_MOP(...) MEMOP(err_, __VA_ARGS__)
-
-#define GADDR(a) .gaddr = ((uintptr_t)a)
-#define GADDR_V(v) ._gaddr_v = 1, .gaddr_v = ((uintptr_t)v)
-#define CHECK_ONLY .f_check = 1
-#define SET_FLAGS(f) ._set_flags = 1, .set_flags = (f)
-#define SIDA_OFFSET(o) ._sida_offset = 1, .sida_offset = (o)
-#define AR(a) ._ar = 1, .ar = (a)
-#define KEY(a) .f_key = 1, .key = (a)
-#define INJECT .f_inject = 1
-#define CMPXCHG_OLD(o) .old = (o)
-#define CMPXCHG_SUCCESS(s) .cmpxchg_success = (s)
-
-#define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); })
-
-#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38))
-#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39))
-
-static uint8_t __aligned(PAGE_SIZE) mem1[65536];
-static uint8_t __aligned(PAGE_SIZE) mem2[65536];
-
-struct test_default {
- struct kvm_vm *kvm_vm;
- struct test_info vm;
- struct test_info vcpu;
- struct kvm_run *run;
- int size;
-};
-
-static struct test_default test_default_init(void *guest_code)
-{
- struct kvm_vcpu *vcpu;
- struct test_default t;
-
- t.size = min((size_t)kvm_check_cap(KVM_CAP_S390_MEM_OP), sizeof(mem1));
- t.kvm_vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- t.vm = (struct test_info) { t.kvm_vm, NULL };
- t.vcpu = (struct test_info) { t.kvm_vm, vcpu };
- t.run = vcpu->run;
- return t;
-}
-
-enum stage {
- /* Synced state set by host, e.g. DAT */
- STAGE_INITED,
- /* Guest did nothing */
- STAGE_IDLED,
- /* Guest set storage keys (specifics up to test case) */
- STAGE_SKEYS_SET,
- /* Guest copied memory (locations up to test case) */
- STAGE_COPIED,
- /* End of guest code reached */
- STAGE_DONE,
-};
-
-#define HOST_SYNC(info_p, stage) \
-({ \
- struct test_info __info = (info_p); \
- struct kvm_vcpu *__vcpu = __info.vcpu; \
- struct ucall uc; \
- int __stage = (stage); \
- \
- vcpu_run(__vcpu); \
- get_ucall(__vcpu, &uc); \
- if (uc.cmd == UCALL_ABORT) { \
- REPORT_GUEST_ASSERT(uc); \
- } \
- TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC); \
- TEST_ASSERT_EQ(uc.args[1], __stage); \
-}) \
-
-static void prepare_mem12(void)
-{
- int i;
-
- for (i = 0; i < sizeof(mem1); i++)
- mem1[i] = rand();
- memset(mem2, 0xaa, sizeof(mem2));
-}
-
-#define ASSERT_MEM_EQ(p1, p2, size) \
- TEST_ASSERT(!memcmp(p1, p2, size), "Memory contents do not match!")
-
-static void default_write_read(struct test_info copy_cpu, struct test_info mop_cpu,
- enum mop_target mop_target, uint32_t size, uint8_t key)
-{
- prepare_mem12();
- CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size,
- GADDR_V(mem1), KEY(key));
- HOST_SYNC(copy_cpu, STAGE_COPIED);
- CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size,
- GADDR_V(mem2), KEY(key));
- ASSERT_MEM_EQ(mem1, mem2, size);
-}
-
-static void default_read(struct test_info copy_cpu, struct test_info mop_cpu,
- enum mop_target mop_target, uint32_t size, uint8_t key)
-{
- prepare_mem12();
- CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, GADDR_V(mem1));
- HOST_SYNC(copy_cpu, STAGE_COPIED);
- CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size,
- GADDR_V(mem2), KEY(key));
- ASSERT_MEM_EQ(mem1, mem2, size);
-}
-
-static void default_cmpxchg(struct test_default *test, uint8_t key)
-{
- for (int size = 1; size <= 16; size *= 2) {
- for (int offset = 0; offset < 16; offset += size) {
- uint8_t __aligned(16) new[16] = {};
- uint8_t __aligned(16) old[16];
- bool succ;
-
- prepare_mem12();
- default_write_read(test->vcpu, test->vcpu, LOGICAL, 16, NO_KEY);
-
- memcpy(&old, mem1, 16);
- MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset,
- size, GADDR_V(mem1 + offset),
- CMPXCHG_OLD(old + offset),
- CMPXCHG_SUCCESS(&succ), KEY(key));
- HOST_SYNC(test->vcpu, STAGE_COPIED);
- MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2));
- TEST_ASSERT(succ, "exchange of values should succeed");
- memcpy(mem1 + offset, new + offset, size);
- ASSERT_MEM_EQ(mem1, mem2, 16);
-
- memcpy(&old, mem1, 16);
- new[offset]++;
- old[offset]++;
- MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset,
- size, GADDR_V(mem1 + offset),
- CMPXCHG_OLD(old + offset),
- CMPXCHG_SUCCESS(&succ), KEY(key));
- HOST_SYNC(test->vcpu, STAGE_COPIED);
- MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2));
- TEST_ASSERT(!succ, "exchange of values should not succeed");
- ASSERT_MEM_EQ(mem1, mem2, 16);
- ASSERT_MEM_EQ(&old, mem1, 16);
- }
- }
-}
-
-static void guest_copy(void)
-{
- GUEST_SYNC(STAGE_INITED);
- memcpy(&mem2, &mem1, sizeof(mem2));
- GUEST_SYNC(STAGE_COPIED);
-}
-
-static void test_copy(void)
-{
- struct test_default t = test_default_init(guest_copy);
-
- HOST_SYNC(t.vcpu, STAGE_INITED);
-
- default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, NO_KEY);
-
- kvm_vm_free(t.kvm_vm);
-}
-
-static void test_copy_access_register(void)
-{
- struct test_default t = test_default_init(guest_copy);
-
- HOST_SYNC(t.vcpu, STAGE_INITED);
-
- prepare_mem12();
- t.run->psw_mask &= ~(3UL << (63 - 17));
- t.run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */
-
- /*
- * Primary address space gets used if an access register
- * contains zero. The host makes use of AR[1] so is a good
- * candidate to ensure the guest AR (of zero) is used.
- */
- CHECK_N_DO(MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size,
- GADDR_V(mem1), AR(1));
- HOST_SYNC(t.vcpu, STAGE_COPIED);
-
- CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, t.size,
- GADDR_V(mem2), AR(1));
- ASSERT_MEM_EQ(mem1, mem2, t.size);
-
- kvm_vm_free(t.kvm_vm);
-}
-
-static void set_storage_key_range(void *addr, size_t len, uint8_t key)
-{
- uintptr_t _addr, abs, i;
- int not_mapped = 0;
-
- _addr = (uintptr_t)addr;
- for (i = _addr & PAGE_MASK; i < _addr + len; i += PAGE_SIZE) {
- abs = i;
- asm volatile (
- "lra %[abs], 0(0,%[abs])\n"
- " jz 0f\n"
- " llill %[not_mapped],1\n"
- " j 1f\n"
- "0: sske %[key], %[abs]\n"
- "1:"
- : [abs] "+&a" (abs), [not_mapped] "+r" (not_mapped)
- : [key] "r" (key)
- : "cc"
- );
- GUEST_ASSERT_EQ(not_mapped, 0);
- }
-}
-
-static void guest_copy_key(void)
-{
- set_storage_key_range(mem1, sizeof(mem1), 0x90);
- set_storage_key_range(mem2, sizeof(mem2), 0x90);
- GUEST_SYNC(STAGE_SKEYS_SET);
-
- for (;;) {
- memcpy(&mem2, &mem1, sizeof(mem2));
- GUEST_SYNC(STAGE_COPIED);
- }
-}
-
-static void test_copy_key(void)
-{
- struct test_default t = test_default_init(guest_copy_key);
-
- HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
- /* vm, no key */
- default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, NO_KEY);
-
- /* vm/vcpu, matching key or key 0 */
- default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 0);
- default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9);
- default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 0);
- default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9);
- /*
- * There used to be different code paths for key handling depending on
- * if the region crossed a page boundary.
- * There currently are not, but the more tests the merrier.
- */
- default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 0);
- default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 9);
- default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 0);
- default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 9);
-
- /* vm/vcpu, mismatching keys on read, but no fetch protection */
- default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2);
- default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 2);
-
- kvm_vm_free(t.kvm_vm);
-}
-
-static void test_cmpxchg_key(void)
-{
- struct test_default t = test_default_init(guest_copy_key);
-
- HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
- default_cmpxchg(&t, NO_KEY);
- default_cmpxchg(&t, 0);
- default_cmpxchg(&t, 9);
-
- kvm_vm_free(t.kvm_vm);
-}
-
-static __uint128_t cut_to_size(int size, __uint128_t val)
-{
- switch (size) {
- case 1:
- return (uint8_t)val;
- case 2:
- return (uint16_t)val;
- case 4:
- return (uint32_t)val;
- case 8:
- return (uint64_t)val;
- case 16:
- return val;
- }
- GUEST_FAIL("Invalid size = %u", size);
- return 0;
-}
-
-static bool popcount_eq(__uint128_t a, __uint128_t b)
-{
- unsigned int count_a, count_b;
-
- count_a = __builtin_popcountl((uint64_t)(a >> 64)) +
- __builtin_popcountl((uint64_t)a);
- count_b = __builtin_popcountl((uint64_t)(b >> 64)) +
- __builtin_popcountl((uint64_t)b);
- return count_a == count_b;
-}
-
-static __uint128_t rotate(int size, __uint128_t val, int amount)
-{
- unsigned int bits = size * 8;
-
- amount = (amount + bits) % bits;
- val = cut_to_size(size, val);
- if (!amount)
- return val;
- return (val << (bits - amount)) | (val >> amount);
-}
-
-const unsigned int max_block = 16;
-
-static void choose_block(bool guest, int i, int *size, int *offset)
-{
- unsigned int rand;
-
- rand = i;
- if (guest) {
- rand = rand * 19 + 11;
- *size = 1 << ((rand % 3) + 2);
- rand = rand * 19 + 11;
- *offset = (rand % max_block) & ~(*size - 1);
- } else {
- rand = rand * 17 + 5;
- *size = 1 << (rand % 5);
- rand = rand * 17 + 5;
- *offset = (rand % max_block) & ~(*size - 1);
- }
-}
-
-static __uint128_t permutate_bits(bool guest, int i, int size, __uint128_t old)
-{
- unsigned int rand;
- int amount;
- bool swap;
-
- rand = i;
- rand = rand * 3 + 1;
- if (guest)
- rand = rand * 3 + 1;
- swap = rand % 2 == 0;
- if (swap) {
- int i, j;
- __uint128_t new;
- uint8_t byte0, byte1;
-
- rand = rand * 3 + 1;
- i = rand % size;
- rand = rand * 3 + 1;
- j = rand % size;
- if (i == j)
- return old;
- new = rotate(16, old, i * 8);
- byte0 = new & 0xff;
- new &= ~0xff;
- new = rotate(16, new, -i * 8);
- new = rotate(16, new, j * 8);
- byte1 = new & 0xff;
- new = (new & ~0xff) | byte0;
- new = rotate(16, new, -j * 8);
- new = rotate(16, new, i * 8);
- new = new | byte1;
- new = rotate(16, new, -i * 8);
- return new;
- }
- rand = rand * 3 + 1;
- amount = rand % (size * 8);
- return rotate(size, old, amount);
-}
-
-static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t new)
-{
- bool ret;
-
- switch (size) {
- case 4: {
- uint32_t old = *old_addr;
-
- asm volatile ("cs %[old],%[new],%[address]"
- : [old] "+d" (old),
- [address] "+Q" (*(uint32_t *)(target))
- : [new] "d" ((uint32_t)new)
- : "cc"
- );
- ret = old == (uint32_t)*old_addr;
- *old_addr = old;
- return ret;
- }
- case 8: {
- uint64_t old = *old_addr;
-
- asm volatile ("csg %[old],%[new],%[address]"
- : [old] "+d" (old),
- [address] "+Q" (*(uint64_t *)(target))
- : [new] "d" ((uint64_t)new)
- : "cc"
- );
- ret = old == (uint64_t)*old_addr;
- *old_addr = old;
- return ret;
- }
- case 16: {
- __uint128_t old = *old_addr;
-
- asm volatile ("cdsg %[old],%[new],%[address]"
- : [old] "+d" (old),
- [address] "+Q" (*(__uint128_t *)(target))
- : [new] "d" (new)
- : "cc"
- );
- ret = old == *old_addr;
- *old_addr = old;
- return ret;
- }
- }
- GUEST_FAIL("Invalid size = %u", size);
- return 0;
-}
-
-const unsigned int cmpxchg_iter_outer = 100, cmpxchg_iter_inner = 10000;
-
-static void guest_cmpxchg_key(void)
-{
- int size, offset;
- __uint128_t old, new;
-
- set_storage_key_range(mem1, max_block, 0x10);
- set_storage_key_range(mem2, max_block, 0x10);
- GUEST_SYNC(STAGE_SKEYS_SET);
-
- for (int i = 0; i < cmpxchg_iter_outer; i++) {
- do {
- old = 1;
- } while (!_cmpxchg(16, mem1, &old, 0));
- for (int j = 0; j < cmpxchg_iter_inner; j++) {
- choose_block(true, i + j, &size, &offset);
- do {
- new = permutate_bits(true, i + j, size, old);
- } while (!_cmpxchg(size, mem2 + offset, &old, new));
- }
- }
-
- GUEST_SYNC(STAGE_DONE);
-}
-
-static void *run_guest(void *data)
-{
- struct test_info *info = data;
-
- HOST_SYNC(*info, STAGE_DONE);
- return NULL;
-}
-
-static char *quad_to_char(__uint128_t *quad, int size)
-{
- return ((char *)quad) + (sizeof(*quad) - size);
-}
-
-static void test_cmpxchg_key_concurrent(void)
-{
- struct test_default t = test_default_init(guest_cmpxchg_key);
- int size, offset;
- __uint128_t old, new;
- bool success;
- pthread_t thread;
-
- HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
- prepare_mem12();
- MOP(t.vcpu, LOGICAL, WRITE, mem1, max_block, GADDR_V(mem2));
- pthread_create(&thread, NULL, run_guest, &t.vcpu);
-
- for (int i = 0; i < cmpxchg_iter_outer; i++) {
- do {
- old = 0;
- new = 1;
- MOP(t.vm, ABSOLUTE, CMPXCHG, &new,
- sizeof(new), GADDR_V(mem1),
- CMPXCHG_OLD(&old),
- CMPXCHG_SUCCESS(&success), KEY(1));
- } while (!success);
- for (int j = 0; j < cmpxchg_iter_inner; j++) {
- choose_block(false, i + j, &size, &offset);
- do {
- new = permutate_bits(false, i + j, size, old);
- MOP(t.vm, ABSOLUTE, CMPXCHG, quad_to_char(&new, size),
- size, GADDR_V(mem2 + offset),
- CMPXCHG_OLD(quad_to_char(&old, size)),
- CMPXCHG_SUCCESS(&success), KEY(1));
- } while (!success);
- }
- }
-
- pthread_join(thread, NULL);
-
- MOP(t.vcpu, LOGICAL, READ, mem2, max_block, GADDR_V(mem2));
- TEST_ASSERT(popcount_eq(*(__uint128_t *)mem1, *(__uint128_t *)mem2),
- "Must retain number of set bits");
-
- kvm_vm_free(t.kvm_vm);
-}
-
-static void guest_copy_key_fetch_prot(void)
-{
- /*
- * For some reason combining the first sync with override enablement
- * results in an exception when calling HOST_SYNC.
- */
- GUEST_SYNC(STAGE_INITED);
- /* Storage protection override applies to both store and fetch. */
- set_storage_key_range(mem1, sizeof(mem1), 0x98);
- set_storage_key_range(mem2, sizeof(mem2), 0x98);
- GUEST_SYNC(STAGE_SKEYS_SET);
-
- for (;;) {
- memcpy(&mem2, &mem1, sizeof(mem2));
- GUEST_SYNC(STAGE_COPIED);
- }
-}
-
-static void test_copy_key_storage_prot_override(void)
-{
- struct test_default t = test_default_init(guest_copy_key_fetch_prot);
-
- HOST_SYNC(t.vcpu, STAGE_INITED);
- t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
- t.run->kvm_dirty_regs = KVM_SYNC_CRS;
- HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
- /* vcpu, mismatching keys, storage protection override in effect */
- default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2);
-
- kvm_vm_free(t.kvm_vm);
-}
-
-static void test_copy_key_fetch_prot(void)
-{
- struct test_default t = test_default_init(guest_copy_key_fetch_prot);
-
- HOST_SYNC(t.vcpu, STAGE_INITED);
- HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
- /* vm/vcpu, matching key, fetch protection in effect */
- default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9);
- default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9);
-
- kvm_vm_free(t.kvm_vm);
-}
-
-#define ERR_PROT_MOP(...) \
-({ \
- int rv; \
- \
- rv = ERR_MOP(__VA_ARGS__); \
- TEST_ASSERT(rv == 4, "Should result in protection exception"); \
-})
-
-static void guest_error_key(void)
-{
- GUEST_SYNC(STAGE_INITED);
- set_storage_key_range(mem1, PAGE_SIZE, 0x18);
- set_storage_key_range(mem1 + PAGE_SIZE, sizeof(mem1) - PAGE_SIZE, 0x98);
- GUEST_SYNC(STAGE_SKEYS_SET);
- GUEST_SYNC(STAGE_IDLED);
-}
-
-static void test_errors_key(void)
-{
- struct test_default t = test_default_init(guest_error_key);
-
- HOST_SYNC(t.vcpu, STAGE_INITED);
- HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
- /* vm/vcpu, mismatching keys, fetch protection in effect */
- CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
- CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem1), KEY(2));
- CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
- CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem1), KEY(2));
-
- kvm_vm_free(t.kvm_vm);
-}
-
-static void test_errors_cmpxchg_key(void)
-{
- struct test_default t = test_default_init(guest_copy_key_fetch_prot);
- int i;
-
- HOST_SYNC(t.vcpu, STAGE_INITED);
- HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
- for (i = 1; i <= 16; i *= 2) {
- __uint128_t old = 0;
-
- ERR_PROT_MOP(t.vm, ABSOLUTE, CMPXCHG, mem2, i, GADDR_V(mem2),
- CMPXCHG_OLD(&old), KEY(2));
- }
-
- kvm_vm_free(t.kvm_vm);
-}
-
-static void test_termination(void)
-{
- struct test_default t = test_default_init(guest_error_key);
- uint64_t prefix;
- uint64_t teid;
- uint64_t teid_mask = BIT(63 - 56) | BIT(63 - 60) | BIT(63 - 61);
- uint64_t psw[2];
-
- HOST_SYNC(t.vcpu, STAGE_INITED);
- HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
- /* vcpu, mismatching keys after first page */
- ERR_PROT_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(1), INJECT);
- /*
- * The memop injected a program exception and the test needs to check the
- * Translation-Exception Identification (TEID). It is necessary to run
- * the guest in order to be able to read the TEID from guest memory.
- * Set the guest program new PSW, so the guest state is not clobbered.
- */
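-	/* The program new PSW lives at offset 464, the TEID at offset 168 of the lowcore. */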
- prefix = t.run->s.regs.prefix;
- psw[0] = t.run->psw_mask;
- psw[1] = t.run->psw_addr;
- MOP(t.vm, ABSOLUTE, WRITE, psw, sizeof(psw), GADDR(prefix + 464));
- HOST_SYNC(t.vcpu, STAGE_IDLED);
- MOP(t.vm, ABSOLUTE, READ, &teid, sizeof(teid), GADDR(prefix + 168));
- /* Bits 56, 60, 61 form a code, 0 being the only one allowing for termination */
- TEST_ASSERT_EQ(teid & teid_mask, 0);
-
- kvm_vm_free(t.kvm_vm);
-}
-
-static void test_errors_key_storage_prot_override(void)
-{
- struct test_default t = test_default_init(guest_copy_key_fetch_prot);
-
- HOST_SYNC(t.vcpu, STAGE_INITED);
- t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
- t.run->kvm_dirty_regs = KVM_SYNC_CRS;
- HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
- /* vm, mismatching keys, storage protection override not applicable to vm */
- CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
- CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem2), KEY(2));
-
- kvm_vm_free(t.kvm_vm);
-}
-
-const uint64_t last_page_addr = -PAGE_SIZE;
-
-static void guest_copy_key_fetch_prot_override(void)
-{
- int i;
- char *page_0 = 0;
-
- GUEST_SYNC(STAGE_INITED);
- set_storage_key_range(0, PAGE_SIZE, 0x18);
- set_storage_key_range((void *)last_page_addr, PAGE_SIZE, 0x0);
- asm volatile ("sske %[key],%[addr]\n" :: [addr] "r"(0L), [key] "r"(0x18) : "cc");
- GUEST_SYNC(STAGE_SKEYS_SET);
-
- for (;;) {
- for (i = 0; i < PAGE_SIZE; i++)
- page_0[i] = mem1[i];
- GUEST_SYNC(STAGE_COPIED);
- }
-}
-
-static void test_copy_key_fetch_prot_override(void)
-{
- struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
- vm_vaddr_t guest_0_page, guest_last_page;
-
- guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
- guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
- if (guest_0_page != 0 || guest_last_page != last_page_addr) {
- print_skip("did not allocate guest pages at required positions");
- goto out;
- }
-
- HOST_SYNC(t.vcpu, STAGE_INITED);
- t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
- t.run->kvm_dirty_regs = KVM_SYNC_CRS;
- HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
- /* vcpu, mismatching keys on fetch, fetch protection override applies */
- prepare_mem12();
- MOP(t.vcpu, LOGICAL, WRITE, mem1, PAGE_SIZE, GADDR_V(mem1));
- HOST_SYNC(t.vcpu, STAGE_COPIED);
- CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2));
- ASSERT_MEM_EQ(mem1, mem2, 2048);
-
- /*
- * vcpu, mismatching keys on fetch, fetch protection override applies,
- * wraparound
- */
- prepare_mem12();
- MOP(t.vcpu, LOGICAL, WRITE, mem1, 2 * PAGE_SIZE, GADDR_V(guest_last_page));
- HOST_SYNC(t.vcpu, STAGE_COPIED);
- CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048,
- GADDR_V(guest_last_page), KEY(2));
- ASSERT_MEM_EQ(mem1, mem2, 2048);
-
-out:
- kvm_vm_free(t.kvm_vm);
-}
-
-static void test_errors_key_fetch_prot_override_not_enabled(void)
-{
- struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
- vm_vaddr_t guest_0_page, guest_last_page;
-
- guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
- guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
- if (guest_0_page != 0 || guest_last_page != last_page_addr) {
- print_skip("did not allocate guest pages at required positions");
- goto out;
- }
- HOST_SYNC(t.vcpu, STAGE_INITED);
- HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
- /* vcpu, mismatching keys on fetch, fetch protection override not enabled */
- CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(0), KEY(2));
-
-out:
- kvm_vm_free(t.kvm_vm);
-}
-
-static void test_errors_key_fetch_prot_override_enabled(void)
-{
- struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
- vm_vaddr_t guest_0_page, guest_last_page;
-
- guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
- guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
- if (guest_0_page != 0 || guest_last_page != last_page_addr) {
- print_skip("did not allocate guest pages at required positions");
- goto out;
- }
- HOST_SYNC(t.vcpu, STAGE_INITED);
- t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
- t.run->kvm_dirty_regs = KVM_SYNC_CRS;
- HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
- /*
-	 * vcpu, mismatching keys on fetch, fetch protection override does not
-	 * apply because the access exceeds the override range
- */
- CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048 + 1, GADDR_V(0), KEY(2));
- CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048 + 1,
- GADDR_V(guest_last_page), KEY(2));
-	/* vm, fetch protection override does not apply */
- CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR(0), KEY(2));
- CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2));
-
-out:
- kvm_vm_free(t.kvm_vm);
-}
-
-static void guest_idle(void)
-{
- GUEST_SYNC(STAGE_INITED); /* for consistency's sake */
- for (;;)
- GUEST_SYNC(STAGE_IDLED);
-}
-
-static void _test_errors_common(struct test_info info, enum mop_target target, int size)
-{
- int rv;
-
- /* Bad size: */
- rv = ERR_MOP(info, target, WRITE, mem1, -1, GADDR_V(mem1));
- TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes");
-
- /* Zero size: */
- rv = ERR_MOP(info, target, WRITE, mem1, 0, GADDR_V(mem1));
- TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM),
- "ioctl allows 0 as size");
-
- /* Bad flags: */
- rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), SET_FLAGS(-1));
- TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags");
-
- /* Bad guest address: */
- rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL), CHECK_ONLY);
- TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address with CHECK_ONLY");
- rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL));
- TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address on write");
-
- /* Bad host address: */
- rv = ERR_MOP(info, target, WRITE, 0, size, GADDR_V(mem1));
- TEST_ASSERT(rv == -1 && errno == EFAULT,
- "ioctl does not report bad host memory address");
-
- /* Bad key: */
- rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), KEY(17));
- TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows invalid key");
-}
-
-static void test_errors(void)
-{
- struct test_default t = test_default_init(guest_idle);
- int rv;
-
- HOST_SYNC(t.vcpu, STAGE_INITED);
-
- _test_errors_common(t.vcpu, LOGICAL, t.size);
- _test_errors_common(t.vm, ABSOLUTE, t.size);
-
- /* Bad operation: */
- rv = ERR_MOP(t.vcpu, INVALID, WRITE, mem1, t.size, GADDR_V(mem1));
- TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
- /* virtual addresses are not translated when passing INVALID */
- rv = ERR_MOP(t.vm, INVALID, WRITE, mem1, PAGE_SIZE, GADDR(0));
- TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
-
- /* Bad access register: */
- t.run->psw_mask &= ~(3UL << (63 - 17));
- t.run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */
- HOST_SYNC(t.vcpu, STAGE_IDLED); /* To sync new state to SIE block */
- rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), AR(17));
- TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15");
- t.run->psw_mask &= ~(3UL << (63 - 17)); /* Disable AR mode */
- HOST_SYNC(t.vcpu, STAGE_IDLED); /* Run to sync new state */
-
- /* Check that the SIDA calls are rejected for non-protected guests */
- rv = ERR_MOP(t.vcpu, SIDA, READ, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0));
- TEST_ASSERT(rv == -1 && errno == EINVAL,
- "ioctl does not reject SIDA_READ in non-protected mode");
- rv = ERR_MOP(t.vcpu, SIDA, WRITE, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0));
- TEST_ASSERT(rv == -1 && errno == EINVAL,
- "ioctl does not reject SIDA_WRITE in non-protected mode");
-
- kvm_vm_free(t.kvm_vm);
-}
-
-static void test_errors_cmpxchg(void)
-{
- struct test_default t = test_default_init(guest_idle);
- __uint128_t old;
- int rv, i, power = 1;
-
- HOST_SYNC(t.vcpu, STAGE_INITED);
-
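-	/* Valid cmpxchg sizes are the powers of two up to 16; all other sizes must be rejected. */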
- for (i = 0; i < 32; i++) {
- if (i == power) {
- power *= 2;
- continue;
- }
- rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1),
- CMPXCHG_OLD(&old));
- TEST_ASSERT(rv == -1 && errno == EINVAL,
- "ioctl allows bad size for cmpxchg");
- }
- for (i = 1; i <= 16; i *= 2) {
- rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR((void *)~0xfffUL),
- CMPXCHG_OLD(&old));
- TEST_ASSERT(rv > 0, "ioctl allows bad guest address for cmpxchg");
- }
- for (i = 2; i <= 16; i *= 2) {
- rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1 + 1),
- CMPXCHG_OLD(&old));
- TEST_ASSERT(rv == -1 && errno == EINVAL,
- "ioctl allows bad alignment for cmpxchg");
- }
-
- kvm_vm_free(t.kvm_vm);
-}
-
-int main(int argc, char *argv[])
-{
- int extension_cap, idx;
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_MEM_OP));
- extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION);
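-	/*
-	 * The capability value is used as a bit mask below: a non-zero value
-	 * gates the storage key tests, bit 1 (0x2) gates the cmpxchg tests.
-	 */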
-
- struct testdef {
- const char *name;
- void (*test)(void);
- bool requirements_met;
- } testlist[] = {
- {
- .name = "simple copy",
- .test = test_copy,
- .requirements_met = true,
- },
- {
- .name = "generic error checks",
- .test = test_errors,
- .requirements_met = true,
- },
- {
- .name = "copy with storage keys",
- .test = test_copy_key,
- .requirements_met = extension_cap > 0,
- },
- {
- .name = "cmpxchg with storage keys",
- .test = test_cmpxchg_key,
- .requirements_met = extension_cap & 0x2,
- },
- {
- .name = "concurrently cmpxchg with storage keys",
- .test = test_cmpxchg_key_concurrent,
- .requirements_met = extension_cap & 0x2,
- },
- {
- .name = "copy with key storage protection override",
- .test = test_copy_key_storage_prot_override,
- .requirements_met = extension_cap > 0,
- },
- {
- .name = "copy with key fetch protection",
- .test = test_copy_key_fetch_prot,
- .requirements_met = extension_cap > 0,
- },
- {
- .name = "copy with key fetch protection override",
- .test = test_copy_key_fetch_prot_override,
- .requirements_met = extension_cap > 0,
- },
- {
- .name = "copy with access register mode",
- .test = test_copy_access_register,
- .requirements_met = true,
- },
- {
- .name = "error checks with key",
- .test = test_errors_key,
- .requirements_met = extension_cap > 0,
- },
- {
- .name = "error checks for cmpxchg with key",
- .test = test_errors_cmpxchg_key,
- .requirements_met = extension_cap & 0x2,
- },
- {
- .name = "error checks for cmpxchg",
- .test = test_errors_cmpxchg,
- .requirements_met = extension_cap & 0x2,
- },
- {
- .name = "termination",
- .test = test_termination,
- .requirements_met = extension_cap > 0,
- },
- {
- .name = "error checks with key storage protection override",
- .test = test_errors_key_storage_prot_override,
- .requirements_met = extension_cap > 0,
- },
- {
- .name = "error checks without key fetch prot override",
- .test = test_errors_key_fetch_prot_override_not_enabled,
- .requirements_met = extension_cap > 0,
- },
- {
- .name = "error checks with key fetch prot override",
- .test = test_errors_key_fetch_prot_override_enabled,
- .requirements_met = extension_cap > 0,
- },
- };
-
- ksft_print_header();
- ksft_set_plan(ARRAY_SIZE(testlist));
-
- for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
- if (testlist[idx].requirements_met) {
- testlist[idx].test();
- ksft_test_result_pass("%s\n", testlist[idx].name);
- } else {
- ksft_test_result_skip("%s - requirements not met (kernel has extension cap %#x)\n",
- testlist[idx].name, extension_cap);
- }
- }
-
- ksft_finished(); /* Print results and exit() accordingly */
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Test for s390x CPU resets
- *
- * Copyright (C) 2020, IBM
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "kselftest.h"
-
-#define LOCAL_IRQS 32
-
-#define ARBITRARY_NON_ZERO_VCPU_ID 3
-
-struct kvm_s390_irq buf[ARBITRARY_NON_ZERO_VCPU_ID + LOCAL_IRQS];
-
-static uint8_t regs_null[512];
-
-static void guest_code_initial(void)
-{
- /* set several CRs to "safe" value */
- unsigned long cr2_59 = 0x10; /* enable guarded storage */
- unsigned long cr8_63 = 0x1; /* monitor mask = 1 */
- unsigned long cr10 = 1; /* PER START */
- unsigned long cr11 = -1; /* PER END */
-
-
- /* Dirty registers */
- asm volatile (
- " lghi 2,0x11\n" /* Round toward 0 */
- " sfpc 2\n" /* set fpc to !=0 */
- " lctlg 2,2,%0\n"
- " lctlg 8,8,%1\n"
- " lctlg 10,10,%2\n"
- " lctlg 11,11,%3\n"
- /* now clobber some general purpose regs */
- " llihh 0,0xffff\n"
- " llihl 1,0x5555\n"
- " llilh 2,0xaaaa\n"
- " llill 3,0x0000\n"
- /* now clobber a floating point reg */
- " lghi 4,0x1\n"
- " cdgbr 0,4\n"
- /* now clobber an access reg */
- " sar 9,4\n"
- /* We embed diag 501 here to control register content */
- " diag 0,0,0x501\n"
- :
- : "m" (cr2_59), "m" (cr8_63), "m" (cr10), "m" (cr11)
- /* no clobber list as this should not return */
- );
-}
-
-static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value)
-{
- uint64_t eval_reg;
-
- eval_reg = vcpu_get_reg(vcpu, id);
- TEST_ASSERT(eval_reg == value, "value == 0x%lx", value);
-}
-
-static void assert_noirq(struct kvm_vcpu *vcpu)
-{
- struct kvm_s390_irq_state irq_state;
- int irqs;
-
- irq_state.len = sizeof(buf);
- irq_state.buf = (unsigned long)buf;
- irqs = __vcpu_ioctl(vcpu, KVM_S390_GET_IRQ_STATE, &irq_state);
- /*
- * irqs contains the number of retrieved interrupts. Any interrupt
- * (notably, the emergency call interrupt we have injected) should
- * be cleared by the resets, so this should be 0.
- */
- TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d", errno);
- TEST_ASSERT(!irqs, "IRQ pending");
-}
-
-static void assert_clear(struct kvm_vcpu *vcpu)
-{
- struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
- struct kvm_sregs sregs;
- struct kvm_regs regs;
- struct kvm_fpu fpu;
-
- vcpu_regs_get(vcpu, ®s);
- TEST_ASSERT(!memcmp(®s.gprs, regs_null, sizeof(regs.gprs)), "grs == 0");
-
- vcpu_sregs_get(vcpu, &sregs);
- TEST_ASSERT(!memcmp(&sregs.acrs, regs_null, sizeof(sregs.acrs)), "acrs == 0");
-
- vcpu_fpu_get(vcpu, &fpu);
- TEST_ASSERT(!memcmp(&fpu.fprs, regs_null, sizeof(fpu.fprs)), "fprs == 0");
-
- /* sync regs */
- TEST_ASSERT(!memcmp(sync_regs->gprs, regs_null, sizeof(sync_regs->gprs)),
- "gprs0-15 == 0 (sync_regs)");
-
- TEST_ASSERT(!memcmp(sync_regs->acrs, regs_null, sizeof(sync_regs->acrs)),
- "acrs0-15 == 0 (sync_regs)");
-
- TEST_ASSERT(!memcmp(sync_regs->vrs, regs_null, sizeof(sync_regs->vrs)),
- "vrs0-15 == 0 (sync_regs)");
-}
-
-static void assert_initial_noclear(struct kvm_vcpu *vcpu)
-{
- struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
-
- TEST_ASSERT(sync_regs->gprs[0] == 0xffff000000000000UL,
- "gpr0 == 0xffff000000000000 (sync_regs)");
- TEST_ASSERT(sync_regs->gprs[1] == 0x0000555500000000UL,
- "gpr1 == 0x0000555500000000 (sync_regs)");
- TEST_ASSERT(sync_regs->gprs[2] == 0x00000000aaaa0000UL,
- "gpr2 == 0x00000000aaaa0000 (sync_regs)");
- TEST_ASSERT(sync_regs->gprs[3] == 0x0000000000000000UL,
- "gpr3 == 0x0000000000000000 (sync_regs)");
- TEST_ASSERT(sync_regs->fprs[0] == 0x3ff0000000000000UL,
-		    "fpr0 == 1.0 (sync_regs)");
- TEST_ASSERT(sync_regs->acrs[9] == 1, "ar9 == 1 (sync_regs)");
-}
-
-static void assert_initial(struct kvm_vcpu *vcpu)
-{
- struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
- struct kvm_sregs sregs;
- struct kvm_fpu fpu;
-
- /* KVM_GET_SREGS */
- vcpu_sregs_get(vcpu, &sregs);
- TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0 (KVM_GET_SREGS)");
- TEST_ASSERT(sregs.crs[14] == 0xC2000000UL,
- "cr14 == 0xC2000000 (KVM_GET_SREGS)");
- TEST_ASSERT(!memcmp(&sregs.crs[1], regs_null, sizeof(sregs.crs[1]) * 12),
- "cr1-13 == 0 (KVM_GET_SREGS)");
- TEST_ASSERT(sregs.crs[15] == 0, "cr15 == 0 (KVM_GET_SREGS)");
-
- /* sync regs */
- TEST_ASSERT(sync_regs->crs[0] == 0xE0UL, "cr0 == 0xE0 (sync_regs)");
- TEST_ASSERT(sync_regs->crs[14] == 0xC2000000UL,
- "cr14 == 0xC2000000 (sync_regs)");
- TEST_ASSERT(!memcmp(&sync_regs->crs[1], regs_null, 8 * 12),
- "cr1-13 == 0 (sync_regs)");
- TEST_ASSERT(sync_regs->crs[15] == 0, "cr15 == 0 (sync_regs)");
- TEST_ASSERT(sync_regs->fpc == 0, "fpc == 0 (sync_regs)");
- TEST_ASSERT(sync_regs->todpr == 0, "todpr == 0 (sync_regs)");
- TEST_ASSERT(sync_regs->cputm == 0, "cputm == 0 (sync_regs)");
- TEST_ASSERT(sync_regs->ckc == 0, "ckc == 0 (sync_regs)");
- TEST_ASSERT(sync_regs->pp == 0, "pp == 0 (sync_regs)");
- TEST_ASSERT(sync_regs->gbea == 1, "gbea == 1 (sync_regs)");
-
- /* kvm_run */
- TEST_ASSERT(vcpu->run->psw_addr == 0, "psw_addr == 0 (kvm_run)");
- TEST_ASSERT(vcpu->run->psw_mask == 0, "psw_mask == 0 (kvm_run)");
-
- vcpu_fpu_get(vcpu, &fpu);
- TEST_ASSERT(!fpu.fpc, "fpc == 0");
-
- test_one_reg(vcpu, KVM_REG_S390_GBEA, 1);
- test_one_reg(vcpu, KVM_REG_S390_PP, 0);
- test_one_reg(vcpu, KVM_REG_S390_TODPR, 0);
- test_one_reg(vcpu, KVM_REG_S390_CPU_TIMER, 0);
- test_one_reg(vcpu, KVM_REG_S390_CLOCK_COMP, 0);
-}
-
-static void assert_normal_noclear(struct kvm_vcpu *vcpu)
-{
- struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
-
-	TEST_ASSERT(sync_regs->crs[2] == 0x10, "cr2 == 0x10 (sync_regs)");
-	TEST_ASSERT(sync_regs->crs[8] == 1, "cr8 == 1 (sync_regs)");
- TEST_ASSERT(sync_regs->crs[10] == 1, "cr10 == 1 (sync_regs)");
- TEST_ASSERT(sync_regs->crs[11] == -1, "cr11 == -1 (sync_regs)");
-}
-
-static void assert_normal(struct kvm_vcpu *vcpu)
-{
- test_one_reg(vcpu, KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID);
- TEST_ASSERT(vcpu->run->s.regs.pft == KVM_S390_PFAULT_TOKEN_INVALID,
- "pft == 0xff..... (sync_regs)");
- assert_noirq(vcpu);
-}
-
-static void inject_irq(struct kvm_vcpu *vcpu)
-{
- struct kvm_s390_irq_state irq_state;
- struct kvm_s390_irq *irq = &buf[0];
- int irqs;
-
- /* Inject IRQ */
- irq_state.len = sizeof(struct kvm_s390_irq);
- irq_state.buf = (unsigned long)buf;
- irq->type = KVM_S390_INT_EMERGENCY;
- irq->u.emerg.code = vcpu->id;
- irqs = __vcpu_ioctl(vcpu, KVM_S390_SET_IRQ_STATE, &irq_state);
- TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d", errno);
-}
-
-static struct kvm_vm *create_vm(struct kvm_vcpu **vcpu)
-{
- struct kvm_vm *vm;
-
- vm = vm_create(1);
-
- *vcpu = vm_vcpu_add(vm, ARBITRARY_NON_ZERO_VCPU_ID, guest_code_initial);
-
- return vm;
-}
-
-static void test_normal(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- ksft_print_msg("Testing normal reset\n");
- vm = create_vm(&vcpu);
-
- vcpu_run(vcpu);
-
- inject_irq(vcpu);
-
- vcpu_ioctl(vcpu, KVM_S390_NORMAL_RESET, NULL);
-
-	/* must be cleared */
- assert_normal(vcpu);
-	/* must not be cleared */
- assert_normal_noclear(vcpu);
- assert_initial_noclear(vcpu);
-
- kvm_vm_free(vm);
-}
-
-static void test_initial(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- ksft_print_msg("Testing initial reset\n");
- vm = create_vm(&vcpu);
-
- vcpu_run(vcpu);
-
- inject_irq(vcpu);
-
- vcpu_ioctl(vcpu, KVM_S390_INITIAL_RESET, NULL);
-
-	/* must be cleared */
- assert_normal(vcpu);
- assert_initial(vcpu);
-	/* must not be cleared */
- assert_initial_noclear(vcpu);
-
- kvm_vm_free(vm);
-}
-
-static void test_clear(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- ksft_print_msg("Testing clear reset\n");
- vm = create_vm(&vcpu);
-
- vcpu_run(vcpu);
-
- inject_irq(vcpu);
-
- vcpu_ioctl(vcpu, KVM_S390_CLEAR_RESET, NULL);
-
-	/* must be cleared */
- assert_normal(vcpu);
- assert_initial(vcpu);
- assert_clear(vcpu);
-
- kvm_vm_free(vm);
-}
-
-struct testdef {
- const char *name;
- void (*test)(void);
- bool needs_cap;
-} testlist[] = {
- { "initial", test_initial, false },
- { "normal", test_normal, true },
- { "clear", test_clear, true },
-};
-
-int main(int argc, char *argv[])
-{
- bool has_s390_vcpu_resets = kvm_check_cap(KVM_CAP_S390_VCPU_RESETS);
- int idx;
-
- ksft_print_header();
- ksft_set_plan(ARRAY_SIZE(testlist));
-
- for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
- if (!testlist[idx].needs_cap || has_s390_vcpu_resets) {
- testlist[idx].test();
- ksft_test_result_pass("%s\n", testlist[idx].name);
- } else {
- ksft_test_result_skip("%s - no VCPU_RESETS capability\n",
- testlist[idx].name);
- }
- }
-
- ksft_finished(); /* Print results and exit() accordingly */
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Test shared zeropage handling (with/without storage keys)
- *
- * Copyright (C) 2024, Red Hat, Inc.
- */
-#include <sys/mman.h>
-
-#include <linux/fs.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "kselftest.h"
-#include "ucall_common.h"
-
-static void set_storage_key(void *addr, uint8_t skey)
-{
- asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
-}
-
-static void guest_code(void)
-{
- /* Issue some storage key instruction. */
- set_storage_key((void *)0, 0x98);
- GUEST_DONE();
-}
-
-/*
- * Returns 1 if the shared zeropage is mapped, 0 if something else is mapped.
- * Returns < 0 on error or if nothing is mapped.
- */
-static int maps_shared_zeropage(int pagemap_fd, void *addr)
-{
- struct page_region region;
- struct pm_scan_arg arg = {
- .start = (uintptr_t)addr,
- .end = (uintptr_t)addr + 4096,
- .vec = (uintptr_t)®ion,
- .vec_len = 1,
- .size = sizeof(struct pm_scan_arg),
- .category_mask = PAGE_IS_PFNZERO,
- .category_anyof_mask = PAGE_IS_PRESENT,
- .return_mask = PAGE_IS_PFNZERO,
- };
- return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
-}
-
-int main(int argc, char *argv[])
-{
- char *mem, *page0, *page1, *page2, tmp;
- const size_t pagesize = getpagesize();
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct ucall uc;
- int pagemap_fd;
-
- ksft_print_header();
- ksft_set_plan(3);
-
- /*
- * We'll use memory that is not mapped into the VM for simplicity.
- * Shared zeropages are enabled/disabled per-process.
- */
- mem = mmap(0, 3 * pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
- TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");
-
- /* Disable THP. Ignore errors on older kernels. */
- madvise(mem, 3 * pagesize, MADV_NOHUGEPAGE);
-
- page0 = mem;
- page1 = page0 + pagesize;
- page2 = page1 + pagesize;
-
- /* Can we even detect shared zeropages? */
- pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
- TEST_REQUIRE(pagemap_fd >= 0);
-
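-	/* Read the page; the empty asm keeps the compiler from optimizing the read away. */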
- tmp = *page0;
- asm volatile("" : "+r" (tmp));
- TEST_REQUIRE(maps_shared_zeropage(pagemap_fd, page0) == 1);
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
- /* Verify that we get the shared zeropage after VM creation. */
- tmp = *page1;
- asm volatile("" : "+r" (tmp));
- ksft_test_result(maps_shared_zeropage(pagemap_fd, page1) == 1,
- "Shared zeropages should be enabled\n");
-
- /*
- * Let our VM execute a storage key instruction that should
- * unshare all shared zeropages.
- */
- vcpu_run(vcpu);
- get_ucall(vcpu, &uc);
- TEST_ASSERT_EQ(uc.cmd, UCALL_DONE);
-
- /* Verify that we don't have a shared zeropage anymore. */
- ksft_test_result(!maps_shared_zeropage(pagemap_fd, page1),
- "Shared zeropage should be gone\n");
-
- /* Verify that we don't get any new shared zeropages. */
- tmp = *page2;
- asm volatile("" : "+r" (tmp));
- ksft_test_result(!maps_shared_zeropage(pagemap_fd, page2),
- "Shared zeropages should be disabled\n");
-
- kvm_vm_free(vm);
-
- ksft_finished();
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Test for s390x KVM_CAP_SYNC_REGS
- *
- * Based on the same test for x86:
- * Copyright (C) 2018, Google LLC.
- *
- * Adaptions for s390x:
- * Copyright (C) 2019, Red Hat, Inc.
- *
- * Test expected behavior of the KVM_CAP_SYNC_REGS functionality.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "diag318_test_handler.h"
-#include "kselftest.h"
-
-static void guest_code(void)
-{
- /*
-	 * We embed diag 501 here instead of doing a ucall so that the
-	 * compiler cannot have clobbered r11 by the time we exit to the host.
- */
- asm volatile (
- "0: diag 0,0,0x501\n"
- " ahi 11,1\n"
- " j 0b\n"
- );
-}
-
-#define REG_COMPARE(reg) \
- TEST_ASSERT(left->reg == right->reg, \
- "Register " #reg \
- " values did not match: 0x%llx, 0x%llx", \
- left->reg, right->reg)
-
-#define REG_COMPARE32(reg) \
- TEST_ASSERT(left->reg == right->reg, \
- "Register " #reg \
- " values did not match: 0x%x, 0x%x", \
- left->reg, right->reg)
-
-
-static void compare_regs(struct kvm_regs *left, struct kvm_sync_regs *right)
-{
- int i;
-
- for (i = 0; i < 16; i++)
- REG_COMPARE(gprs[i]);
-}
-
-static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right)
-{
- int i;
-
- for (i = 0; i < 16; i++)
- REG_COMPARE32(acrs[i]);
-
- for (i = 0; i < 16; i++)
- REG_COMPARE(crs[i]);
-}
-
-#undef REG_COMPARE
-
-#define TEST_SYNC_FIELDS (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS|KVM_SYNC_DIAG318)
-#define INVALID_SYNC_FIELD 0x80000000
-
-void test_read_invalid(struct kvm_vcpu *vcpu)
-{
- struct kvm_run *run = vcpu->run;
- int rv;
-
- /* Request reading invalid register set from VCPU. */
- run->kvm_valid_regs = INVALID_SYNC_FIELD;
- rv = _vcpu_run(vcpu);
- TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
- rv);
- run->kvm_valid_regs = 0;
-
- run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
- rv = _vcpu_run(vcpu);
- TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
- rv);
- run->kvm_valid_regs = 0;
-}
-
-void test_set_invalid(struct kvm_vcpu *vcpu)
-{
- struct kvm_run *run = vcpu->run;
- int rv;
-
- /* Request setting invalid register set into VCPU. */
- run->kvm_dirty_regs = INVALID_SYNC_FIELD;
- rv = _vcpu_run(vcpu);
- TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
- rv);
- run->kvm_dirty_regs = 0;
-
- run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
- rv = _vcpu_run(vcpu);
- TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
- rv);
- run->kvm_dirty_regs = 0;
-}
-
-void test_req_and_verify_all_valid_regs(struct kvm_vcpu *vcpu)
-{
- struct kvm_run *run = vcpu->run;
- struct kvm_sregs sregs;
- struct kvm_regs regs;
- int rv;
-
- /* Request and verify all valid register sets. */
- run->kvm_valid_regs = TEST_SYNC_FIELDS;
- rv = _vcpu_run(vcpu);
- TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
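-	/* icptcode 4 is an instruction interception; ipa/ipb identify the diag 0x501 */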
- TEST_ASSERT(run->s390_sieic.icptcode == 4 &&
- (run->s390_sieic.ipa >> 8) == 0x83 &&
- (run->s390_sieic.ipb >> 16) == 0x501,
- "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x",
- run->s390_sieic.icptcode, run->s390_sieic.ipa,
- run->s390_sieic.ipb);
-
- vcpu_regs_get(vcpu, ®s);
- compare_regs(®s, &run->s.regs);
-
- vcpu_sregs_get(vcpu, &sregs);
- compare_sregs(&sregs, &run->s.regs);
-}
-
-void test_set_and_verify_various_reg_values(struct kvm_vcpu *vcpu)
-{
- struct kvm_run *run = vcpu->run;
- struct kvm_sregs sregs;
- struct kvm_regs regs;
- int rv;
-
- /* Set and verify various register values */
- run->s.regs.gprs[11] = 0xBAD1DEA;
- run->s.regs.acrs[0] = 1 << 11;
-
- run->kvm_valid_regs = TEST_SYNC_FIELDS;
- run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS;
-
- if (get_diag318_info() > 0) {
- run->s.regs.diag318 = get_diag318_info();
- run->kvm_dirty_regs |= KVM_SYNC_DIAG318;
- }
-
- rv = _vcpu_run(vcpu);
- TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
- TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1,
- "r11 sync regs value incorrect 0x%llx.",
- run->s.regs.gprs[11]);
- TEST_ASSERT(run->s.regs.acrs[0] == 1 << 11,
- "acr0 sync regs value incorrect 0x%x.",
- run->s.regs.acrs[0]);
- TEST_ASSERT(run->s.regs.diag318 == get_diag318_info(),
- "diag318 sync regs value incorrect 0x%llx.",
- run->s.regs.diag318);
-
- vcpu_regs_get(vcpu, ®s);
- compare_regs(®s, &run->s.regs);
-
- vcpu_sregs_get(vcpu, &sregs);
- compare_sregs(&sregs, &run->s.regs);
-}
-
-void test_clear_kvm_dirty_regs_bits(struct kvm_vcpu *vcpu)
-{
- struct kvm_run *run = vcpu->run;
- int rv;
-
- /* Clear kvm_dirty_regs bits, verify new s.regs values are
- * overwritten with existing guest values.
- */
- run->kvm_valid_regs = TEST_SYNC_FIELDS;
- run->kvm_dirty_regs = 0;
- run->s.regs.gprs[11] = 0xDEADBEEF;
- run->s.regs.diag318 = 0x4B1D;
- rv = _vcpu_run(vcpu);
- TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
- TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF,
- "r11 sync regs value incorrect 0x%llx.",
- run->s.regs.gprs[11]);
- TEST_ASSERT(run->s.regs.diag318 != 0x4B1D,
- "diag318 sync regs value incorrect 0x%llx.",
- run->s.regs.diag318);
-}
-
-struct testdef {
- const char *name;
- void (*test)(struct kvm_vcpu *vcpu);
-} testlist[] = {
- { "read invalid", test_read_invalid },
- { "set invalid", test_set_invalid },
- { "request+verify all valid regs", test_req_and_verify_all_valid_regs },
- { "set+verify various regs", test_set_and_verify_various_reg_values },
- { "clear kvm_dirty_regs bits", test_clear_kvm_dirty_regs_bits },
-};
-
-int main(int argc, char *argv[])
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- int idx;
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
-
- ksft_print_header();
-
- ksft_set_plan(ARRAY_SIZE(testlist));
-
- /* Create VM */
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
- for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
- testlist[idx].test(vcpu);
- ksft_test_result_pass("%s\n", testlist[idx].name);
- }
-
- kvm_vm_free(vm);
-
- ksft_finished(); /* Print results and exit() accordingly */
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Test TEST PROTECTION emulation.
- *
- * Copyright IBM Corp. 2021
- */
-#include <sys/mman.h>
-#include "test_util.h"
-#include "kvm_util.h"
-#include "kselftest.h"
-#include "ucall_common.h"
-#include "processor.h"
-
-#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38))
-#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39))
-
-static __aligned(PAGE_SIZE) uint8_t pages[2][PAGE_SIZE];
-static uint8_t *const page_store_prot = pages[0];
-static uint8_t *const page_fetch_prot = pages[1];
-
-/* A nonzero return value indicates that the address is not mapped */
-static int set_storage_key(void *addr, uint8_t key)
-{
- int not_mapped = 0;
-
- asm volatile (
- "lra %[addr], 0(0,%[addr])\n"
- " jz 0f\n"
- " llill %[not_mapped],1\n"
- " j 1f\n"
- "0: sske %[key], %[addr]\n"
- "1:"
- : [addr] "+&a" (addr), [not_mapped] "+r" (not_mapped)
- : [key] "r" (key)
- : "cc"
- );
- return -not_mapped;
-}
-
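-/* The enum values correspond to the TPROT condition codes 0 through 3. */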
-enum permission {
- READ_WRITE = 0,
- READ = 1,
- RW_PROTECTED = 2,
- TRANSL_UNAVAIL = 3,
-};
-
-static enum permission test_protection(void *addr, uint8_t key)
-{
- uint64_t mask;
-
- asm volatile (
- "tprot %[addr], 0(%[key])\n"
- " ipm %[mask]\n"
- : [mask] "=r" (mask)
- : [addr] "Q" (*(char *)addr),
- [key] "a" (key)
- : "cc"
- );
-
- return (enum permission)(mask >> 28);
-}
-
-enum stage {
- STAGE_INIT_SIMPLE,
- TEST_SIMPLE,
- STAGE_INIT_FETCH_PROT_OVERRIDE,
- TEST_FETCH_PROT_OVERRIDE,
- TEST_STORAGE_PROT_OVERRIDE,
- STAGE_END /* must be the last entry (it's the amount of tests) */
-};
-
-struct test {
- enum stage stage;
- void *addr;
- uint8_t key;
- enum permission expected;
-} tests[] = {
- /*
- * We perform each test in the array by executing TEST PROTECTION on
- * the specified addr with the specified key and checking if the returned
- * permissions match the expected value.
- * Both guest and host cooperate to set up the required test conditions.
- * A central condition is that the page targeted by addr has to be DAT
- * protected in the host mappings, in order for KVM to emulate the
- * TEST PROTECTION instruction.
- * Since the page tables are shared, the host uses mprotect to achieve
- * this.
- *
-	 * Tests resulting in RW_PROTECTED/TRANSL_UNAVAIL will be interpreted
-	 * by SIE, not KVM, but there is no harm in running them as well.
-	 * See "Enhanced Suppression-on-Protection Facilities in the
-	 * Interpretive-Execution Mode".
- */
- /*
- * guest: set storage key of page_store_prot to 1
- * storage key of page_fetch_prot to 9 and enable
- * protection for it
- * STAGE_INIT_SIMPLE
- * host: write protect both via mprotect
- */
- /* access key 0 matches any storage key -> RW */
- { TEST_SIMPLE, page_store_prot, 0x00, READ_WRITE },
- /* access key matches storage key -> RW */
- { TEST_SIMPLE, page_store_prot, 0x10, READ_WRITE },
- /* mismatched keys, but no fetch protection -> RO */
- { TEST_SIMPLE, page_store_prot, 0x20, READ },
- /* access key 0 matches any storage key -> RW */
- { TEST_SIMPLE, page_fetch_prot, 0x00, READ_WRITE },
- /* access key matches storage key -> RW */
- { TEST_SIMPLE, page_fetch_prot, 0x90, READ_WRITE },
- /* mismatched keys, fetch protection -> inaccessible */
- { TEST_SIMPLE, page_fetch_prot, 0x10, RW_PROTECTED },
- /* page 0 not mapped yet -> translation not available */
- { TEST_SIMPLE, (void *)0x00, 0x10, TRANSL_UNAVAIL },
- /*
- * host: try to map page 0
- * guest: set storage key of page 0 to 9 and enable fetch protection
- * STAGE_INIT_FETCH_PROT_OVERRIDE
- * host: write protect page 0
- * enable fetch protection override
- */
- /* mismatched keys, fetch protection, but override applies -> RO */
- { TEST_FETCH_PROT_OVERRIDE, (void *)0x00, 0x10, READ },
- /* mismatched keys, fetch protection, override applies to 0-2048 only -> inaccessible */
- { TEST_FETCH_PROT_OVERRIDE, (void *)2049, 0x10, RW_PROTECTED },
- /*
- * host: enable storage protection override
- */
- /* mismatched keys, but override applies (storage key 9) -> RW */
- { TEST_STORAGE_PROT_OVERRIDE, page_fetch_prot, 0x10, READ_WRITE },
- /* mismatched keys, no fetch protection, override doesn't apply -> RO */
- { TEST_STORAGE_PROT_OVERRIDE, page_store_prot, 0x20, READ },
- /* mismatched keys, but override applies (storage key 9) -> RW */
- { TEST_STORAGE_PROT_OVERRIDE, (void *)2049, 0x10, READ_WRITE },
- /* end marker */
- { STAGE_END, 0, 0, 0 },
-};
-
-static enum stage perform_next_stage(int *i, bool mapped_0)
-{
- enum stage stage = tests[*i].stage;
- enum permission result;
- bool skip;
-
- for (; tests[*i].stage == stage; (*i)++) {
- /*
-		 * Some fetch protection override tests require that page 0
-		 * be mapped. However, when the host tries to map that page via
-		 * vm_vaddr_alloc, it may happen that some other page gets
-		 * mapped instead. In order to skip these tests, we detect this
-		 * inside the guest.
- */
- skip = tests[*i].addr < (void *)PAGE_SIZE &&
- tests[*i].expected != TRANSL_UNAVAIL &&
- !mapped_0;
- if (!skip) {
- result = test_protection(tests[*i].addr, tests[*i].key);
- __GUEST_ASSERT(result == tests[*i].expected,
- "Wanted %u, got %u, for i = %u",
- tests[*i].expected, result, *i);
- }
- }
- return stage;
-}
-
-static void guest_code(void)
-{
- bool mapped_0;
- int i = 0;
-
- GUEST_ASSERT_EQ(set_storage_key(page_store_prot, 0x10), 0);
- GUEST_ASSERT_EQ(set_storage_key(page_fetch_prot, 0x98), 0);
- GUEST_SYNC(STAGE_INIT_SIMPLE);
- GUEST_SYNC(perform_next_stage(&i, false));
-
- /* Fetch-protection override */
- mapped_0 = !set_storage_key((void *)0, 0x98);
- GUEST_SYNC(STAGE_INIT_FETCH_PROT_OVERRIDE);
- GUEST_SYNC(perform_next_stage(&i, mapped_0));
-
- /* Storage-protection override */
- GUEST_SYNC(perform_next_stage(&i, mapped_0));
-}
-
-#define HOST_SYNC_NO_TAP(vcpup, stage) \
-({ \
- struct kvm_vcpu *__vcpu = (vcpup); \
- struct ucall uc; \
- int __stage = (stage); \
- \
- vcpu_run(__vcpu); \
- get_ucall(__vcpu, &uc); \
- if (uc.cmd == UCALL_ABORT) \
- REPORT_GUEST_ASSERT(uc); \
- TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC); \
- TEST_ASSERT_EQ(uc.args[1], __stage); \
-})
-
-#define HOST_SYNC(vcpu, stage) \
-({ \
- HOST_SYNC_NO_TAP(vcpu, stage); \
- ksft_test_result_pass("" #stage "\n"); \
-})
-
-int main(int argc, char *argv[])
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct kvm_run *run;
- vm_vaddr_t guest_0_page;
-
- ksft_print_header();
- ksft_set_plan(STAGE_END);
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- run = vcpu->run;
-
- HOST_SYNC(vcpu, STAGE_INIT_SIMPLE);
- mprotect(addr_gva2hva(vm, (vm_vaddr_t)pages), PAGE_SIZE * 2, PROT_READ);
- HOST_SYNC(vcpu, TEST_SIMPLE);
-
- guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0);
- if (guest_0_page != 0) {
- /* Use NO_TAP so we don't get a PASS print */
- HOST_SYNC_NO_TAP(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE);
- ksft_test_result_skip("STAGE_INIT_FETCH_PROT_OVERRIDE - "
- "Did not allocate page at 0\n");
- } else {
- HOST_SYNC(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE);
- }
- if (guest_0_page == 0)
- mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ);
- run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
- run->kvm_dirty_regs = KVM_SYNC_CRS;
- HOST_SYNC(vcpu, TEST_FETCH_PROT_OVERRIDE);
-
- run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
- run->kvm_dirty_regs = KVM_SYNC_CRS;
- HOST_SYNC(vcpu, TEST_STORAGE_PROT_OVERRIDE);
-
- kvm_vm_free(vm);
-
- ksft_finished(); /* Print results and exit() accordingly */
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Test code for the s390x kvm ucontrol interface
- *
- * Copyright IBM Corp. 2024
- *
- * Authors:
- * Christoph Schlameuss <schlameuss@linux.ibm.com>
- */
-#include "debug_print.h"
-#include "kselftest_harness.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "sie.h"
-
-#include <linux/capability.h>
-#include <linux/sizes.h>
-
-#define PGM_SEGMENT_TRANSLATION 0x10
-
-#define VM_MEM_SIZE (4 * SZ_1M)
-#define VM_MEM_EXT_SIZE (2 * SZ_1M)
-#define VM_MEM_MAX_M ((VM_MEM_SIZE + VM_MEM_EXT_SIZE) / SZ_1M)
-
-/* Declare capget() directly so capabilities can be checked without libcap */
-int capget(cap_user_header_t header, cap_user_data_t data);
-
-/**
- * Creating user-controlled virtual machines on s390 requires
- * KVM_CAP_S390_UCONTROL and the KVM_VM_S390_UCONTROL flag, and must be
- * done as a privileged user (CAP_SYS_ADMIN).
- */
-void require_ucontrol_admin(void)
-{
- struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3];
- struct __user_cap_header_struct hdr = {
- .version = _LINUX_CAPABILITY_VERSION_3,
- };
- int rc;
-
- rc = capget(&hdr, data);
- TEST_ASSERT_EQ(0, rc);
- TEST_REQUIRE((data->effective & CAP_TO_MASK(CAP_SYS_ADMIN)) > 0);
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL));
-}
-
-/* Test program setting some registers and looping */
-extern char test_gprs_asm[];
-asm("test_gprs_asm:\n"
- "xgr %r0, %r0\n"
- "lgfi %r1,1\n"
- "lgfi %r2,2\n"
- "lgfi %r3,3\n"
- "lgfi %r4,4\n"
- "lgfi %r5,5\n"
- "lgfi %r6,6\n"
- "lgfi %r7,7\n"
- "0:\n"
- " diag 0,0,0x44\n"
- " ahi %r0,1\n"
- " j 0b\n"
-);
-
-/* Test program manipulating memory */
-extern char test_mem_asm[];
-asm("test_mem_asm:\n"
- "xgr %r0, %r0\n"
-
- "0:\n"
- " ahi %r0,1\n"
- " st %r1,0(%r5,%r6)\n"
-
- " xgr %r1,%r1\n"
- " l %r1,0(%r5,%r6)\n"
- " ahi %r0,1\n"
- " diag 0,0,0x44\n"
-
- " j 0b\n"
-);
-
-/* Test program manipulating storage keys */
-extern char test_skey_asm[];
-asm("test_skey_asm:\n"
- "xgr %r0, %r0\n"
-
- "0:\n"
- " ahi %r0,1\n"
- " st %r1,0(%r5,%r6)\n"
-
- " iske %r1,%r6\n"
- " ahi %r0,1\n"
- " diag 0,0,0x44\n"
-
- " sske %r1,%r6\n"
- " xgr %r1,%r1\n"
- " iske %r1,%r6\n"
- " ahi %r0,1\n"
- " diag 0,0,0x44\n"
-
- " rrbe %r1,%r6\n"
- " iske %r1,%r6\n"
- " ahi %r0,1\n"
- " diag 0,0,0x44\n"
-
- " j 0b\n"
-);
-
-FIXTURE(uc_kvm)
-{
- struct kvm_s390_sie_block *sie_block;
- struct kvm_run *run;
- uintptr_t base_gpa;
- uintptr_t code_gpa;
- uintptr_t base_hva;
- uintptr_t code_hva;
- int kvm_run_size;
- vm_paddr_t pgd;
- void *vm_mem;
- int vcpu_fd;
- int kvm_fd;
- int vm_fd;
-};
-
-/**
- * create VM with single vcpu, map kvm_run and SIE control block for easy access
- */
-FIXTURE_SETUP(uc_kvm)
-{
- struct kvm_s390_vm_cpu_processor info;
- int rc;
-
- require_ucontrol_admin();
-
- self->kvm_fd = open_kvm_dev_path_or_exit();
- self->vm_fd = ioctl(self->kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
- ASSERT_GE(self->vm_fd, 0);
-
- kvm_device_attr_get(self->vm_fd, KVM_S390_VM_CPU_MODEL,
- KVM_S390_VM_CPU_PROCESSOR, &info);
- TH_LOG("create VM 0x%llx", info.cpuid);
-
- self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0);
- ASSERT_GE(self->vcpu_fd, 0);
-
- self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
- ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run))
- TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size));
- self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size,
- PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
- ASSERT_NE(self->run, MAP_FAILED);
- /**
- * For virtual cpus that have been created with S390 user controlled
- * virtual machines, the resulting vcpu fd can be memory mapped at page
- * offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of
- * the virtual cpu's hardware control block.
- */
- self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE,
- PROT_READ | PROT_WRITE, MAP_SHARED,
- self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT);
- ASSERT_NE(self->sie_block, MAP_FAILED);
-
- TH_LOG("VM created %p %p", self->run, self->sie_block);
-
- self->base_gpa = 0;
- self->code_gpa = self->base_gpa + (3 * SZ_1M);
-
- self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_MAX_M * SZ_1M);
-	ASSERT_NE(NULL, self->vm_mem) TH_LOG("aligned_alloc failed %u", errno);
- self->base_hva = (uintptr_t)self->vm_mem;
- self->code_hva = self->base_hva - self->base_gpa + self->code_gpa;
- struct kvm_s390_ucas_mapping map = {
- .user_addr = self->base_hva,
- .vcpu_addr = self->base_gpa,
- .length = VM_MEM_SIZE,
- };
- TH_LOG("ucas map %p %p 0x%llx",
- (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
- rc = ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map);
- ASSERT_EQ(0, rc) TH_LOG("ucas map result %d not expected, %s",
- rc, strerror(errno));
-
- TH_LOG("page in %p", (void *)self->base_gpa);
- rc = ioctl(self->vcpu_fd, KVM_S390_VCPU_FAULT, self->base_gpa);
- ASSERT_EQ(0, rc) TH_LOG("vcpu fault (%p) result %d not expected, %s",
- (void *)self->base_hva, rc, strerror(errno));
-
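-	/* clear the STOPPED cpuflag so the vcpu can run (it appears to start out stopped) */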
- self->sie_block->cpuflags &= ~CPUSTAT_STOPPED;
-}
-
-FIXTURE_TEARDOWN(uc_kvm)
-{
- munmap(self->sie_block, PAGE_SIZE);
- munmap(self->run, self->kvm_run_size);
- close(self->vcpu_fd);
- close(self->vm_fd);
- close(self->kvm_fd);
- free(self->vm_mem);
-}
-
-TEST_F(uc_kvm, uc_sie_assertions)
-{
- /* assert interception of Code 08 (Program Interruption) is set */
- EXPECT_EQ(0, self->sie_block->ecb & ECB_SPECI);
-}
-
-TEST_F(uc_kvm, uc_attr_mem_limit)
-{
- u64 limit;
- struct kvm_device_attr attr = {
- .group = KVM_S390_VM_MEM_CTRL,
- .attr = KVM_S390_VM_MEM_LIMIT_SIZE,
- .addr = (unsigned long)&limit,
- };
- int rc;
-
- rc = ioctl(self->vm_fd, KVM_GET_DEVICE_ATTR, &attr);
- EXPECT_EQ(0, rc);
- EXPECT_EQ(~0UL, limit);
-
- /* assert set not supported */
- rc = ioctl(self->vm_fd, KVM_SET_DEVICE_ATTR, &attr);
- EXPECT_EQ(-1, rc);
- EXPECT_EQ(EINVAL, errno);
-}
-
-TEST_F(uc_kvm, uc_no_dirty_log)
-{
- struct kvm_dirty_log dlog;
- int rc;
-
- rc = ioctl(self->vm_fd, KVM_GET_DIRTY_LOG, &dlog);
- EXPECT_EQ(-1, rc);
- EXPECT_EQ(EINVAL, errno);
-}
-
-/**
- * Assert HPAGE CAP cannot be enabled on UCONTROL VM
- */
-TEST(uc_cap_hpage)
-{
- int rc, kvm_fd, vm_fd, vcpu_fd;
- struct kvm_enable_cap cap = {
- .cap = KVM_CAP_S390_HPAGE_1M,
- };
-
- require_ucontrol_admin();
-
- kvm_fd = open_kvm_dev_path_or_exit();
- vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
- ASSERT_GE(vm_fd, 0);
-
- /* assert hpages are not supported on ucontrol vm */
- rc = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_HPAGE_1M);
- EXPECT_EQ(0, rc);
-
- /* Test that KVM_CAP_S390_HPAGE_1M can't be enabled for a ucontrol vm */
- rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap);
- EXPECT_EQ(-1, rc);
- EXPECT_EQ(EINVAL, errno);
-
- /* assert HPAGE CAP is rejected after vCPU creation */
- vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
- ASSERT_GE(vcpu_fd, 0);
- rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap);
- EXPECT_EQ(-1, rc);
- EXPECT_EQ(EBUSY, errno);
-
- close(vcpu_fd);
- close(vm_fd);
- close(kvm_fd);
-}
-
-/* calculate host virtual addr from guest physical addr */
-static void *gpa2hva(FIXTURE_DATA(uc_kvm) *self, u64 gpa)
-{
- return (void *)(self->base_hva - self->base_gpa + gpa);
-}
-
-/* map / make additional memory available */
-static int uc_map_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length)
-{
- struct kvm_s390_ucas_mapping map = {
- .user_addr = (u64)gpa2hva(self, vcpu_addr),
- .vcpu_addr = vcpu_addr,
- .length = length,
- };
- pr_info("ucas map %p %p 0x%llx",
- (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
- return ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map);
-}
-
-/* unmap previously mapped memory */
-static int uc_unmap_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length)
-{
- struct kvm_s390_ucas_mapping map = {
- .user_addr = (u64)gpa2hva(self, vcpu_addr),
- .vcpu_addr = vcpu_addr,
- .length = length,
- };
- pr_info("ucas unmap %p %p 0x%llx",
- (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
- return ioctl(self->vcpu_fd, KVM_S390_UCAS_UNMAP, &map);
-}
-
-/* handle ucontrol exit by mapping the accessed segment */
-static void uc_handle_exit_ucontrol(FIXTURE_DATA(uc_kvm) *self)
-{
- struct kvm_run *run = self->run;
- u64 seg_addr;
- int rc;
-
- TEST_ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
- switch (run->s390_ucontrol.pgm_code) {
- case PGM_SEGMENT_TRANSLATION:
- seg_addr = run->s390_ucontrol.trans_exc_code & ~(SZ_1M - 1);
- pr_info("ucontrol pic segment translation 0x%llx, mapping segment 0x%lx\n",
- run->s390_ucontrol.trans_exc_code, seg_addr);
- /* map / make additional memory available */
- rc = uc_map_ext(self, seg_addr, SZ_1M);
- TEST_ASSERT_EQ(0, rc);
- break;
- default:
- TEST_FAIL("UNEXPECTED PGM CODE %d", run->s390_ucontrol.pgm_code);
- }
-}
-
-/*
- * Enable storage-key handling for the guest: disable keyless-subset (KSS)
- * mode and the interception of the storage key instructions.
- */
-static void uc_skey_enable(FIXTURE_DATA(uc_kvm) *self)
-{
- struct kvm_s390_sie_block *sie_block = self->sie_block;
-
- /* disable KSS */
- sie_block->cpuflags &= ~CPUSTAT_KSS;
- /* disable skey inst interception */
- sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
-}
-
-/*
- * Handle the instruction interception.
- * Returns true if the interception was handled and execution can continue.
- */
-static bool uc_handle_insn_ic(FIXTURE_DATA(uc_kvm) *self)
-{
- struct kvm_s390_sie_block *sie_block = self->sie_block;
- int ilen = insn_length(sie_block->ipa >> 8);
- struct kvm_run *run = self->run;
-
- switch (run->s390_sieic.ipa) {
- case 0xB229: /* ISKE */
- case 0xB22b: /* SSKE */
- case 0xB22a: /* RRBE */
- uc_skey_enable(self);
-
- /* rewind to reexecute intercepted instruction */
- run->psw_addr = run->psw_addr - ilen;
- pr_info("rewind guest addr to 0x%.16llx\n", run->psw_addr);
- return true;
- default:
- return false;
- }
-}
-
-/*
- * Handle the SIEIC exit; fail on interception codes not expected in the
- * test cases.
- * Returns true if the interception was handled and execution can continue.
- */
-static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) *self)
-{
- struct kvm_s390_sie_block *sie_block = self->sie_block;
- struct kvm_run *run = self->run;
-
- /* check SIE interception code */
- pr_info("sieic: 0x%.2x 0x%.4x 0x%.8x\n",
- run->s390_sieic.icptcode,
- run->s390_sieic.ipa,
- run->s390_sieic.ipb);
- switch (run->s390_sieic.icptcode) {
- case ICPT_INST:
- /* end execution in caller on intercepted instruction */
- pr_info("sie instruction interception\n");
- return uc_handle_insn_ic(self);
- case ICPT_KSS:
- uc_skey_enable(self);
- return true;
- case ICPT_OPEREXC:
- /* operation exception */
- TEST_FAIL("sie exception on %.4x%.8x", sie_block->ipa, sie_block->ipb);
- default:
- TEST_FAIL("UNEXPECTED SIEIC CODE %d", run->s390_sieic.icptcode);
- }
- return true;
-}
-
-/* verify VM state on exit */
-static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) *self)
-{
- struct kvm_run *run = self->run;
-
- switch (run->exit_reason) {
- case KVM_EXIT_S390_UCONTROL:
-		/*
-		 * Check the program interruption code and handle a page
-		 * fault by performing a ucas map.
-		 */
- uc_handle_exit_ucontrol(self);
- break;
- case KVM_EXIT_S390_SIEIC:
- return uc_handle_sieic(self);
- default:
- pr_info("exit_reason %2d not handled\n", run->exit_reason);
- }
- return true;
-}
-
-/* run the VM until interrupted */
-static int uc_run_once(FIXTURE_DATA(uc_kvm) *self)
-{
- int rc;
-
- rc = ioctl(self->vcpu_fd, KVM_RUN, NULL);
- print_run(self->run, self->sie_block);
- print_regs(self->run);
- pr_debug("run %d / %d %s\n", rc, errno, strerror(errno));
- return rc;
-}
-
-static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) *self)
-{
- struct kvm_s390_sie_block *sie_block = self->sie_block;
-
- /* assert vm was interrupted by diag 0x0044 */
- TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason);
- TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode);
- TEST_ASSERT_EQ(0x8300, sie_block->ipa);
- TEST_ASSERT_EQ(0x440000, sie_block->ipb);
-}
-
-TEST_F(uc_kvm, uc_no_user_region)
-{
- struct kvm_userspace_memory_region region = {
- .slot = 1,
- .guest_phys_addr = self->code_gpa,
- .memory_size = VM_MEM_EXT_SIZE,
- .userspace_addr = (uintptr_t)self->code_hva,
- };
- struct kvm_userspace_memory_region2 region2 = {
- .slot = 1,
- .guest_phys_addr = self->code_gpa,
- .memory_size = VM_MEM_EXT_SIZE,
- .userspace_addr = (uintptr_t)self->code_hva,
- };
-
- ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, ®ion));
- ASSERT_EQ(EINVAL, errno);
-
- ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION2, ®ion2));
- ASSERT_EQ(EINVAL, errno);
-}
-
-TEST_F(uc_kvm, uc_map_unmap)
-{
- struct kvm_sync_regs *sync_regs = &self->run->s.regs;
- struct kvm_run *run = self->run;
- const u64 disp = 1;
- int rc;
-
- /* copy test_mem_asm to code_hva / code_gpa */
- TH_LOG("copy code %p to vm mapped memory %p / %p",
- &test_mem_asm, (void *)self->code_hva, (void *)self->code_gpa);
- memcpy((void *)self->code_hva, &test_mem_asm, PAGE_SIZE);
-
- /* DAT disabled + 64 bit mode */
- run->psw_mask = 0x0000000180000000ULL;
- run->psw_addr = self->code_gpa;
-
-	/* set register content for test_mem_asm to access unmapped memory */
- sync_regs->gprs[1] = 0x55;
- sync_regs->gprs[5] = self->base_gpa;
- sync_regs->gprs[6] = VM_MEM_SIZE + disp;
- run->kvm_dirty_regs |= KVM_SYNC_GPRS;
-
- /* run and expect to fail with ucontrol pic segment translation */
- ASSERT_EQ(0, uc_run_once(self));
- ASSERT_EQ(1, sync_regs->gprs[0]);
- ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
-
- ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code);
- ASSERT_EQ(self->base_gpa + VM_MEM_SIZE, run->s390_ucontrol.trans_exc_code);
-
-	/* fail to map memory at a non-segment-aligned address */
- rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE + disp, VM_MEM_EXT_SIZE);
- ASSERT_GT(0, rc)
- TH_LOG("ucas map for non segment address should fail but didn't; "
- "result %d not expected, %s", rc, strerror(errno));
-
- /* map / make additional memory available */
- rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE);
- ASSERT_EQ(0, rc)
- TH_LOG("ucas map result %d not expected, %s", rc, strerror(errno));
- ASSERT_EQ(0, uc_run_once(self));
- ASSERT_EQ(false, uc_handle_exit(self));
- uc_assert_diag44(self);
-
- /* assert registers and memory are in expected state */
- ASSERT_EQ(2, sync_regs->gprs[0]);
- ASSERT_EQ(0x55, sync_regs->gprs[1]);
- ASSERT_EQ(0x55, *(u32 *)gpa2hva(self, self->base_gpa + VM_MEM_SIZE + disp));
-
- /* unmap and run loop again */
- rc = uc_unmap_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE);
- ASSERT_EQ(0, rc)
- TH_LOG("ucas unmap result %d not expected, %s", rc, strerror(errno));
- ASSERT_EQ(0, uc_run_once(self));
- ASSERT_EQ(3, sync_regs->gprs[0]);
- ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
- ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code);
- /* handle ucontrol exit and remap memory after previous map and unmap */
- ASSERT_EQ(true, uc_handle_exit(self));
-}
-
-TEST_F(uc_kvm, uc_gprs)
-{
- struct kvm_sync_regs *sync_regs = &self->run->s.regs;
- struct kvm_run *run = self->run;
- struct kvm_regs regs = {};
-
- /* Set registers to values that are different from the ones that we expect below */
- for (int i = 0; i < 8; i++)
- sync_regs->gprs[i] = 8;
- run->kvm_dirty_regs |= KVM_SYNC_GPRS;
-
- /* copy test_gprs_asm to code_hva / code_gpa */
- TH_LOG("copy code %p to vm mapped memory %p / %p",
- &test_gprs_asm, (void *)self->code_hva, (void *)self->code_gpa);
- memcpy((void *)self->code_hva, &test_gprs_asm, PAGE_SIZE);
-
- /* DAT disabled + 64 bit mode */
- run->psw_mask = 0x0000000180000000ULL;
- run->psw_addr = self->code_gpa;
-
- /* run and expect interception of diag 44 */
- ASSERT_EQ(0, uc_run_once(self));
- ASSERT_EQ(false, uc_handle_exit(self));
- uc_assert_diag44(self);
-
- /* Retrieve and check guest register values */
- ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, ®s));
- for (int i = 0; i < 8; i++) {
- ASSERT_EQ(i, regs.gprs[i]);
- ASSERT_EQ(i, sync_regs->gprs[i]);
- }
-
- /* run and expect interception of diag 44 again */
- ASSERT_EQ(0, uc_run_once(self));
- ASSERT_EQ(false, uc_handle_exit(self));
- uc_assert_diag44(self);
-
- /* check continued increment of register 0 value */
- ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, ®s));
- ASSERT_EQ(1, regs.gprs[0]);
- ASSERT_EQ(1, sync_regs->gprs[0]);
-}
-
-TEST_F(uc_kvm, uc_skey)
-{
- struct kvm_s390_sie_block *sie_block = self->sie_block;
- struct kvm_sync_regs *sync_regs = &self->run->s.regs;
- u64 test_vaddr = VM_MEM_SIZE - (SZ_1M / 2);
- struct kvm_run *run = self->run;
- const u8 skeyvalue = 0x34;
-
- /* copy test_skey_asm to code_hva / code_gpa */
- TH_LOG("copy code %p to vm mapped memory %p / %p",
- &test_skey_asm, (void *)self->code_hva, (void *)self->code_gpa);
- memcpy((void *)self->code_hva, &test_skey_asm, PAGE_SIZE);
-
- /* set register content for test_skey_asm to access not mapped memory */
- sync_regs->gprs[1] = skeyvalue;
- sync_regs->gprs[5] = self->base_gpa;
- sync_regs->gprs[6] = test_vaddr;
- run->kvm_dirty_regs |= KVM_SYNC_GPRS;
-
- /* DAT disabled + 64 bit mode */
- run->psw_mask = 0x0000000180000000ULL;
- run->psw_addr = self->code_gpa;
-
- ASSERT_EQ(0, uc_run_once(self));
- ASSERT_EQ(true, uc_handle_exit(self));
- ASSERT_EQ(1, sync_regs->gprs[0]);
-
- /* ISKE */
- ASSERT_EQ(0, uc_run_once(self));
-
- /*
-	 * Bail out and skip the test if, after uc_skey_enable() was executed,
-	 * ISKE is still intercepted. In that case the instructions are not
-	 * handled by the kernel, so there is nothing to test here.
- */
- TEST_ASSERT_EQ(0, sie_block->cpuflags & CPUSTAT_KSS);
- TEST_ASSERT_EQ(0, sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE));
- TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason);
- TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode);
- TEST_REQUIRE(sie_block->ipa != 0xb229);
-
- /* ISKE contd. */
- ASSERT_EQ(false, uc_handle_exit(self));
- ASSERT_EQ(2, sync_regs->gprs[0]);
- /* assert initial skey (ACC = 0, R & C = 1) */
- ASSERT_EQ(0x06, sync_regs->gprs[1]);
- uc_assert_diag44(self);
-
- /* SSKE + ISKE */
- sync_regs->gprs[1] = skeyvalue;
- run->kvm_dirty_regs |= KVM_SYNC_GPRS;
- ASSERT_EQ(0, uc_run_once(self));
- ASSERT_EQ(false, uc_handle_exit(self));
- ASSERT_EQ(3, sync_regs->gprs[0]);
- ASSERT_EQ(skeyvalue, sync_regs->gprs[1]);
- uc_assert_diag44(self);
-
- /* RRBE + ISKE */
- sync_regs->gprs[1] = skeyvalue;
- run->kvm_dirty_regs |= KVM_SYNC_GPRS;
- ASSERT_EQ(0, uc_run_once(self));
- ASSERT_EQ(false, uc_handle_exit(self));
- ASSERT_EQ(4, sync_regs->gprs[0]);
- /* assert R reset but rest of skey unchanged */
- ASSERT_EQ(skeyvalue & 0xfa, sync_regs->gprs[1]);
- ASSERT_EQ(0, sync_regs->gprs[1] & 0x04);
- uc_assert_diag44(self);
-}
-
-TEST_HARNESS_MAIN
#include <processor.h>
/*
- * s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a
- * 2MB sized and aligned region so that the initial region corresponds to
- * exactly one large page.
+ * s390 needs at least 1MB alignment, and the x86 MOVE/DELETE tests need a 2MB
+ * sized and aligned region so that the initial region corresponds to exactly
+ * one large page.
*/
#define MEM_REGION_SIZE 0x200000
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * amx tests
+ *
+ * Copyright (C) 2021, Intel, Inc.
+ *
+ * Tests for amx #NM exception and save/restore.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
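+/*
+ * Guest XSAVE buffer sizing: 8 tile registers of 1KiB each (8KiB of
+ * XTILEDATA), plus an extra page to cover the legacy region, the XSAVE
+ * header and the components that precede XTILEDATA in the compacted layout.
+ */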
+#define NUM_TILES 8
+#define TILE_SIZE 1024
+#define XSAVE_SIZE ((NUM_TILES * TILE_SIZE) + PAGE_SIZE)
+
+/* Tile configuration (TILECFG) related constants: */
+#define PALETTE_TABLE_INDEX 1
+#define MAX_TILES 16
+#define RESERVED_BYTES 14
+
+#define XSAVE_HDR_OFFSET 512
+
+struct tile_config {
+ u8 palette_id;
+ u8 start_row;
+ u8 reserved[RESERVED_BYTES];
+ u16 colsb[MAX_TILES];
+ u8 rows[MAX_TILES];
+};
+
+struct tile_data {
+ u8 data[NUM_TILES * TILE_SIZE];
+};
+
+struct xtile_info {
+ u16 bytes_per_tile;
+ u16 bytes_per_row;
+ u16 max_names;
+ u16 max_rows;
+ u32 xsave_offset;
+ u32 xsave_size;
+};
+
+static struct xtile_info xtile;
+
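+/*
+ * The AMX instructions below (ldtilecfg, tileloadd and tilerelease) are
+ * emitted as raw .byte sequences so the test does not rely on assembler
+ * support for AMX.
+ */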
+static inline void __ldtilecfg(void *cfg)
+{
+ asm volatile(".byte 0xc4,0xe2,0x78,0x49,0x00"
+ : : "a"(cfg));
+}
+
+static inline void __tileloadd(void *tile)
+{
+ asm volatile(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10"
+ : : "a"(tile), "d"(0));
+}
+
+static inline void __tilerelease(void)
+{
+ asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::);
+}
+
+static inline void __xsavec(struct xstate *xstate, uint64_t rfbm)
+{
+ uint32_t rfbm_lo = rfbm;
+ uint32_t rfbm_hi = rfbm >> 32;
+
+ asm volatile("xsavec (%%rdi)"
+ : : "D" (xstate), "a" (rfbm_lo), "d" (rfbm_hi)
+ : "memory");
+}
+
+static void check_xtile_info(void)
+{
+ GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE);
+
+ GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0));
+ GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE);
+
+ xtile.xsave_offset = this_cpu_property(X86_PROPERTY_XSTATE_TILE_OFFSET);
+ GUEST_ASSERT(xtile.xsave_offset == 2816);
+ xtile.xsave_size = this_cpu_property(X86_PROPERTY_XSTATE_TILE_SIZE);
+ GUEST_ASSERT(xtile.xsave_size == 8192);
+ GUEST_ASSERT(sizeof(struct tile_data) >= xtile.xsave_size);
+
+ GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_MAX_PALETTE_TABLES));
+ GUEST_ASSERT(this_cpu_property(X86_PROPERTY_AMX_MAX_PALETTE_TABLES) >=
+ PALETTE_TABLE_INDEX);
+
+ GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_NR_TILE_REGS));
+ xtile.max_names = this_cpu_property(X86_PROPERTY_AMX_NR_TILE_REGS);
+ GUEST_ASSERT(xtile.max_names == 8);
+ xtile.bytes_per_tile = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_TILE);
+ GUEST_ASSERT(xtile.bytes_per_tile == 1024);
+ xtile.bytes_per_row = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_ROW);
+ GUEST_ASSERT(xtile.bytes_per_row == 64);
+ xtile.max_rows = this_cpu_property(X86_PROPERTY_AMX_MAX_ROWS);
+ GUEST_ASSERT(xtile.max_rows == 16);
+}
+
+static void set_tilecfg(struct tile_config *cfg)
+{
+ int i;
+
+ /* Only palette id 1 */
+ cfg->palette_id = 1;
+ for (i = 0; i < xtile.max_names; i++) {
+ cfg->colsb[i] = xtile.bytes_per_row;
+ cfg->rows[i] = xtile.max_rows;
+ }
+}
+
+static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
+ struct tile_data *tiledata,
+ struct xstate *xstate)
+{
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE) &&
+ this_cpu_has(X86_FEATURE_OSXSAVE));
+ check_xtile_info();
+ GUEST_SYNC(1);
+
+ /* xfd=0, enable amx */
+ wrmsr(MSR_IA32_XFD, 0);
+ GUEST_SYNC(2);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0);
+ set_tilecfg(amx_cfg);
+ __ldtilecfg(amx_cfg);
+ GUEST_SYNC(3);
+ /* Check save/restore when trap to userspace */
+ __tileloadd(tiledata);
+ GUEST_SYNC(4);
+ __tilerelease();
+ GUEST_SYNC(5);
+ /*
+ * After XSAVEC, XTILEDATA is cleared in the xstate_bv but is set in
+ * the xcomp_bv.
+ */
+ xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA;
+ __xsavec(xstate, XFEATURE_MASK_XTILE_DATA);
+ GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
+ GUEST_ASSERT(xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA);
+
+ /* xfd=0x40000, disable amx tiledata */
+ wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA);
+
+ /*
+ * XTILEDATA is cleared in xstate_bv but set in xcomp_bv, this property
+ * remains the same even when amx tiledata is disabled by IA32_XFD.
+ */
+ xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA;
+ __xsavec(xstate, XFEATURE_MASK_XTILE_DATA);
+ GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
+ GUEST_ASSERT((xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA));
+
+ GUEST_SYNC(6);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
+ set_tilecfg(amx_cfg);
+ __ldtilecfg(amx_cfg);
+ /* Trigger #NM exception */
+ __tileloadd(tiledata);
+ GUEST_SYNC(10);
+
+ GUEST_DONE();
+}
+
+void guest_nm_handler(struct ex_regs *regs)
+{
+ /* Check if #NM is triggered by XFEATURE_MASK_XTILE_DATA */
+ GUEST_SYNC(7);
+ GUEST_ASSERT(!(get_cr0() & X86_CR0_TS));
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
+ GUEST_SYNC(8);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
+ /* Clear xfd_err */
+ wrmsr(MSR_IA32_XFD_ERR, 0);
+ /* xfd=0, enable amx */
+ wrmsr(MSR_IA32_XFD, 0);
+ GUEST_SYNC(9);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_regs regs1, regs2;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_x86_state *state;
+ int xsave_restore_size;
+ vm_vaddr_t amx_cfg, tiledata, xstate;
+ struct ucall uc;
+ u32 amx_offset;
+ int ret;
+
+ /*
+ * Note, all off-by-default features must be enabled before anything
+ * caches KVM_GET_SUPPORTED_CPUID, e.g. before using kvm_cpu_has().
+ */
+ vm_xsave_require_permission(XFEATURE_MASK_XTILE_DATA);
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA_XFD));
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ TEST_ASSERT(kvm_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE),
+ "KVM should enumerate max XSAVE size when XSAVE is supported");
+ xsave_restore_size = kvm_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE);
+
+ vcpu_regs_get(vcpu, &regs1);
+
+ /* Register #NM handler */
+ vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler);
+
+ /* amx cfg for guest_code */
+ amx_cfg = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize());
+
+ /* amx tiledata for guest_code */
+ tiledata = vm_vaddr_alloc_pages(vm, 2);
+ memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize());
+
+ /* XSAVE state for guest_code */
+ xstate = vm_vaddr_alloc_pages(vm, DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
+ memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
+ vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate);
+
+ for (;;) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ switch (uc.args[1]) {
+ case 1:
+ case 2:
+ case 3:
+ case 5:
+ case 6:
+ case 7:
+ case 8:
+ fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]);
+ break;
+ case 4:
+ case 10:
+ fprintf(stderr,
+ "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]);
+
+ /*
+ * In the compacted XSAVE format XTILEDATA is the last
+ * component, so its offset is the total XSAVE size minus
+ * the 8KiB of tile data.
+ */
+ amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
+ state = vcpu_save_state(vcpu);
+ void *amx_start = (void *)state->xsave + amx_offset;
+ void *tiles_data = (void *)addr_gva2hva(vm, tiledata);
+ /* Only check TMM0 register, 1 tile */
+ ret = memcmp(amx_start, tiles_data, TILE_SIZE);
+ TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret);
+ kvm_x86_state_cleanup(state);
+ break;
+ case 9:
+ fprintf(stderr,
+ "GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]);
+ break;
+ }
+ break;
+ case UCALL_DONE:
+ fprintf(stderr, "UCALL_DONE\n");
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ state = vcpu_save_state(vcpu);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vcpu, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vcpu, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+ }
+done:
+ kvm_vm_free(vm);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024 Intel Corporation
+ *
+ * Verify KVM correctly emulates the APIC bus frequency when the VMM configures
+ * the frequency via KVM_CAP_X86_APIC_BUS_CYCLES_NS. Start the APIC timer by
+ * programming TMICT (timer initial count) to the largest value possible (so
+ * that the timer will not expire during the test). Then, after an arbitrary
+ * amount of time has elapsed, verify TMCCT (timer current count) is within 1%
+ * of the expected value based on the time elapsed, the APIC bus frequency, and
+ * the programmed TDCR (timer divide configuration register).
+ */
+
+#include "apic.h"
+#include "test_util.h"
+
+/*
+ * Possible TDCR values with matching divide count. Used to modify APIC
+ * timer frequency.
+ */
+static const struct {
+ const uint32_t tdcr;
+ const uint32_t divide_count;
+} tdcrs[] = {
+ {0x0, 2},
+ {0x1, 4},
+ {0x2, 8},
+ {0x3, 16},
+ {0x8, 32},
+ {0x9, 64},
+ {0xa, 128},
+ {0xb, 1},
+};
+
+static bool is_x2apic;
+
+static void apic_enable(void)
+{
+ if (is_x2apic)
+ x2apic_enable();
+ else
+ xapic_enable();
+}
+
+static uint32_t apic_read_reg(unsigned int reg)
+{
+ return is_x2apic ? x2apic_read_reg(reg) : xapic_read_reg(reg);
+}
+
+static void apic_write_reg(unsigned int reg, uint32_t val)
+{
+ if (is_x2apic)
+ x2apic_write_reg(reg, val);
+ else
+ xapic_write_reg(reg, val);
+}
+
+static void apic_guest_code(uint64_t apic_hz, uint64_t delay_ms)
+{
+ uint64_t tsc_hz = guest_tsc_khz * 1000;
+ const uint32_t tmict = ~0u;
+ uint64_t tsc0, tsc1, freq;
+ uint32_t tmcct;
+ int i;
+
+ apic_enable();
+
+ /*
+ * Setup one-shot timer. The vector does not matter because the
+ * interrupt should not fire.
+ */
+ apic_write_reg(APIC_LVTT, APIC_LVT_TIMER_ONESHOT | APIC_LVT_MASKED);
+
+ for (i = 0; i < ARRAY_SIZE(tdcrs); i++) {
+ apic_write_reg(APIC_TDCR, tdcrs[i].tdcr);
+ apic_write_reg(APIC_TMICT, tmict);
+
+ tsc0 = rdtsc();
+ udelay(delay_ms * 1000);
+ tmcct = apic_read_reg(APIC_TMCCT);
+ tsc1 = rdtsc();
+
+ /*
+ * Stop the timer _after_ reading the current, final count, as
+ * writing the initial counter also modifies the current count.
+ */
+ apic_write_reg(APIC_TMICT, 0);
+
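+ /*
+ * Elapsed bus cycles = (tmict - tmcct) * divide_count, and the
+ * elapsed time is (tsc1 - tsc0) / tsc_hz, so the measured bus
+ * frequency is cycles * tsc_hz / (tsc1 - tsc0).
+ */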
+ freq = (tmict - tmcct) * tdcrs[i].divide_count * tsc_hz / (tsc1 - tsc0);
+ /* Check if measured frequency is within 5% of configured frequency. */
+ __GUEST_ASSERT(freq < apic_hz * 105 / 100 && freq > apic_hz * 95 / 100,
+ "Frequency = %lu (wanted %lu - %lu), bus = %lu, div = %u, tsc = %lu",
+ freq, apic_hz * 95 / 100, apic_hz * 105 / 100,
+ apic_hz, tdcrs[i].divide_count, tsc_hz);
+ }
+
+ GUEST_DONE();
+}
+
+static void test_apic_bus_clock(struct kvm_vcpu *vcpu)
+{
+ bool done = false;
+ struct ucall uc;
+
+ while (!done) {
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ done = true;
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ break;
+ }
+ }
+}
+
+static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms,
+ bool x2apic)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int ret;
+
+ is_x2apic = x2apic;
+
+ vm = vm_create(1);
+
+ sync_global_to_guest(vm, is_x2apic);
+
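+ /*
+ * KVM_CAP_X86_APIC_BUS_CYCLES_NS takes the APIC bus clock period in
+ * nanoseconds, hence NSEC_PER_SEC / frequency.
+ */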
+ vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
+ NSEC_PER_SEC / apic_hz);
+
+ vcpu = vm_vcpu_add(vm, 0, apic_guest_code);
+ vcpu_args_set(vcpu, 2, apic_hz, delay_ms);
+
+ ret = __vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
+ NSEC_PER_SEC / apic_hz);
+ TEST_ASSERT(ret < 0 && errno == EINVAL,
+ "Setting of APIC bus frequency after vCPU is created should fail.");
+
+ if (!is_x2apic)
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+ test_apic_bus_clock(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-d delay] [-f APIC bus freq]\n", name);
+ puts("");
+ printf("-d: Delay (in msec) guest uses to measure APIC bus frequency.\n");
+ printf("-f: The APIC bus frequency (in MHz) to be configured for the guest.\n");
+ puts("");
+}
+
+int main(int argc, char *argv[])
+{
+ /*
+ * Arbitrarily default to 25MHz for the APIC bus frequency, which is
+ * different enough from the default 1GHz to be interesting.
+ */
+ uint64_t apic_hz = 25 * 1000 * 1000;
+ uint64_t delay_ms = 100;
+ int opt;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS));
+
+ while ((opt = getopt(argc, argv, "d:f:h")) != -1) {
+ switch (opt) {
+ case 'f':
+ apic_hz = atoi_positive("APIC bus frequency", optarg) * 1000 * 1000;
+ break;
+ case 'd':
+ delay_ms = atoi_positive("Delay in milliseconds", optarg);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ exit(KSFT_SKIP);
+ }
+ }
+
+ run_apic_bus_clock_test(apic_hz, delay_ms, false);
+ run_apic_bus_clock_test(apic_hz, delay_ms, true);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat Inc.
+ *
+ * Generic tests for KVM CPUID set/get ioctls
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct cpuid_mask {
+ union {
+ struct {
+ u32 eax;
+ u32 ebx;
+ u32 ecx;
+ u32 edx;
+ };
+ u32 regs[4];
+ };
+};
+
+static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
+{
+ int i;
+ u32 eax, ebx, ecx, edx;
+
+ for (i = 0; i < guest_cpuid->nent; i++) {
+ __cpuid(guest_cpuid->entries[i].function,
+ guest_cpuid->entries[i].index,
+ &eax, &ebx, &ecx, &edx);
+
+ GUEST_ASSERT_EQ(eax, guest_cpuid->entries[i].eax);
+ GUEST_ASSERT_EQ(ebx, guest_cpuid->entries[i].ebx);
+ GUEST_ASSERT_EQ(ecx, guest_cpuid->entries[i].ecx);
+ GUEST_ASSERT_EQ(edx, guest_cpuid->entries[i].edx);
+ }
+
+}
+
+static void guest_main(struct kvm_cpuid2 *guest_cpuid)
+{
+ GUEST_SYNC(1);
+
+ test_guest_cpuids(guest_cpuid);
+
+ GUEST_SYNC(2);
+
+ GUEST_ASSERT_EQ(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF), 0x40000001);
+
+ GUEST_DONE();
+}
+
+static struct cpuid_mask get_const_cpuid_mask(const struct kvm_cpuid_entry2 *entry)
+{
+ struct cpuid_mask mask;
+
+ memset(&mask, 0xff, sizeof(mask));
+
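+ /*
+ * Default to comparing all bits, then clear the bits that are
+ * dynamic: OSXSAVE and OSPKE mirror the guest's CR4.OSXSAVE and
+ * CR4.PKE and can legitimately differ between queries.
+ */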
+ switch (entry->function) {
+ case 0x1:
+ mask.regs[X86_FEATURE_OSXSAVE.reg] &= ~BIT(X86_FEATURE_OSXSAVE.bit);
+ break;
+ case 0x7:
+ mask.regs[X86_FEATURE_OSPKE.reg] &= ~BIT(X86_FEATURE_OSPKE.bit);
+ break;
+ case 0xd:
+ /*
+ * CPUID.0xD.{0,1}.EBX enumerate XSAVE size based on the current
+ * XCR0 and IA32_XSS MSR values.
+ */
+ if (entry->index < 2)
+ mask.ebx = 0;
+ break;
+ }
+ return mask;
+}
+
+static void compare_cpuids(const struct kvm_cpuid2 *cpuid1,
+ const struct kvm_cpuid2 *cpuid2)
+{
+ const struct kvm_cpuid_entry2 *e1, *e2;
+ int i;
+
+ TEST_ASSERT(cpuid1->nent == cpuid2->nent,
+ "CPUID nent mismatch: %d vs. %d", cpuid1->nent, cpuid2->nent);
+
+ for (i = 0; i < cpuid1->nent; i++) {
+ struct cpuid_mask mask;
+
+ e1 = &cpuid1->entries[i];
+ e2 = &cpuid2->entries[i];
+
+ TEST_ASSERT(e1->function == e2->function &&
+ e1->index == e2->index && e1->flags == e2->flags,
+ "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x",
+ i, e1->function, e1->index, e1->flags,
+ e2->function, e2->index, e2->flags);
+
+ /* Mask off dynamic bits, e.g. OSXSAVE, when comparing entries. */
+ mask = get_const_cpuid_mask(e1);
+
+ TEST_ASSERT((e1->eax & mask.eax) == (e2->eax & mask.eax) &&
+ (e1->ebx & mask.ebx) == (e2->ebx & mask.ebx) &&
+ (e1->ecx & mask.ecx) == (e2->ecx & mask.ecx) &&
+ (e1->edx & mask.edx) == (e2->edx & mask.edx),
+ "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x",
+ e1->function, e1->index,
+ e1->eax & mask.eax, e1->ebx & mask.ebx,
+ e1->ecx & mask.ecx, e1->edx & mask.edx,
+ e2->eax & mask.eax, e2->ebx & mask.ebx,
+ e2->ecx & mask.ecx, e2->edx & mask.edx);
+ }
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage + 1,
+ "Stage %d: Unexpected register values vmexit, got %lx",
+ stage + 1, (ulong)uc.args[1]);
+ return;
+ case UCALL_DONE:
+ return;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_ASSERT(false, "Unexpected exit: %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ }
+}
+
+struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid)
+{
+ int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]);
+ vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR);
+ struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva);
+
+ memcpy(guest_cpuids, cpuid, size);
+
+ *p_gva = gva;
+ return guest_cpuids;
+}
+
+static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *ent;
+ int rc;
+ u32 eax, ebx, x;
+
+ /* Setting unmodified CPUID is allowed */
+ rc = __vcpu_set_cpuid(vcpu);
+ TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
+
+ /* Changing CPU features is forbidden */
+ ent = vcpu_get_cpuid_entry(vcpu, 0x7);
+ ebx = ent->ebx;
+ ent->ebx--;
+ rc = __vcpu_set_cpuid(vcpu);
+ TEST_ASSERT(rc, "Changing CPU features should fail");
+ ent->ebx = ebx;
+
+ /* Changing MAXPHYADDR is forbidden */
+ ent = vcpu_get_cpuid_entry(vcpu, 0x80000008);
+ eax = ent->eax;
+ x = eax & 0xff;
+ ent->eax = (eax & ~0xffu) | (x - 1);
+ rc = __vcpu_set_cpuid(vcpu);
+ TEST_ASSERT(rc, "Changing MAXPHYADDR should fail");
+ ent->eax = eax;
+}
+
+static void test_get_cpuid2(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent + 1);
+ int i, r;
+
+ vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
+ TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent,
+ "KVM didn't update nent on success, wanted %u, got %u",
+ vcpu->cpuid->nent, cpuid->nent);
+
+ for (i = 0; i < vcpu->cpuid->nent; i++) {
+ cpuid->nent = i;
+ r = __vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
+ TEST_ASSERT(r && errno == E2BIG, KVM_IOCTL_ERROR(KVM_GET_CPUID2, r));
+ TEST_ASSERT(cpuid->nent == i, "KVM modified nent on failure");
+ }
+ free(cpuid);
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ vm_vaddr_t cpuid_gva;
+ struct kvm_vm *vm;
+ int stage;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+ compare_cpuids(kvm_get_supported_cpuid(), vcpu->cpuid);
+
+ vcpu_alloc_cpuid(vm, &cpuid_gva, vcpu->cpuid);
+
+ vcpu_args_set(vcpu, 1, cpuid_gva);
+
+ for (stage = 0; stage < 3; stage++)
+ run_vcpu(vcpu, stage);
+
+ set_cpuid_after_run(vcpu);
+
+ test_get_cpuid2(vcpu);
+
+ kvm_vm_free(vm);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CR4 and CPUID sync test
+ *
+ * Copyright 2018, Red Hat, Inc. and/or its affiliates.
+ *
+ * Author:
+ * Wei Huang <wei@redhat.com>
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
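+/* I/O port the guest writes to so userspace clears CR4.OSXSAVE on its behalf. */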
+#define MAGIC_HYPERCALL_PORT 0x80
+
+static void guest_code(void)
+{
+ u32 regs[4] = {
+ [KVM_CPUID_EAX] = X86_FEATURE_OSXSAVE.function,
+ [KVM_CPUID_ECX] = X86_FEATURE_OSXSAVE.index,
+ };
+
+ /* CR4.OSXSAVE should be enabled by default (for selftests vCPUs). */
+ GUEST_ASSERT(get_cr4() & X86_CR4_OSXSAVE);
+
+ /* verify CR4.OSXSAVE == CPUID.OSXSAVE */
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
+
+ /*
+ * Notify the hypervisor to clear CR4.OSXSAVE, do CPUID and save the output,
+ * and then restore CR4. Do this all in assembly to ensure no AVX
+ * instructions are executed while OSXSAVE=0.
+ */
+ asm volatile (
+ "out %%al, $" __stringify(MAGIC_HYPERCALL_PORT) "\n\t"
+ "cpuid\n\t"
+ "mov %%rdi, %%cr4\n\t"
+ : "+a" (regs[KVM_CPUID_EAX]),
+ "=b" (regs[KVM_CPUID_EBX]),
+ "+c" (regs[KVM_CPUID_ECX]),
+ "=d" (regs[KVM_CPUID_EDX])
+ : "D" (get_cr4())
+ );
+
+ /* Verify KVM cleared OSXSAVE in CPUID when it was cleared in CR4. */
+ GUEST_ASSERT(!(regs[X86_FEATURE_OSXSAVE.reg] & BIT(X86_FEATURE_OSXSAVE.bit)));
+
+ /* Verify restoring CR4 also restored OSXSAVE in CPUID. */
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_sregs sregs;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ while (1) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ if (vcpu->run->io.port == MAGIC_HYPERCALL_PORT &&
+ vcpu->run->io.direction == KVM_EXIT_IO_OUT) {
+ /* emulate hypervisor clearing CR4.OSXSAVE */
+ vcpu_sregs_get(vcpu, &sregs);
+ sregs.cr4 &= ~X86_CR4_OSXSAVE;
+ vcpu_sregs_set(vcpu, &sregs);
+ continue;
+ }
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM guest debug register tests
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <string.h>
+#include "kvm_util.h"
+#include "processor.h"
+#include "apic.h"
+
+#define DR6_BD (1 << 13)
+#define DR7_GD (1 << 13)
+
+#define IRQ_VECTOR 0xAA
+
+/* For testing data access debug BP */
+uint32_t guest_value;
+
+extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
+
+static void guest_code(void)
+{
+ /* Create a pending interrupt on current vCPU */
+ x2apic_enable();
+ x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT |
+ APIC_DM_FIXED | IRQ_VECTOR);
+
+ /*
+ * Software BP tests.
+ *
+ * NOTE: the sw_bp label needs to come before the int3 instruction,
+ * because int3 is an exception rather than a normal trap for
+ * KVM_SET_GUEST_DEBUG (it is captured via the vCPU exception bitmap).
+ */
+ asm volatile("sw_bp: int3");
+
+ /* Hardware instruction BP test */
+ asm volatile("hw_bp: nop");
+
+ /* Hardware data BP test */
+ asm volatile("mov $1234,%%rax;\n\t"
+ "mov %%rax,%0;\n\t write_data:"
+ : "=m" (guest_value) : : "rax");
+
+ /*
+ * Single step test, covers 2 basic instructions and 2 emulated
+ *
+ * Enable interrupts during the single stepping to see that pending
+ * interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ.
+ *
+ * Write MSR_IA32_TSC_DEADLINE to verify that KVM's fastpath handler
+ * exits to userspace due to single-step being enabled.
+ */
+ asm volatile("ss_start: "
+ "sti\n\t"
+ "xor %%eax,%%eax\n\t"
+ "cpuid\n\t"
+ "movl $" __stringify(MSR_IA32_TSC_DEADLINE) ", %%ecx\n\t"
+ "wrmsr\n\t"
+ "cli\n\t"
+ : : : "eax", "ebx", "ecx", "edx");
+
+ /* DR6.BD test */
+ asm volatile("bd_start: mov %%dr0, %%rax" : : : "rax");
+ GUEST_DONE();
+}
+
+#define CAST_TO_RIP(v) ((unsigned long long)&(v))
+
+static void vcpu_skip_insn(struct kvm_vcpu *vcpu, int insn_len)
+{
+ struct kvm_regs regs;
+
+ vcpu_regs_get(vcpu, &regs);
+ regs.rip += insn_len;
+ vcpu_regs_set(vcpu, &regs);
+}
+
+int main(void)
+{
+ struct kvm_guest_debug debug;
+ unsigned long long target_dr6, target_rip;
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ uint64_t cmd;
+ int i;
+ /* Instruction lengths starting at ss_start */
+ int ss_size[6] = {
+ 1, /* sti */
+ 2, /* xor */
+ 2, /* cpuid */
+ 5, /* mov */
+ 2, /* wrmsr */
+ 1, /* cli */
+ };
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
+
+ /* Test software BPs - int3 */
+ memset(&debug, 0, sizeof(debug));
+ debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+ run->debug.arch.exception == BP_VECTOR &&
+ run->debug.arch.pc == CAST_TO_RIP(sw_bp),
+ "INT3: exit %d exception %d rip 0x%llx (should be 0x%llx)",
+ run->exit_reason, run->debug.arch.exception,
+ run->debug.arch.pc, CAST_TO_RIP(sw_bp));
+ vcpu_skip_insn(vcpu, 1);
+
+ /* Test instruction HW BP over DR[0-3] */
+ for (i = 0; i < 4; i++) {
+ memset(&debug, 0, sizeof(debug));
+ debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+ debug.arch.debugreg[i] = CAST_TO_RIP(hw_bp);
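+ /*
+ * DR7: bit 10 (0x400) is reserved and reads as '1', bit 2*i+1 is
+ * the global enable for DRi, and R/W = LEN = 0 selects an
+ * instruction execution breakpoint.
+ */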
+ debug.arch.debugreg[7] = 0x400 | (1UL << (2*i+1));
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
+ target_dr6 = 0xffff0ff0 | (1UL << i);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+ run->debug.arch.exception == DB_VECTOR &&
+ run->debug.arch.pc == CAST_TO_RIP(hw_bp) &&
+ run->debug.arch.dr6 == target_dr6,
+ "INS_HW_BP (DR%d): exit %d exception %d rip 0x%llx "
+ "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+ i, run->exit_reason, run->debug.arch.exception,
+ run->debug.arch.pc, CAST_TO_RIP(hw_bp),
+ run->debug.arch.dr6, target_dr6);
+ }
+ /* Skip "nop" */
+ vcpu_skip_insn(vcpu, 1);
+
+ /* Test data access HW BP over DR[0-3] */
+ for (i = 0; i < 4; i++) {
+ memset(&debug, 0, sizeof(debug));
+ debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+ debug.arch.debugreg[i] = CAST_TO_RIP(guest_value);
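+ /*
+ * As above, plus R/W = 0b01 (break on data writes) and LEN = 0b11
+ * (4 bytes) for DRi, matching the write to the 4-byte guest_value.
+ */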
+ debug.arch.debugreg[7] = 0x00000400 | (1UL << (2*i+1)) |
+ (0x000d0000UL << (4*i));
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
+ target_dr6 = 0xffff0ff0 | (1UL << i);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+ run->debug.arch.exception == DB_VECTOR &&
+ run->debug.arch.pc == CAST_TO_RIP(write_data) &&
+ run->debug.arch.dr6 == target_dr6,
+ "DATA_HW_BP (DR%d): exit %d exception %d rip 0x%llx "
+ "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+ i, run->exit_reason, run->debug.arch.exception,
+ run->debug.arch.pc, CAST_TO_RIP(write_data),
+ run->debug.arch.dr6, target_dr6);
+ /* Rollback the 4-bytes "mov" */
+ vcpu_skip_insn(vcpu, -7);
+ }
+ /* Skip the 4-bytes "mov" */
+ vcpu_skip_insn(vcpu, 7);
+
+ /* Test single step */
+ target_rip = CAST_TO_RIP(ss_start);
+ target_dr6 = 0xffff4ff0ULL;
+ for (i = 0; i < ARRAY_SIZE(ss_size); i++) {
+ target_rip += ss_size[i];
+ memset(&debug, 0, sizeof(debug));
+ debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP |
+ KVM_GUESTDBG_BLOCKIRQ;
+ debug.arch.debugreg[7] = 0x00000400;
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+ run->debug.arch.exception == DB_VECTOR &&
+ run->debug.arch.pc == target_rip &&
+ run->debug.arch.dr6 == target_dr6,
+ "SINGLE_STEP[%d]: exit %d exception %d rip 0x%llx "
+ "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+ i, run->exit_reason, run->debug.arch.exception,
+ run->debug.arch.pc, target_rip, run->debug.arch.dr6,
+ target_dr6);
+ }
+
+ /* Finally test global disable */
+ memset(&debug, 0, sizeof(debug));
+ debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+ debug.arch.debugreg[7] = 0x400 | DR7_GD;
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
+ target_dr6 = 0xffff0ff0 | DR6_BD;
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+ run->debug.arch.exception == DB_VECTOR &&
+ run->debug.arch.pc == CAST_TO_RIP(bd_start) &&
+ run->debug.arch.dr6 == target_dr6,
+ "DR7.GD: exit %d exception %d rip 0x%llx "
+ "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+ run->exit_reason, run->debug.arch.exception,
+ run->debug.arch.pc, CAST_TO_RIP(bd_start), run->debug.arch.dr6,
+ target_dr6);
+
+ /* Disable all debug controls, run to the end */
+ memset(&debug, 0, sizeof(debug));
+ vcpu_guest_debug_set(vcpu, &debug);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ cmd = get_ucall(vcpu, &uc);
+ TEST_ASSERT(cmd == UCALL_DONE, "UCALL_DONE");
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty logging page splitting test
+ *
+ * Based on dirty_log_perf.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2023, Google, Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "memstress.h"
+#include "guest_modes.h"
+#include "ucall_common.h"
+
+#define VCPUS 2
+#define SLOTS 2
+#define ITERATIONS 2
+
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+
+static enum vm_mem_backing_src_type backing_src = VM_MEM_SRC_ANONYMOUS_HUGETLB;
+
+static u64 dirty_log_manual_caps;
+static bool host_quit;
+static int iteration;
+static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
+
+struct kvm_page_stats {
+ uint64_t pages_4k;
+ uint64_t pages_2m;
+ uint64_t pages_1g;
+ uint64_t hugepages;
+};
+
+static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage)
+{
+ stats->pages_4k = vm_get_stat(vm, "pages_4k");
+ stats->pages_2m = vm_get_stat(vm, "pages_2m");
+ stats->pages_1g = vm_get_stat(vm, "pages_1g");
+ stats->hugepages = stats->pages_2m + stats->pages_1g;
+
+ pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n",
+ stage, stats->pages_4k, stats->pages_2m, stats->pages_1g,
+ stats->hugepages);
+}
+
+static void run_vcpu_iteration(struct kvm_vm *vm)
+{
+ int i;
+
+ iteration++;
+ for (i = 0; i < VCPUS; i++) {
+ while (READ_ONCE(vcpu_last_completed_iteration[i]) !=
+ iteration)
+ ;
+ }
+}
+
+static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
+{
+ struct kvm_vcpu *vcpu = vcpu_args->vcpu;
+ int vcpu_idx = vcpu_args->vcpu_idx;
+
+ while (!READ_ONCE(host_quit)) {
+ int current_iteration = READ_ONCE(iteration);
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
+
+ vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
+
+ /* Wait for the start of the next iteration to be signaled. */
+ while (current_iteration == READ_ONCE(iteration) &&
+ READ_ONCE(iteration) >= 0 &&
+ !READ_ONCE(host_quit))
+ ;
+ }
+}
+
+static void run_test(enum vm_guest_mode mode, void *unused)
+{
+ struct kvm_vm *vm;
+ unsigned long **bitmaps;
+ uint64_t guest_num_pages;
+ uint64_t host_num_pages;
+ uint64_t pages_per_slot;
+ int i;
+ struct kvm_page_stats stats_populated;
+ struct kvm_page_stats stats_dirty_logging_enabled;
+ struct kvm_page_stats stats_dirty_pass[ITERATIONS];
+ struct kvm_page_stats stats_clear_pass[ITERATIONS];
+ struct kvm_page_stats stats_dirty_logging_disabled;
+ struct kvm_page_stats stats_repopulated;
+
+ vm = memstress_create_vm(mode, VCPUS, guest_percpu_mem_size,
+ SLOTS, backing_src, false);
+
+ guest_num_pages = (VCPUS * guest_percpu_mem_size) >> vm->page_shift;
+ guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+ host_num_pages = vm_num_host_pages(mode, guest_num_pages);
+ pages_per_slot = host_num_pages / SLOTS;
+ TEST_ASSERT_EQ(host_num_pages, pages_per_slot * SLOTS);
+ TEST_ASSERT(!(host_num_pages % 512),
+ "Number of pages, '%lu' not a multiple of 2MiB", host_num_pages);
+
+ bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot);
+
+ if (dirty_log_manual_caps)
+ vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
+ dirty_log_manual_caps);
+
+ /* Start the iterations */
+ iteration = -1;
+ host_quit = false;
+
+ for (i = 0; i < VCPUS; i++)
+ vcpu_last_completed_iteration[i] = -1;
+
+ memstress_start_vcpu_threads(VCPUS, vcpu_worker);
+
+ run_vcpu_iteration(vm);
+ get_page_stats(vm, &stats_populated, "populating memory");
+
+ /* Enable dirty logging */
+ memstress_enable_dirty_logging(vm, SLOTS);
+
+ get_page_stats(vm, &stats_dirty_logging_enabled, "enabling dirty logging");
+
+ while (iteration < ITERATIONS) {
+ run_vcpu_iteration(vm);
+ get_page_stats(vm, &stats_dirty_pass[iteration - 1],
+ "dirtying memory");
+
+ memstress_get_dirty_log(vm, bitmaps, SLOTS);
+
+ if (dirty_log_manual_caps) {
+ memstress_clear_dirty_log(vm, bitmaps, SLOTS, pages_per_slot);
+
+ get_page_stats(vm, &stats_clear_pass[iteration - 1], "clearing dirty log");
+ }
+ }
+
+ /* Disable dirty logging */
+ memstress_disable_dirty_logging(vm, SLOTS);
+
+ get_page_stats(vm, &stats_dirty_logging_disabled, "disabling dirty logging");
+
+ /* Run vCPUs again to fault pages back in. */
+ run_vcpu_iteration(vm);
+ get_page_stats(vm, &stats_repopulated, "repopulating memory");
+
+ /*
+ * Tell the vCPU threads to quit. No need to manually check that vCPUs
+ * have stopped running after disabling dirty logging, the join will
+ * wait for them to exit.
+ */
+ host_quit = true;
+ memstress_join_vcpu_threads(VCPUS);
+
+ memstress_free_bitmaps(bitmaps, SLOTS);
+ memstress_destroy_vm(vm);
+
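+ /*
+ * After the populate pass all memory should be backed by huge pages:
+ * 512 4KiB pages per 2MiB page, 512 * 512 per 1GiB page.
+ */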
+ TEST_ASSERT_EQ((stats_populated.pages_2m * 512 +
+ stats_populated.pages_1g * 512 * 512), host_num_pages);
+
+ /*
+ * Check that all huge pages were split. Since large pages can only
+ * exist in the data slot, and the vCPUs should have dirtied all pages
+ * in the data slot, there should be no huge pages left after splitting.
+ * Splitting happens at dirty log enable time without
+ * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and after the first clear pass
+ * with that capability.
+ */
+ if (dirty_log_manual_caps) {
+ TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
+ TEST_ASSERT(stats_clear_pass[0].pages_4k >= host_num_pages,
+ "Expected at least '%lu' 4KiB pages, found only '%lu'",
+ host_num_pages, stats_clear_pass[0].pages_4k);
+ TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
+ } else {
+ TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
+ TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= host_num_pages,
+ "Expected at least '%lu' 4KiB pages, found only '%lu'",
+ host_num_pages, stats_dirty_logging_enabled.pages_4k);
+ }
+
+ /*
+ * Once dirty logging is disabled and the vCPUs have touched all their
+ * memory again, the hugepage counts should be the same as they were
+ * right after initial population of memory.
+ */
+ TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
+ TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-b vcpu bytes] [-s mem type]\n",
+ name);
+ puts("");
+ printf(" -b: specify the size of the memory region which should be\n"
+ " dirtied by each vCPU. e.g. 10M or 3G.\n"
+ " (default: 1G)\n");
+ backing_src_help("-s");
+ puts("");
+}
+
+int main(int argc, char *argv[])
+{
+ int opt;
+
+ TEST_REQUIRE(get_kvm_param_bool("eager_page_split"));
+ TEST_REQUIRE(get_kvm_param_bool("tdp_mmu"));
+
+ while ((opt = getopt(argc, argv, "b:hs:")) != -1) {
+ switch (opt) {
+ case 'b':
+ guest_percpu_mem_size = parse_size(optarg);
+ break;
+ case 'h':
+ help(argv[0]);
+ exit(0);
+ case 's':
+ backing_src = parse_backing_src_type(optarg);
+ break;
+ default:
+ help(argv[0]);
+ exit(1);
+ }
+ }
+
+ if (!is_backing_src_hugetlb(backing_src)) {
+ pr_info("This test will only work reliably with HugeTLB memory. "
+ "It can work with THP, but that is best effort.\n");
+ }
+
+ guest_modes_append_default();
+
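+ /* First pass: no manual-protect caps, huge pages are split when dirty logging is enabled. */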
+ dirty_log_manual_caps = 0;
+ for_each_guest_mode(run_test, NULL);
+
+ dirty_log_manual_caps =
+ kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+
+ if (dirty_log_manual_caps) {
+ dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
+ KVM_DIRTY_LOG_INITIALLY_SET);
+ for_each_guest_mode(run_test, NULL);
+ } else {
+ pr_info("Skipping testing with MANUAL_PROTECT as it is not supported");
+ }
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022, Google LLC.
+ *
+ * Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE.
+ */
+#include "flds_emulation.h"
+#include "test_util.h"
+#include "ucall_common.h"
+
+#define MMIO_GPA 0x700000000
+#define MMIO_GVA MMIO_GPA
+
+static void guest_code(void)
+{
+ /* Execute flds with an MMIO address to force KVM to emulate it. */
+ flds(MMIO_GVA);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
+ virt_map(vm, MMIO_GVA, MMIO_GPA, 1);
+
+ vcpu_run(vcpu);
+ handle_flds_emulation_failure_exit(vcpu);
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+static bool is_kvm_controlled_msr(uint32_t msr)
+{
+ return msr == MSR_IA32_VMX_CR0_FIXED1 || msr == MSR_IA32_VMX_CR4_FIXED1;
+}
+
+/*
+ * For VMX MSRs with a "true" variant, KVM requires userspace to set the "true"
+ * MSR, and doesn't allow setting the hidden version.
+ */
+static bool is_hidden_vmx_msr(uint32_t msr)
+{
+ switch (msr) {
+ case MSR_IA32_VMX_PINBASED_CTLS:
+ case MSR_IA32_VMX_PROCBASED_CTLS:
+ case MSR_IA32_VMX_EXIT_CTLS:
+ case MSR_IA32_VMX_ENTRY_CTLS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool is_quirked_msr(uint32_t msr)
+{
+ return msr != MSR_AMD64_DE_CFG;
+}
+
+static void test_feature_msr(uint32_t msr)
+{
+ const uint64_t supported_mask = kvm_get_feature_msr(msr);
+ uint64_t reset_value = is_quirked_msr(msr) ? supported_mask : 0;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /*
+ * Don't bother testing KVM-controlled MSRs beyond verifying that the
+ * MSR can be read from userspace. Any value is effectively legal, as
+ * KVM is bound by x86 architecture, not by ABI.
+ */
+ if (is_kvm_controlled_msr(msr))
+ return;
+
+ /*
+ * More goofy behavior. KVM reports the host CPU's actual revision ID,
+ * but initializes the vCPU's revision ID to an arbitrary value.
+ */
+ if (msr == MSR_IA32_UCODE_REV)
+ reset_value = host_cpu_is_intel ? 0x100000000ULL : 0x01000065;
+
+ /*
+ * For quirked MSRs, KVM's ABI is to initialize the vCPU's value to the
+ * full set of features supported by KVM. For non-quirked MSRs, and
+ * when the quirk is disabled, KVM must zero-initialize the MSR and let
+ * userspace do the configuration.
+ */
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+ TEST_ASSERT(vcpu_get_msr(vcpu, msr) == reset_value,
+ "Wanted 0x%lx for %squirked MSR 0x%x, got 0x%lx",
+ reset_value, is_quirked_msr(msr) ? "" : "non-", msr,
+ vcpu_get_msr(vcpu, msr));
+ if (!is_hidden_vmx_msr(msr))
+ vcpu_set_msr(vcpu, msr, supported_mask);
+ kvm_vm_free(vm);
+
+ if (is_hidden_vmx_msr(msr))
+ return;
+
+ if (!kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2) ||
+ !(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
+ return;
+
+ vm = vm_create(1);
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_STUFF_FEATURE_MSRS);
+
+ vcpu = vm_vcpu_add(vm, 0, NULL);
+ TEST_ASSERT(!vcpu_get_msr(vcpu, msr),
+ "Quirk disabled, wanted '0' for MSR 0x%x, got 0x%lx",
+ msr, vcpu_get_msr(vcpu, msr));
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ const struct kvm_msr_list *feature_list;
+ int i;
+
+ /*
+ * Skip the entire test if MSR_FEATURES isn't supported, other tests
+ * will cover the "regular" list of MSRs, the coverage here is purely
+ * opportunistic and not interesting on its own.
+ */
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES));
+
+ (void)kvm_get_msr_index_list();
+
+ feature_list = kvm_get_feature_msr_index_list();
+ for (i = 0; i < feature_list->nmsrs; i++)
+ test_feature_msr(feature_list->indices[i]);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM paravirtual feature disablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <linux/stringify.h>
+#include <stdint.h>
+
+#include "kvm_test_harness.h"
+#include "apic.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/* VMCALL and VMMCALL are both 3-byte opcodes. */
+#define HYPERCALL_INSN_SIZE 3
+
+static bool quirk_disabled;
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+ regs->rax = -EFAULT;
+ regs->rip += HYPERCALL_INSN_SIZE;
+}
+
+static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xc1 };
+static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 };
+
+extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE];
+static uint64_t do_sched_yield(uint8_t apic_id)
+{
+ uint64_t ret;
+
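+ /*
+ * hypercall_insn starts out as int3 (0xcc) padding; guest_main()
+ * overwrites it with the non-native hypercall instruction before
+ * this function is executed.
+ */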
+ asm volatile("hypercall_insn:\n\t"
+ ".byte 0xcc,0xcc,0xcc\n\t"
+ : "=a"(ret)
+ : "a"((uint64_t)KVM_HC_SCHED_YIELD), "b"((uint64_t)apic_id)
+ : "memory");
+
+ return ret;
+}
+
+static void guest_main(void)
+{
+ const uint8_t *native_hypercall_insn;
+ const uint8_t *other_hypercall_insn;
+ uint64_t ret;
+
+ if (host_cpu_is_intel) {
+ native_hypercall_insn = vmx_vmcall;
+ other_hypercall_insn = svm_vmmcall;
+ } else if (host_cpu_is_amd) {
+ native_hypercall_insn = svm_vmmcall;
+ other_hypercall_insn = vmx_vmcall;
+ } else {
+ GUEST_ASSERT(0);
+ /* unreachable */
+ return;
+ }
+
+ memcpy(hypercall_insn, other_hypercall_insn, HYPERCALL_INSN_SIZE);
+
+ ret = do_sched_yield(GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)));
+
+ /*
+ * If the quirk is disabled, verify that guest_ud_handler() "returned"
+ * -EFAULT and that KVM did NOT patch the hypercall. If the quirk is
+ * enabled, verify that the hypercall succeeded and that KVM patched in
+ * the "right" hypercall.
+ */
+ if (quirk_disabled) {
+ GUEST_ASSERT(ret == (uint64_t)-EFAULT);
+ GUEST_ASSERT(!memcmp(other_hypercall_insn, hypercall_insn,
+ HYPERCALL_INSN_SIZE));
+ } else {
+ GUEST_ASSERT(!ret);
+ GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn,
+ HYPERCALL_INSN_SIZE));
+ }
+
+ GUEST_DONE();
+}
+
+KVM_ONE_VCPU_TEST_SUITE(fix_hypercall);
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ pr_info("%s: %016lx\n", (const char *)uc.args[2], uc.args[3]);
+ break;
+ case UCALL_DONE:
+ return;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unhandled ucall: %ld\nexit_reason: %u (%s)",
+ uc.cmd, run->exit_reason, exit_reason_str(run->exit_reason));
+ }
+}
+
+static void test_fix_hypercall(struct kvm_vcpu *vcpu, bool disable_quirk)
+{
+ struct kvm_vm *vm = vcpu->vm;
+
+ vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler);
+
+ if (disable_quirk)
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2,
+ KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
+
+ quirk_disabled = disable_quirk;
+ sync_global_to_guest(vm, quirk_disabled);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+ enter_guest(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(fix_hypercall, enable_quirk, guest_main)
+{
+ test_fix_hypercall(vcpu, false);
+}
+
+KVM_ONE_VCPU_TEST(fix_hypercall, disable_quirk, guest_main)
+{
+ test_fix_hypercall(vcpu, true);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
+
+ return test_harness_run(argc, argv);
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_FLDS_EMULATION_H
+#define SELFTEST_KVM_FLDS_EMULATION_H
+
+#include "kvm_util.h"
+
+#define FLDS_MEM_EAX ".byte 0xd9, 0x00"
+
+/*
+ * flds is an instruction that the KVM instruction emulator is known not to
+ * support. This can be used in guest code along with a mechanism to force
+ * KVM to emulate the instruction (e.g. by providing an MMIO address) to
+ * exercise emulation failures.
+ */
+static inline void flds(uint64_t address)
+{
+ __asm__ __volatile__(FLDS_MEM_EAX :: "a"(address));
+}
+
+static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_regs regs;
+ uint8_t *insn_bytes;
+ uint64_t flags;
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+
+ TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+ "Unexpected suberror: %u",
+ run->emulation_failure.suberror);
+
+ flags = run->emulation_failure.flags;
+ TEST_ASSERT(run->emulation_failure.ndata >= 3 &&
+ flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES,
+ "run->emulation_failure is missing instruction bytes");
+
+ TEST_ASSERT(run->emulation_failure.insn_size >= 2,
+ "Expected a 2-byte opcode for 'flds', got %d bytes",
+ run->emulation_failure.insn_size);
+
+ insn_bytes = run->emulation_failure.insn_bytes;
+ TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0,
+ "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x",
+ insn_bytes[0], insn_bytes[1]);
+
+ vcpu_regs_get(vcpu, &regs);
+ regs.rip += 2;
+ vcpu_regs_set(vcpu, &regs);
+}
+
+#endif /* !SELFTEST_KVM_FLDS_EMULATION_H */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023, Google LLC.
+ */
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit)
+{
+ const uint64_t ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8);
+ const uint64_t valid = BIT_ULL(18) | BIT_ULL(24);
+ const uint64_t legal = ignored | valid;
+ uint64_t val = BIT_ULL(bit);
+ uint64_t actual;
+ int r;
+
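+ /*
+ * KVM_SET_MSRS returns the number of MSRs written: 0 if the write
+ * was rejected, 1 if it succeeded.
+ */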
+ r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val);
+ TEST_ASSERT(val & ~legal ? !r : r == 1,
+ "Expected KVM_SET_MSRS(MSR_K7_HWCR) = 0x%lx to %s",
+ val, val & ~legal ? "fail" : "succeed");
+
+ actual = vcpu_get_msr(vcpu, MSR_K7_HWCR);
+ TEST_ASSERT(actual == (val & valid),
+ "Bit %u: unexpected HWCR 0x%lx; expected 0x%lx",
+ bit, actual, (val & valid));
+
+ vcpu_set_msr(vcpu, MSR_K7_HWCR, 0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ unsigned int bit;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ for (bit = 0; bit < BITS_PER_LONG; bit++)
+ test_hwcr_bit(vcpu, bit);
+
+ kvm_vm_free(vm);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V clocksources
+ */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+struct ms_hyperv_tsc_page {
+ volatile u32 tsc_sequence;
+ u32 reserved1;
+ volatile u64 tsc_scale;
+ volatile s64 tsc_offset;
+} __packed;
+
+/* Simplified mul_u64_u64_shr(): returns the high 64 bits of a * b, i.e. (a * b) >> 64 */
+static inline u64 mul_u64_u64_shr64(u64 a, u64 b)
+{
+ union {
+ u64 ll;
+ struct {
+ u32 low, high;
+ } l;
+ } rm, rn, rh, a0, b0;
+ u64 c;
+
+ a0.ll = a;
+ b0.ll = b;
+
+ rm.ll = (u64)a0.l.low * b0.l.high;
+ rn.ll = (u64)a0.l.high * b0.l.low;
+ rh.ll = (u64)a0.l.high * b0.l.high;
+
+ rh.l.low = c = rm.l.high + rn.l.high + rh.l.low;
+ rh.l.high = (c >> 32) + rh.l.high;
+
+ return rh.ll;
+}
+
+static inline void nop_loop(void)
+{
+ int i;
+
+ for (i = 0; i < 100000000; i++)
+ asm volatile("nop");
+}
+
+static inline void check_tsc_msr_rdtsc(void)
+{
+ u64 tsc_freq, r1, r2, t1, t2;
+ s64 delta_ns;
+
+ tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
+ GUEST_ASSERT(tsc_freq > 0);
+
+ /* For increased accuracy, take the mean of rdtsc() before and after rdmsr() */
+ r1 = rdtsc();
+ t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ r1 = (r1 + rdtsc()) / 2;
+ nop_loop();
+ r2 = rdtsc();
+ t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ r2 = (r2 + rdtsc()) / 2;
+
+ GUEST_ASSERT(r2 > r1 && t2 > t1);
+
+ /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
+ delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+ if (delta_ns < 0)
+ delta_ns = -delta_ns;
+
+ /* 1% tolerance */
+ GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100);
+}
+
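+/*
+ * Hyper-V TSC page reference time: ((tsc * tsc_scale) >> 64) + tsc_offset,
+ * in 100ns units.
+ */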
+static inline u64 get_tscpage_ts(struct ms_hyperv_tsc_page *tsc_page)
+{
+ return mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
+}
+
+static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page)
+{
+ u64 r1, r2, t1, t2;
+
+ /* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */
+ t1 = get_tscpage_ts(tsc_page);
+ r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+
+ /* 10 ms tolerance */
+ GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000);
+ nop_loop();
+
+ t2 = get_tscpage_ts(tsc_page);
+ r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000);
+}
+
+static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa)
+{
+ u64 tsc_scale, tsc_offset;
+
+ /* Set Guest OS id to enable Hyper-V emulation */
+ GUEST_SYNC(1);
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ GUEST_SYNC(2);
+
+ check_tsc_msr_rdtsc();
+
+ GUEST_SYNC(3);
+
+ /* Set up the TSC page in disabled state, check that it's clean */
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa);
+ GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+ GUEST_ASSERT(tsc_page->tsc_scale == 0);
+ GUEST_ASSERT(tsc_page->tsc_offset == 0);
+
+ GUEST_SYNC(4);
+
+ /* Set up the TSC page in enabled state */
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1);
+ GUEST_ASSERT(tsc_page->tsc_sequence != 0);
+
+ GUEST_SYNC(5);
+
+ check_tsc_msr_tsc_page(tsc_page);
+
+ GUEST_SYNC(6);
+
+ tsc_offset = tsc_page->tsc_offset;
+ /* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */
+
+ GUEST_SYNC(7);
+ /* Sanity check TSC page timestamp, it should be close to 0 */
+ GUEST_ASSERT(get_tscpage_ts(tsc_page) < 100000);
+
+ GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset);
+
+ nop_loop();
+
+ /*
+ * Enable Re-enlightenment and check that TSC page stays constant across
+ * KVM_SET_CLOCK.
+ */
+ wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff);
+ wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1);
+ tsc_offset = tsc_page->tsc_offset;
+ tsc_scale = tsc_page->tsc_scale;
+ GUEST_SYNC(8);
+ GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset);
+ GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale);
+
+ GUEST_SYNC(9);
+
+ check_tsc_msr_tsc_page(tsc_page);
+
+ /*
+ * Disable re-enlightenment and TSC page, check that KVM doesn't update
+ * it anymore.
+ */
+ wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0);
+ wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0);
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, 0);
+ memset(tsc_page, 0, sizeof(*tsc_page));
+
+ GUEST_SYNC(10);
+ GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+ GUEST_ASSERT(tsc_page->tsc_offset == 0);
+ GUEST_ASSERT(tsc_page->tsc_scale == 0);
+
+ GUEST_DONE();
+}
+
+static void host_check_tsc_msr_rdtsc(struct kvm_vcpu *vcpu)
+{
+ u64 tsc_freq, r1, r2, t1, t2;
+ s64 delta_ns;
+
+ tsc_freq = vcpu_get_msr(vcpu, HV_X64_MSR_TSC_FREQUENCY);
+ TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
+
+ /* For increased accuracy, take the mean of rdtsc() before and after the ioctl */
+ r1 = rdtsc();
+ t1 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
+ r1 = (r1 + rdtsc()) / 2;
+ nop_loop();
+ r2 = rdtsc();
+ t2 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
+ r2 = (r2 + rdtsc()) / 2;
+
+ TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
+
+ /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
+ delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+ if (delta_ns < 0)
+ delta_ns = -delta_ns;
+
+ /* 1% tolerance */
+ TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100,
+ "Elapsed time does not match (MSR=%ld, TSC=%ld)",
+ (t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq);
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ vm_vaddr_t tsc_page_gva;
+ int stage;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME));
+ TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+ vcpu_set_hv_cpuid(vcpu);
+
+ tsc_page_gva = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
+ TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
+ "TSC page has to be page aligned");
+ vcpu_args_set(vcpu, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
+
+ host_check_tsc_msr_rdtsc(vcpu);
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ /* Keep in sync with guest_main() */
+ TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d",
+ stage);
+ goto out;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage,
+ "Stage %d: Unexpected register values vmexit, got %lx",
+ stage, (ulong)uc.args[1]);
+
+ /* Reset kvmclock triggering TSC page update */
+ if (stage == 7 || stage == 8 || stage == 10) {
+ struct kvm_clock_data clock = {0};
+
+ vm_ioctl(vm, KVM_SET_CLOCK, &clock);
+ }
+ }
+
+out:
+ kvm_vm_free(vm);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_CAP_HYPERV_CPUID
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+static void guest_code(void)
+{
+}
+
+static bool smt_possible(void)
+{
+ char buf[16];
+ FILE *f;
+ bool res = true;
+
+ f = fopen("/sys/devices/system/cpu/smt/control", "r");
+ if (f) {
+ if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
+ if (!strncmp(buf, "forceoff", 8) ||
+ !strncmp(buf, "notsupported", 12))
+ res = false;
+ }
+ fclose(f);
+ }
+
+ return res;
+}
+
+static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries,
+ bool evmcs_expected)
+{
+ int i;
+ int nent_expected = 10;
+ u32 test_val;
+
+ TEST_ASSERT(hv_cpuid_entries->nent == nent_expected,
+ "KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
+ " (returned %d)",
+ nent_expected, hv_cpuid_entries->nent);
+
+ for (i = 0; i < hv_cpuid_entries->nent; i++) {
+ const struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
+
+ TEST_ASSERT((entry->function >= 0x40000000) &&
+ (entry->function <= 0x40000082),
+ "function %x is our of supported range",
+ entry->function);
+
+ TEST_ASSERT(entry->index == 0,
+ ".index field should be zero");
+
+ TEST_ASSERT(entry->flags == 0,
+ ".flags field should be zero");
+
+ TEST_ASSERT(!entry->padding[0] && !entry->padding[1] &&
+ !entry->padding[2], "padding should be zero");
+
+ switch (entry->function) {
+ case 0x40000000:
+ test_val = 0x40000082;
+
+ TEST_ASSERT(entry->eax == test_val,
+ "Wrong max leaf report in 0x40000000.EAX: %x"
+ " (evmcs=%d)",
+ entry->eax, evmcs_expected
+ );
+ break;
+ case 0x40000004:
+ test_val = entry->eax & (1UL << 18);
+
+ TEST_ASSERT(!!test_val == !smt_possible(),
+ "NoNonArchitecturalCoreSharing bit"
+ " doesn't reflect SMT setting");
+ break;
+ case 0x4000000A:
+ TEST_ASSERT(entry->eax & (1UL << 19),
+ "Enlightened MSR-Bitmap should always be supported"
+ " 0x40000000.EAX: %x", entry->eax);
+ if (evmcs_expected)
+ TEST_ASSERT((entry->eax & 0xffff) == 0x101,
+ "Supported Enlightened VMCS version range is supposed to be 1:1"
+ " 0x40000000.EAX: %x", entry->eax);
+
+ break;
+ default:
+ break;
+
+ }
+ /*
+ * If needed for debug:
+ * fprintf(stdout,
+ * "CPUID%lx EAX=0x%lx EBX=0x%lx ECX=0x%lx EDX=0x%lx\n",
+ * entry->function, entry->eax, entry->ebx, entry->ecx,
+ * entry->edx);
+ */
+ }
+}
+
+void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ static struct kvm_cpuid2 cpuid = {.nent = 0};
+ int ret;
+
+ if (vcpu)
+ ret = __vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+ else
+ ret = __kvm_ioctl(vm->kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+
+ TEST_ASSERT(ret == -1 && errno == E2BIG,
+ "%s KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
+ " it should have: %d %d", !vcpu ? "KVM" : "vCPU", ret, errno);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ const struct kvm_cpuid2 *hv_cpuid_entries;
+ struct kvm_vcpu *vcpu;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ /* Test vCPU ioctl version */
+ test_hv_cpuid_e2big(vm, vcpu);
+
+ hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
+ test_hv_cpuid(hv_cpuid_entries, false);
+ free((void *)hv_cpuid_entries);
+
+ if (!kvm_cpu_has(X86_FEATURE_VMX) ||
+ !kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+ print_skip("Enlightened VMCS is unsupported");
+ goto do_sys;
+ }
+ vcpu_enable_evmcs(vcpu);
+ hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
+ test_hv_cpuid(hv_cpuid_entries, true);
+ free((void *)hv_cpuid_entries);
+
+do_sys:
+ /* Test system ioctl version */
+ if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) {
+ print_skip("KVM_CAP_SYS_HYPERV_CPUID not supported");
+ goto out;
+ }
+
+ test_hv_cpuid_e2big(vm, NULL);
+
+ hv_cpuid_entries = kvm_get_supported_hv_cpuid();
+ test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX));
+
+out:
+ kvm_vm_free(vm);
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for Enlightened VMCS, including nested guest state.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/bitmap.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+
+#include "hyperv.h"
+#include "vmx.h"
+
+static int ud_count;
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+ ud_count++;
+ regs->rip += 3; /* VMLAUNCH */
+}
+
+static void guest_nmi_handler(struct ex_regs *regs)
+{
+}
+
+static inline void rdmsr_from_l2(uint32_t msr)
+{
+ /* Currently, L1 doesn't preserve GPRs during vmexits. */
+ __asm__ __volatile__ ("rdmsr" : : "c"(msr) :
+ "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+ "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
+/* Exit to L1 from L2 with RDMSR instruction */
+void l2_guest_code(void)
+{
+ u64 unused;
+
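+	/*
+	 * Each GUEST_SYNC below makes the host save and restore the whole
+	 * VM, see save_restore_vm() in main().
+	 */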
+ GUEST_SYNC(7);
+
+ GUEST_SYNC(8);
+
+ /* Forced exit to L1 upon restore */
+ GUEST_SYNC(9);
+
+ vmcall();
+
+ /* MSR-Bitmap tests */
+ rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+ rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+ rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
+ vmcall();
+ rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
+
+ /* L2 TLB flush tests */
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS);
+ rdmsr_from_l2(MSR_FS_BASE);
+ /*
+ * Note: hypercall status (RAX) is not preserved correctly by L1 after
+ * synthetic vmexit, use unchecked version.
+ */
+ __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS,
+ &unused);
+
+ /* Done, exit to L1 and never come back. */
+ vmcall();
+}
+
+void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages,
+ vm_vaddr_t hv_hcall_page_gpa)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, hv_hcall_page_gpa);
+
+ x2apic_enable();
+
+ GUEST_SYNC(1);
+ GUEST_SYNC(2);
+
+ enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
+ evmcs_enable();
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_SYNC(3);
+ GUEST_ASSERT(load_evmcs(hv_pages));
+ GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+
+ GUEST_SYNC(4);
+ GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_SYNC(5);
+ GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+ current_evmcs->revision_id = -1u;
+ GUEST_ASSERT(vmlaunch());
+ current_evmcs->revision_id = EVMCS_VERSION;
+ GUEST_SYNC(6);
+
+ vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
+ PIN_BASED_NMI_EXITING);
+
+ /* L2 TLB flush setup */
+ current_evmcs->partition_assist_page = hv_pages->partition_assist_gpa;
+ current_evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
+ current_evmcs->hv_vm_id = 1;
+ current_evmcs->hv_vp_id = 1;
+ current_vp_assist->nested_control.features.directhypercall = 1;
+ *(u32 *)(hv_pages->partition_assist) = 0;
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
+ GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), NMI_VECTOR);
+ GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+
+ /*
+	 * The NMI forces an L2->L1 exit; resuming L2 should find the eVMCS
+	 * up-to-date (RIP points where it should and not at the beginning
+	 * of l2_guest_code()). GUEST_SYNC(9) checks that.
+ */
+ GUEST_ASSERT(!vmresume());
+
+ GUEST_SYNC(10);
+
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ current_evmcs->guest_rip += 3; /* vmcall */
+
+ /* Intercept RDMSR 0xc0000100 */
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
+ CPU_BASED_USE_MSR_BITMAPS);
+ __set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400);
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+ current_evmcs->guest_rip += 2; /* rdmsr */
+
+ /* Enable enlightened MSR bitmap */
+ current_evmcs->hv_enlightenments_control.msr_bitmap = 1;
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+ current_evmcs->guest_rip += 2; /* rdmsr */
+
+ /* Intercept RDMSR 0xc0000101 without telling KVM about it */
+ __set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400);
+ /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
+ current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+ GUEST_ASSERT(!vmresume());
+ /* Make sure we don't see EXIT_REASON_MSR_READ here so eMSR bitmap works */
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ current_evmcs->guest_rip += 3; /* vmcall */
+
+ /* Now tell KVM we've changed MSR-Bitmap */
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+ current_evmcs->guest_rip += 2; /* rdmsr */
+
+ /*
+ * L2 TLB flush test. First VMCALL should be handled directly by L0,
+ * no VMCALL exit expected.
+ */
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+ current_evmcs->guest_rip += 2; /* rdmsr */
+ /* Enable synthetic vmexit */
+ *(u32 *)(hv_pages->partition_assist) = 1;
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH);
+
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_SYNC(11);
+
+ /* Try enlightened vmptrld with an incorrect GPA */
+ evmcs_vmptrld(0xdeadbeef, hv_pages->enlightened_vmcs);
+ GUEST_ASSERT(vmlaunch());
+ GUEST_ASSERT(ud_count == 1);
+ GUEST_DONE();
+}
+
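+/* Make an NMI pending in L1 via KVM_SET_VCPU_EVENTS. */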
+void inject_nmi(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events;
+
+ vcpu_events_get(vcpu, &events);
+
+ events.nmi.pending = 1;
+ events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
+
+ vcpu_events_set(vcpu, &events);
+}
+
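+/*
+ * Save the complete vCPU state, release the VM, re-create it with one vCPU
+ * and restore the state, checking that GPRs survive the round trip.
+ */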
+static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm,
+ struct kvm_vcpu *vcpu)
+{
+ struct kvm_regs regs1, regs2;
+ struct kvm_x86_state *state;
+
+ state = vcpu_save_state(vcpu);
+	memset(&regs1, 0, sizeof(regs1));
+	vcpu_regs_get(vcpu, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_set_hv_cpuid(vcpu);
+ vcpu_enable_evmcs(vcpu);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
+
+	memset(&regs2, 0, sizeof(regs2));
+	vcpu_regs_get(vcpu, &regs2);
+	TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+ return vcpu;
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0;
+ vm_vaddr_t hcall_page;
+
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int stage;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS));
+ TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ hcall_page = vm_vaddr_alloc_pages(vm, 1);
+ memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize());
+
+ vcpu_set_hv_cpuid(vcpu);
+ vcpu_enable_evmcs(vcpu);
+
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
+ vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
+ vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
+
+ vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+ vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
+
+ pr_info("Running L1 which uses EVMCS to run L2\n");
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ /* UCALL_SYNC is handled here. */
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+ stage, (ulong)uc.args[1]);
+
+ vcpu = save_restore_vm(vm, vcpu);
+
+ /* Force immediate L2->L1 exit before resuming */
+ if (stage == 8) {
+ pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
+ inject_nmi(vcpu);
+ }
+
+ /*
+ * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly
+ * restored VM (before the first KVM_RUN) to check that
+ * KVM_STATE_NESTED_EVMCS is not lost.
+ */
+ if (stage == 9) {
+ pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n");
+ vcpu = save_restore_vm(vm, vcpu);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test Hyper-V extended hypercall, HV_EXT_CALL_QUERY_CAPABILITIES (0x8001),
+ * exit to userspace and receive result in guest.
+ *
+ * Negative tests are present in hyperv_features.c
+ *
+ * Copyright 2022 Google LLC
+ * Author: Vipin Sharma <vipinsh@google.com>
+ */
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+/* Any value is fine */
+#define EXT_CAPABILITIES 0xbull
+
+static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa,
+ vm_vaddr_t out_pg_gva)
+{
+ uint64_t *output_gva;
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, in_pg_gpa);
+
+ output_gva = (uint64_t *)out_pg_gva;
+
+ hyperv_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, in_pg_gpa, out_pg_gpa);
+
+ /* TLFS states output will be a uint64_t value */
+ GUEST_ASSERT_EQ(*output_gva, EXT_CAPABILITIES);
+
+ GUEST_DONE();
+}
+
+int main(void)
+{
+ vm_vaddr_t hcall_out_page;
+ vm_vaddr_t hcall_in_page;
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ uint64_t *outval;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
+
+ /* Verify if extended hypercalls are supported */
+ if (!kvm_cpuid_has(kvm_get_supported_hv_cpuid(),
+ HV_ENABLE_EXTENDED_HYPERCALLS)) {
+ print_skip("Extended calls not supported by the kernel");
+ exit(KSFT_SKIP);
+ }
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
+ vcpu_set_hv_cpuid(vcpu);
+
+ /* Hypercall input */
+ hcall_in_page = vm_vaddr_alloc_pages(vm, 1);
+ memset(addr_gva2hva(vm, hcall_in_page), 0x0, vm->page_size);
+
+ /* Hypercall output */
+ hcall_out_page = vm_vaddr_alloc_pages(vm, 1);
+ memset(addr_gva2hva(vm, hcall_out_page), 0x0, vm->page_size);
+
+ vcpu_args_set(vcpu, 3, addr_gva2gpa(vm, hcall_in_page),
+ addr_gva2gpa(vm, hcall_out_page), hcall_out_page);
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERV,
+ "Unexpected exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
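+	/*
+	 * Userspace acts as the 'hypervisor' for the extended hypercall:
+	 * write the capability value to the output page the guest passed in
+	 * and report success.
+	 */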
+ outval = addr_gpa2hva(vm, run->hyperv.u.hcall.params[1]);
+ *outval = EXT_CAPABILITIES;
+ run->hyperv.u.hcall.result = HV_STATUS_SUCCESS;
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+ }
+
+ kvm_vm_free(vm);
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V features enablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+/*
+ * HYPERV_CPUID_ENLIGHTMENT_INFO.EBX is not a 'feature' CPUID leaf
+ * but to activate the feature it is sufficient to set it to a non-zero
+ * value. Use BIT(0) for that.
+ */
+#define HV_PV_SPINLOCKS_TEST \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EBX, 0)
+
+struct msr_data {
+ uint32_t idx;
+ bool fault_expected;
+ bool write;
+ u64 write_val;
+};
+
+struct hcall_data {
+ uint64_t control;
+ uint64_t expect;
+ bool ud_expected;
+};
+
+static bool is_write_only_msr(uint32_t msr)
+{
+ return msr == HV_X64_MSR_EOI;
+}
+
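+/*
+ * Perform the requested WRMSR and/or RDMSR with exception fixup and check
+ * that a #GP was (or was not) delivered as expected.
+ */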
+static void guest_msr(struct msr_data *msr)
+{
+ uint8_t vector = 0;
+ uint64_t msr_val = 0;
+
+ GUEST_ASSERT(msr->idx);
+
+ if (msr->write)
+ vector = wrmsr_safe(msr->idx, msr->write_val);
+
+ if (!vector && (!msr->write || !is_write_only_msr(msr->idx)))
+ vector = rdmsr_safe(msr->idx, &msr_val);
+
+ if (msr->fault_expected)
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP on %sMSR(0x%x), got vector '0x%x'",
+ msr->write ? "WR" : "RD", msr->idx, vector);
+ else
+ __GUEST_ASSERT(!vector,
+ "Expected success on %sMSR(0x%x), got vector '0x%x'",
+ msr->write ? "WR" : "RD", msr->idx, vector);
+
+ if (vector || is_write_only_msr(msr->idx))
+ goto done;
+
+ if (msr->write)
+ __GUEST_ASSERT(!vector,
+ "WRMSR(0x%x) to '0x%lx', RDMSR read '0x%lx'",
+ msr->idx, msr->write_val, msr_val);
+
+ /* Invariant TSC bit appears when TSC invariant control MSR is written to */
+ if (msr->idx == HV_X64_MSR_TSC_INVARIANT_CONTROL) {
+ if (!this_cpu_has(HV_ACCESS_TSC_INVARIANT))
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC));
+ else
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC) ==
+ !!(msr_val & HV_INVARIANT_TSC_EXPOSED));
+ }
+
+done:
+ GUEST_DONE();
+}
+
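+/*
+ * Issue the requested hypercall and check either for the expected return
+ * status or for #UD, depending on what the host side set up.
+ */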
+static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
+{
+ u64 res, input, output;
+ uint8_t vector;
+
+ GUEST_ASSERT_NE(hcall->control, 0);
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+
+ if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
+ input = pgs_gpa;
+ output = pgs_gpa + 4096;
+ } else {
+ input = output = 0;
+ }
+
+ vector = __hyperv_hypercall(hcall->control, input, output, &res);
+ if (hcall->ud_expected) {
+ __GUEST_ASSERT(vector == UD_VECTOR,
+ "Expected #UD for control '%lu', got vector '0x%x'",
+ hcall->control, vector);
+ } else {
+ __GUEST_ASSERT(!vector,
+ "Expected no exception for control '%lu', got vector '0x%x'",
+ hcall->control, vector);
+ GUEST_ASSERT_EQ(res, hcall->expect);
+ }
+
+ GUEST_DONE();
+}
+
+static void vcpu_reset_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Enable all supported Hyper-V features, then clear the leafs holding
+ * the features that will be tested one by one.
+ */
+ vcpu_set_hv_cpuid(vcpu);
+
+ vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
+ vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
+ vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
+}
+
+static void guest_test_msrs_access(void)
+{
+ struct kvm_cpuid2 *prev_cpuid = NULL;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int stage = 0;
+ vm_vaddr_t msr_gva;
+ struct msr_data *msr;
+ bool has_invtsc = kvm_cpu_has(X86_FEATURE_INVTSC);
+
+ while (true) {
+ vm = vm_create_with_one_vcpu(&vcpu, guest_msr);
+
+ msr_gva = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
+ msr = addr_gva2hva(vm, msr_gva);
+
+ vcpu_args_set(vcpu, 1, msr_gva);
+ vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
+
+ if (!prev_cpuid) {
+ vcpu_reset_hv_cpuid(vcpu);
+
+ prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
+ } else {
+ vcpu_init_cpuid(vcpu, prev_cpuid);
+ }
+
+ /* TODO: Make this entire test easier to maintain. */
+ if (stage >= 21)
+ vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_SYNIC2, 0);
+
+ switch (stage) {
+ case 0:
+ /*
+ * Only available when Hyper-V identification is set
+ */
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 1:
+ msr->idx = HV_X64_MSR_HYPERCALL;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 2:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
+ /*
+ * HV_X64_MSR_GUEST_OS_ID has to be written first to make
+ * HV_X64_MSR_HYPERCALL available.
+ */
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = true;
+ msr->write_val = HYPERV_LINUX_OS_ID;
+ msr->fault_expected = false;
+ break;
+ case 3:
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 4:
+ msr->idx = HV_X64_MSR_HYPERCALL;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+
+ case 5:
+ msr->idx = HV_X64_MSR_VP_RUNTIME;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 6:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_RUNTIME_AVAILABLE);
+ msr->idx = HV_X64_MSR_VP_RUNTIME;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 7:
+ /* Read only */
+ msr->idx = HV_X64_MSR_VP_RUNTIME;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = true;
+ break;
+
+ case 8:
+ msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 9:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_TIME_REF_COUNT_AVAILABLE);
+ msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 10:
+ /* Read only */
+ msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = true;
+ break;
+
+ case 11:
+ msr->idx = HV_X64_MSR_VP_INDEX;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 12:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_INDEX_AVAILABLE);
+ msr->idx = HV_X64_MSR_VP_INDEX;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 13:
+ /* Read only */
+ msr->idx = HV_X64_MSR_VP_INDEX;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = true;
+ break;
+
+ case 14:
+ msr->idx = HV_X64_MSR_RESET;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 15:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_RESET_AVAILABLE);
+ msr->idx = HV_X64_MSR_RESET;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 16:
+ msr->idx = HV_X64_MSR_RESET;
+ msr->write = true;
+ /*
+ * TODO: the test only writes '0' to HV_X64_MSR_RESET
+ * at the moment, writing some other value there will
+ * trigger real vCPU reset and the code is not prepared
+ * to handle it yet.
+ */
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+
+ case 17:
+ msr->idx = HV_X64_MSR_REFERENCE_TSC;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 18:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_REFERENCE_TSC_AVAILABLE);
+ msr->idx = HV_X64_MSR_REFERENCE_TSC;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 19:
+ msr->idx = HV_X64_MSR_REFERENCE_TSC;
+ msr->write = true;
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+
+ case 20:
+ msr->idx = HV_X64_MSR_EOM;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 21:
+ /*
+ * Remains unavailable even with KVM_CAP_HYPERV_SYNIC2
+ * capability enabled and guest visible CPUID bit unset.
+ */
+ msr->idx = HV_X64_MSR_EOM;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 22:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNIC_AVAILABLE);
+ msr->idx = HV_X64_MSR_EOM;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 23:
+ msr->idx = HV_X64_MSR_EOM;
+ msr->write = true;
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+
+ case 24:
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 25:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNTIMER_AVAILABLE);
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 26:
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = true;
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+ case 27:
+ /* Direct mode test */
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = true;
+ msr->write_val = 1 << 12;
+ msr->fault_expected = true;
+ break;
+ case 28:
+ vcpu_set_cpuid_feature(vcpu, HV_STIMER_DIRECT_MODE_AVAILABLE);
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = true;
+ msr->write_val = 1 << 12;
+ msr->fault_expected = false;
+ break;
+
+ case 29:
+ msr->idx = HV_X64_MSR_EOI;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 30:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_APIC_ACCESS_AVAILABLE);
+ msr->idx = HV_X64_MSR_EOI;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = false;
+ break;
+
+ case 31:
+ msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 32:
+ vcpu_set_cpuid_feature(vcpu, HV_ACCESS_FREQUENCY_MSRS);
+ msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 33:
+ /* Read only */
+ msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = true;
+ break;
+
+ case 34:
+ msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 35:
+ vcpu_set_cpuid_feature(vcpu, HV_ACCESS_REENLIGHTENMENT);
+ msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 36:
+ msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = false;
+ break;
+ case 37:
+ /* Can only write '0' */
+ msr->idx = HV_X64_MSR_TSC_EMULATION_STATUS;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = true;
+ break;
+
+ case 38:
+ msr->idx = HV_X64_MSR_CRASH_P0;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 39:
+ vcpu_set_cpuid_feature(vcpu, HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE);
+ msr->idx = HV_X64_MSR_CRASH_P0;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 40:
+ msr->idx = HV_X64_MSR_CRASH_P0;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = false;
+ break;
+
+ case 41:
+ msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 42:
+ vcpu_set_cpuid_feature(vcpu, HV_FEATURE_DEBUG_MSRS_AVAILABLE);
+ vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
+ msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 43:
+ msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+ msr->write = true;
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+
+ case 44:
+ /* MSR is not available when CPUID feature bit is unset */
+ if (!has_invtsc)
+ goto next_stage;
+ msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 45:
+			/* MSR is available when CPUID feature bit is set */
+ if (!has_invtsc)
+ goto next_stage;
+ vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT);
+ msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 46:
+ /* Writing bits other than 0 is forbidden */
+ if (!has_invtsc)
+ goto next_stage;
+ msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+ msr->write = true;
+ msr->write_val = 0xdeadbeef;
+ msr->fault_expected = true;
+ break;
+ case 47:
+ /* Setting bit 0 enables the feature */
+ if (!has_invtsc)
+ goto next_stage;
+ msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = false;
+ break;
+
+ default:
+ kvm_vm_free(vm);
+ return;
+ }
+
+ vcpu_set_cpuid(vcpu);
+
+ memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
+
+ pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage,
+ msr->idx, msr->write ? "write" : "read");
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ return;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+ return;
+ }
+
+next_stage:
+ stage++;
+ kvm_vm_free(vm);
+ }
+}
+
+static void guest_test_hcalls_access(void)
+{
+ struct kvm_cpuid2 *prev_cpuid = NULL;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int stage = 0;
+ vm_vaddr_t hcall_page, hcall_params;
+ struct hcall_data *hcall;
+
+ while (true) {
+ vm = vm_create_with_one_vcpu(&vcpu, guest_hcall);
+
+ /* Hypercall input/output */
+ hcall_page = vm_vaddr_alloc_pages(vm, 2);
+ memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+ hcall_params = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
+ hcall = addr_gva2hva(vm, hcall_params);
+
+ vcpu_args_set(vcpu, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
+ vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
+
+ if (!prev_cpuid) {
+ vcpu_reset_hv_cpuid(vcpu);
+
+ prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
+ } else {
+ vcpu_init_cpuid(vcpu, prev_cpuid);
+ }
+
+ switch (stage) {
+ case 0:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
+ hcall->control = 0xbeef;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+ break;
+
+ case 1:
+ hcall->control = HVCALL_POST_MESSAGE;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 2:
+ vcpu_set_cpuid_feature(vcpu, HV_POST_MESSAGES);
+ hcall->control = HVCALL_POST_MESSAGE;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+
+ case 3:
+ hcall->control = HVCALL_SIGNAL_EVENT;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 4:
+ vcpu_set_cpuid_feature(vcpu, HV_SIGNAL_EVENTS);
+ hcall->control = HVCALL_SIGNAL_EVENT;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+
+ case 5:
+ hcall->control = HVCALL_RESET_DEBUG_SESSION;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+ break;
+ case 6:
+ vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
+ hcall->control = HVCALL_RESET_DEBUG_SESSION;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 7:
+ vcpu_set_cpuid_feature(vcpu, HV_DEBUGGING);
+ hcall->control = HVCALL_RESET_DEBUG_SESSION;
+ hcall->expect = HV_STATUS_OPERATION_DENIED;
+ break;
+
+ case 8:
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 9:
+ vcpu_set_cpuid_feature(vcpu, HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED);
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+ case 10:
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 11:
+ vcpu_set_cpuid_feature(vcpu, HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED);
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+
+ case 12:
+ hcall->control = HVCALL_SEND_IPI;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 13:
+ vcpu_set_cpuid_feature(vcpu, HV_X64_CLUSTER_IPI_RECOMMENDED);
+ hcall->control = HVCALL_SEND_IPI;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ case 14:
+ /* Nothing in 'sparse banks' -> success */
+ hcall->control = HVCALL_SEND_IPI_EX;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+
+ case 15:
+ hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 16:
+ vcpu_set_cpuid_feature(vcpu, HV_PV_SPINLOCKS_TEST);
+ hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+ case 17:
+ /* XMM fast hypercall */
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
+ hcall->ud_expected = true;
+ break;
+ case 18:
+ vcpu_set_cpuid_feature(vcpu, HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE);
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
+ hcall->ud_expected = false;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+ case 19:
+ hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 20:
+ vcpu_set_cpuid_feature(vcpu, HV_ENABLE_EXTENDED_HYPERCALLS);
+ hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES | HV_HYPERCALL_FAST_BIT;
+ hcall->expect = HV_STATUS_INVALID_PARAMETER;
+ break;
+ case 21:
+ kvm_vm_free(vm);
+ return;
+ }
+
+ vcpu_set_cpuid(vcpu);
+
+ memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
+
+ pr_debug("Stage %d: testing hcall: 0x%lx\n", stage, hcall->control);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ return;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+ return;
+ }
+
+ stage++;
+ kvm_vm_free(vm);
+ }
+}
+
+int main(void)
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENFORCE_CPUID));
+
+ pr_info("Testing access to Hyper-V specific MSRs\n");
+ guest_test_msrs_access();
+
+ pr_info("Testing access to Hyper-V hypercalls\n");
+ guest_test_hcalls_access();
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hyper-V HvCallSendSyntheticClusterIpi{,Ex} tests
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ */
+#include <pthread.h>
+#include <inttypes.h>
+
+#include "kvm_util.h"
+#include "hyperv.h"
+#include "test_util.h"
+#include "vmx.h"
+
+#define RECEIVER_VCPU_ID_1 2
+#define RECEIVER_VCPU_ID_2 65
+
+#define IPI_VECTOR 0xfe
+
+static volatile uint64_t ipis_rcvd[RECEIVER_VCPU_ID_2 + 1];
+
+struct hv_vpset {
+ u64 format;
+ u64 valid_bank_mask;
+ u64 bank_contents[2];
+};
+
+enum HV_GENERIC_SET_FORMAT {
+ HV_GENERIC_SET_SPARSE_4K,
+ HV_GENERIC_SET_ALL,
+};
+
+/* HvCallSendSyntheticClusterIpi hypercall */
+struct hv_send_ipi {
+ u32 vector;
+ u32 reserved;
+ u64 cpu_mask;
+};
+
+/* HvCallSendSyntheticClusterIpiEx hypercall */
+struct hv_send_ipi_ex {
+ u32 vector;
+ u32 reserved;
+ struct hv_vpset vp_set;
+};
+
+static inline void hv_init(vm_vaddr_t pgs_gpa)
+{
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+}
+
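+/*
+ * 'Receiver' vCPUs: enable x2APIC, signal readiness and then idle in HLT
+ * with interrupts enabled, counting IPIs in guest_ipi_handler().
+ */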
+static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa)
+{
+ u32 vcpu_id;
+
+ x2apic_enable();
+ hv_init(pgs_gpa);
+
+ vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
+
+ /* Signal sender vCPU we're ready */
+ ipis_rcvd[vcpu_id] = (u64)-1;
+
+ for (;;)
+ asm volatile("sti; hlt; cli");
+}
+
+static void guest_ipi_handler(struct ex_regs *regs)
+{
+ u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
+
+ ipis_rcvd[vcpu_id]++;
+ wrmsr(HV_X64_MSR_EOI, 1);
+}
+
+static inline void nop_loop(void)
+{
+ int i;
+
+ for (i = 0; i < 100000000; i++)
+ asm volatile("nop");
+}
+
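+/*
+ * 'Sender' vCPU: exercise HvCallSendSyntheticClusterIpi{,Ex} in the slow,
+ * fast and XMM fast variants and check the per-receiver IPI counters after
+ * each hypercall.
+ */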
+static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
+{
+ struct hv_send_ipi *ipi = (struct hv_send_ipi *)hcall_page;
+ struct hv_send_ipi_ex *ipi_ex = (struct hv_send_ipi_ex *)hcall_page;
+ int stage = 1, ipis_expected[2] = {0};
+
+ hv_init(pgs_gpa);
+ GUEST_SYNC(stage++);
+
+ /* Wait for receiver vCPUs to come up */
+ while (!ipis_rcvd[RECEIVER_VCPU_ID_1] || !ipis_rcvd[RECEIVER_VCPU_ID_2])
+ nop_loop();
+ ipis_rcvd[RECEIVER_VCPU_ID_1] = ipis_rcvd[RECEIVER_VCPU_ID_2] = 0;
+
+ /* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
+ ipi->vector = IPI_VECTOR;
+ ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1;
+ hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+ GUEST_SYNC(stage++);
+ /* 'Fast' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
+ hyperv_hypercall(HVCALL_SEND_IPI | HV_HYPERCALL_FAST_BIT,
+ IPI_VECTOR, 1 << RECEIVER_VCPU_ID_1);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+ GUEST_SYNC(stage++);
+
+ /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
+ memset(hcall_page, 0, 4096);
+ ipi_ex->vector = IPI_VECTOR;
+ ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ ipi_ex->vp_set.valid_bank_mask = 1 << 0;
+ ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ pgs_gpa, pgs_gpa + 4096);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+ GUEST_SYNC(stage++);
+ /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
+ hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+ GUEST_SYNC(stage++);
+
+ /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
+ memset(hcall_page, 0, 4096);
+ ipi_ex->vector = IPI_VECTOR;
+ ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ ipi_ex->vp_set.valid_bank_mask = 1 << 1;
+ ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ pgs_gpa, pgs_gpa + 4096);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+ /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
+ hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+
+ /* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */
+ memset(hcall_page, 0, 4096);
+ ipi_ex->vector = IPI_VECTOR;
+ ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1;
+ ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
+ ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+ pgs_gpa, pgs_gpa + 4096);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+ /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1, 2} */
+ hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+ (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+ IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+
+ /* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */
+ memset(hcall_page, 0, 4096);
+ ipi_ex->vector = IPI_VECTOR;
+ ipi_ex->vp_set.format = HV_GENERIC_SET_ALL;
+ hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+ /*
+ * 'XMM Fast' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL.
+ */
+ ipi_ex->vp_set.valid_bank_mask = 0;
+ hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT,
+ IPI_VECTOR, HV_GENERIC_SET_ALL);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+
+ GUEST_DONE();
+}
+
+static void *vcpu_thread(void *arg)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
+ int old, r;
+
+ r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+ TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+ vcpu->id, r);
+
+ vcpu_run(vcpu);
+
+ TEST_FAIL("vCPU %u exited unexpectedly", vcpu->id);
+
+ return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
+{
+ void *retval;
+ int r;
+
+ r = pthread_cancel(thread);
+ TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+
+ r = pthread_join(thread, &retval);
+ TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+ TEST_ASSERT(retval == PTHREAD_CANCELED,
+ "expected retval=%p, got %p", PTHREAD_CANCELED,
+ retval);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu[3];
+ vm_vaddr_t hcall_page;
+ pthread_t threads[2];
+ int stage = 1, r;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_SEND_IPI));
+
+ vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
+
+ /* Hypercall input/output */
+ hcall_page = vm_vaddr_alloc_pages(vm, 2);
+ memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+ vcpu[1] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_1, receiver_code);
+ vcpu_args_set(vcpu[1], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+ vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_1);
+ vcpu_set_hv_cpuid(vcpu[1]);
+
+ vcpu[2] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_2, receiver_code);
+ vcpu_args_set(vcpu[2], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+ vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_2);
+ vcpu_set_hv_cpuid(vcpu[2]);
+
+ vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
+
+ vcpu_args_set(vcpu[0], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+ vcpu_set_hv_cpuid(vcpu[0]);
+
+ r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]);
+ TEST_ASSERT(!r, "pthread_create failed errno=%d", r);
+
+ r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]);
+	TEST_ASSERT(!r, "pthread_create failed errno=%d", r);
+
+ while (true) {
+ vcpu_run(vcpu[0]);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu[0], &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc.args[1] == stage,
+ "Unexpected stage: %ld (%d expected)",
+ uc.args[1], stage);
+ break;
+ case UCALL_DONE:
+ goto done;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ stage++;
+ }
+
+done:
+ cancel_join_vcpu_thread(threads[0], vcpu[1]);
+ cancel_join_vcpu_thread(threads[1], vcpu[2]);
+ kvm_vm_free(vm);
+
+ return r;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ * Tests for Hyper-V extensions to SVM.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/bitmap.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "hyperv.h"
+
+#define L2_GUEST_STACK_SIZE 256
+
+/* Exit to L1 from L2 with RDMSR instruction */
+static inline void rdmsr_from_l2(uint32_t msr)
+{
+ /* Currently, L1 doesn't preserve GPRs during vmexits. */
+ __asm__ __volatile__ ("rdmsr" : : "c"(msr) :
+ "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+ "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
+void l2_guest_code(void)
+{
+ u64 unused;
+
+ GUEST_SYNC(3);
+ /* Exit to L1 */
+ vmmcall();
+
+ /* MSR-Bitmap tests */
+ rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+ rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+ rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
+ vmmcall();
+ rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
+
+ GUEST_SYNC(5);
+
+ /* L2 TLB flush tests */
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+ HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS);
+ rdmsr_from_l2(MSR_FS_BASE);
+ /*
+ * Note: hypercall status (RAX) is not preserved correctly by L1 after
+ * synthetic vmexit, use unchecked version.
+ */
+ __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+ HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS, &unused);
+
+ /* Done, exit to L1 and never come back. */
+ vmmcall();
+}
+
+static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm,
+ struct hyperv_test_pages *hv_pages,
+ vm_vaddr_t pgs_gpa)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+ struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
+
+ GUEST_SYNC(1);
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+ enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
+
+ GUEST_ASSERT(svm->vmcb_gpa);
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* L2 TLB flush setup */
+ hve->partition_assist_page = hv_pages->partition_assist_gpa;
+ hve->hv_enlightenments_control.nested_flush_hypercall = 1;
+ hve->hv_vm_id = 1;
+ hve->hv_vp_id = 1;
+ current_vp_assist->nested_control.features.directhypercall = 1;
+ *(u32 *)(hv_pages->partition_assist) = 0;
+
+ GUEST_SYNC(2);
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_SYNC(4);
+ vmcb->save.rip += 3;
+
+ /* Intercept RDMSR 0xc0000100 */
+ vmcb->control.intercept |= 1ULL << INTERCEPT_MSR_PROT;
+ __set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800);
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+ vmcb->save.rip += 2; /* rdmsr */
+
+ /* Enable enlightened MSR bitmap */
+ hve->hv_enlightenments_control.msr_bitmap = 1;
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+ vmcb->save.rip += 2; /* rdmsr */
+
+ /* Intercept RDMSR 0xc0000101 without telling KVM about it */
+ __set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800);
+ /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
+ vmcb->control.clean |= HV_VMCB_NESTED_ENLIGHTENMENTS;
+ run_guest(vmcb, svm->vmcb_gpa);
+ /* Make sure we don't see SVM_EXIT_MSR here so eMSR bitmap works */
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ vmcb->save.rip += 3; /* vmcall */
+
+ /* Now tell KVM we've changed MSR-Bitmap */
+ vmcb->control.clean &= ~HV_VMCB_NESTED_ENLIGHTENMENTS;
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+ vmcb->save.rip += 2; /* rdmsr */
+
+ /*
+ * L2 TLB flush test. First VMCALL should be handled directly by L0,
+ * no VMCALL exit expected.
+ */
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+ vmcb->save.rip += 2; /* rdmsr */
+ /* Enable synthetic vmexit */
+ *(u32 *)(hv_pages->partition_assist) = 1;
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == HV_SVM_EXITCODE_ENL);
+ GUEST_ASSERT(vmcb->control.exit_info_1 == HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH);
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_SYNC(6);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t nested_gva = 0, hv_pages_gva = 0;
+ vm_vaddr_t hcall_page;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int stage;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+ TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH));
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_set_hv_cpuid(vcpu);
+ vcpu_alloc_svm(vm, &nested_gva);
+ vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
+
+ hcall_page = vm_vaddr_alloc_pages(vm, 1);
+ memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize());
+
+ vcpu_args_set(vcpu, 3, nested_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
+ vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ /* UCALL_SYNC is handled here. */
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+ stage, (ulong)uc.args[1]);
+
+ }
+
+done:
+ kvm_vm_free(vm);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hyper-V HvFlushVirtualAddress{List,Space}{,Ex} tests
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ */
+#include <asm/barrier.h>
+#include <pthread.h>
+#include <inttypes.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+#include "test_util.h"
+#include "vmx.h"
+
+#define WORKER_VCPU_ID_1 2
+#define WORKER_VCPU_ID_2 65
+
+#define NTRY 100
+#define NTEST_PAGES 2
+
+struct hv_vpset {
+ u64 format;
+ u64 valid_bank_mask;
+ u64 bank_contents[];
+};
+
+enum HV_GENERIC_SET_FORMAT {
+ HV_GENERIC_SET_SPARSE_4K,
+ HV_GENERIC_SET_ALL,
+};
+
+#define HV_FLUSH_ALL_PROCESSORS BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
+
+/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
+struct hv_tlb_flush {
+ u64 address_space;
+ u64 flags;
+ u64 processor_mask;
+ u64 gva_list[];
+} __packed;
+
+/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
+struct hv_tlb_flush_ex {
+ u64 address_space;
+ u64 flags;
+ struct hv_vpset hv_vp_set;
+ u64 gva_list[];
+} __packed;
+
+/*
+ * Pass the following info to 'workers' and 'sender'
+ * - Hypercall page's GVA
+ * - Hypercall page's GPA
+ * - Test pages GVA
+ * - GVAs of the test pages' PTEs
+ */
+struct test_data {
+ vm_vaddr_t hcall_gva;
+ vm_paddr_t hcall_gpa;
+ vm_vaddr_t test_pages;
+ vm_vaddr_t test_pages_pte[NTEST_PAGES];
+};
+
+/* 'Worker' vCPU code checking the contents of the test page */
+static void worker_guest_code(vm_vaddr_t test_data)
+{
+ struct test_data *data = (struct test_data *)test_data;
+ u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
+ void *exp_page = (void *)data->test_pages + PAGE_SIZE * NTEST_PAGES;
+ u64 *this_cpu = (u64 *)(exp_page + vcpu_id * sizeof(u64));
+ u64 expected, val;
+
+ x2apic_enable();
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+
+ for (;;) {
+ cpu_relax();
+
+ expected = READ_ONCE(*this_cpu);
+
+ /*
+ * Make sure the value in the test page is read after reading
+ * the expectation for the first time. Pairs with wmb() in
+ * prepare_to_test().
+ */
+ rmb();
+
+ val = READ_ONCE(*(u64 *)data->test_pages);
+
+ /*
+		 * Make sure the value in the test page is read before reading
+		 * the expectation for the second time. Pairs with wmb() in
+		 * post_test().
+ */
+ rmb();
+
+ /*
+ * '0' indicates the sender is between iterations, wait until
+ * the sender is ready for this vCPU to start checking again.
+ */
+ if (!expected)
+ continue;
+
+ /*
+ * Re-read the per-vCPU byte to ensure the sender didn't move
+ * onto a new iteration.
+ */
+ if (expected != READ_ONCE(*this_cpu))
+ continue;
+
+ GUEST_ASSERT(val == expected);
+ }
+}
+
+/*
+ * Write per-CPU info indicating what each 'worker' CPU is supposed to see in
+ * the test page. '0' means don't check.
+ */
+static void set_expected_val(void *addr, u64 val, int vcpu_id)
+{
+ void *exp_page = addr + PAGE_SIZE * NTEST_PAGES;
+
+ *(u64 *)(exp_page + vcpu_id * sizeof(u64)) = val;
+}
+
+/*
+ * Update PTEs swapping two test pages.
+ * TODO: use swap()/xchg() when these are provided.
+ */
+static void swap_two_test_pages(vm_paddr_t pte_gva1, vm_paddr_t pte_gva2)
+{
+ uint64_t tmp = *(uint64_t *)pte_gva1;
+
+ *(uint64_t *)pte_gva1 = *(uint64_t *)pte_gva2;
+ *(uint64_t *)pte_gva2 = tmp;
+}
+
+/*
+ * TODO: replace the silly NOP loop with a proper udelay() implementation.
+ */
+static inline void do_delay(void)
+{
+ int i;
+
+ for (i = 0; i < 1000000; i++)
+ asm volatile("nop");
+}
+
+/*
+ * Prepare to test: 'disable' workers by setting the expectation to '0',
+ * clear hypercall input page and then swap two test pages.
+ */
+static inline void prepare_to_test(struct test_data *data)
+{
+ /* Clear hypercall input page */
+ memset((void *)data->hcall_gva, 0, PAGE_SIZE);
+
+ /* 'Disable' workers */
+ set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_1);
+ set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_2);
+
+ /* Make sure workers are 'disabled' before we swap PTEs. */
+ wmb();
+
+ /* Make sure workers have enough time to notice */
+ do_delay();
+
+ /* Swap test page mappings */
+ swap_two_test_pages(data->test_pages_pte[0], data->test_pages_pte[1]);
+}
+
+/*
+ * Finalize the test: check the hypercall result, set the expected val for
+ * 'worker' CPUs and give them some time to test.
+ */
+static inline void post_test(struct test_data *data, u64 exp1, u64 exp2)
+{
+ /* Make sure we change the expectation after swapping PTEs */
+ wmb();
+
+ /* Set the expectation for workers, '0' means don't test */
+ set_expected_val((void *)data->test_pages, exp1, WORKER_VCPU_ID_1);
+ set_expected_val((void *)data->test_pages, exp2, WORKER_VCPU_ID_2);
+
+ /* Make sure workers have enough time to test */
+ do_delay();
+}
+
+#define TESTVAL1 0x0101010101010101
+#define TESTVAL2 0x0202020202020202
+
+/* Main vCPU doing the test */
+static void sender_guest_code(vm_vaddr_t test_data)
+{
+ struct test_data *data = (struct test_data *)test_data;
+ struct hv_tlb_flush *flush = (struct hv_tlb_flush *)data->hcall_gva;
+ struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)data->hcall_gva;
+ vm_paddr_t hcall_gpa = data->hcall_gpa;
+ int i, stage = 1;
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, data->hcall_gpa);
+
+ /* "Slow" hypercalls */
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa,
+ hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+ flush->gva_list[0] = (u64)data->test_pages;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS;
+ flush->processor_mask = 0;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa,
+ hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS;
+ flush->gva_list[0] = (u64)data->test_pages;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ /* bank_contents and gva_list occupy the same space, thus [1] */
+ flush_ex->gva_list[1] = (u64)data->test_pages;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
+ BIT_ULL(WORKER_VCPU_ID_1 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+ flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+ (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
+ BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+ flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ /* bank_contents and gva_list occupy the same space, thus [2] */
+ flush_ex->gva_list[2] = (u64)data->test_pages;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+ flush_ex->gva_list[0] = (u64)data->test_pages;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ /* "Fast" hypercalls */
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+ hyperv_write_xmm_input(&flush->processor_mask, 1);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+ HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+ flush->gva_list[0] = (u64)data->test_pages;
+ hyperv_write_xmm_input(&flush->processor_mask, 1);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+ HV_HYPERCALL_FAST_BIT |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ hyperv_write_xmm_input(&flush->processor_mask, 1);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+ HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->gva_list[0] = (u64)data->test_pages;
+ hyperv_write_xmm_input(&flush->processor_mask, 1);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+ HV_HYPERCALL_FAST_BIT |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET), 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+ HV_HYPERCALL_FAST_BIT |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ /* bank_contents and gva_list occupy the same space, thus [1] */
+ flush_ex->gva_list[1] = (u64)data->test_pages;
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ HV_HYPERCALL_FAST_BIT |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
+ BIT_ULL(WORKER_VCPU_ID_1 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+ flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+ HV_HYPERCALL_FAST_BIT |
+ (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
+ BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+ flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ /* bank_contents and gva_list occupy the same space, thus [2] */
+ flush_ex->gva_list[2] = (u64)data->test_pages;
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 3);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ HV_HYPERCALL_FAST_BIT |
+ (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+ HV_HYPERCALL_FAST_BIT,
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+ flush_ex->gva_list[0] = (u64)data->test_pages;
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ HV_HYPERCALL_FAST_BIT |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_DONE();
+}
+
+static void *vcpu_thread(void *arg)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
+ struct ucall uc;
+ int old;
+ int r;
+
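+ /*
+ * Switch to asynchronous cancellation so cancel_join_vcpu_thread() can
+ * reliably terminate the worker, which spends essentially all of its
+ * time in KVM_RUN and never exits on its own.
+ */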
+ r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+ TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+ vcpu->id, r);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ default:
+ TEST_FAIL("Unexpected ucall %lu, vCPU %d", uc.cmd, vcpu->id);
+ }
+
+ return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
+{
+ void *retval;
+ int r;
+
+ r = pthread_cancel(thread);
+ TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+
+ r = pthread_join(thread, &retval);
+ TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+ TEST_ASSERT(retval == PTHREAD_CANCELED,
+ "expected retval=%p, got %p", PTHREAD_CANCELED,
+ retval);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu[3];
+ pthread_t threads[2];
+ vm_vaddr_t test_data_page, gva;
+ vm_paddr_t gpa;
+ uint64_t *pte;
+ struct test_data *data;
+ struct ucall uc;
+ int stage = 1, r, i;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TLBFLUSH));
+
+ vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
+
+ /* Test data page */
+ test_data_page = vm_vaddr_alloc_page(vm);
+ data = (struct test_data *)addr_gva2hva(vm, test_data_page);
+
+ /* Hypercall input/output */
+ data->hcall_gva = vm_vaddr_alloc_pages(vm, 2);
+ data->hcall_gpa = addr_gva2gpa(vm, data->hcall_gva);
+ memset(addr_gva2hva(vm, data->hcall_gva), 0x0, 2 * PAGE_SIZE);
+
+ /*
+ * Test pages: the first one is filled with '0x01's, the second with '0x02's
+ * and the test will swap their mappings. The third page tracks the current
+ * state of the mappings.
+ */
+ data->test_pages = vm_vaddr_alloc_pages(vm, NTEST_PAGES + 1);
+ for (i = 0; i < NTEST_PAGES; i++)
+ memset(addr_gva2hva(vm, data->test_pages + PAGE_SIZE * i),
+ (u8)(i + 1), PAGE_SIZE);
+ set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_1);
+ set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_2);
+
+ /*
+ * Get PTE pointers for test pages and map them inside the guest.
+ * Use separate page for each PTE for simplicity.
+ */
+ gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR);
+ for (i = 0; i < NTEST_PAGES; i++) {
+ pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE);
+ gpa = addr_hva2gpa(vm, pte);
+ __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K);
+ data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK);
+ }
+
+ /*
+ * Sender vCPU which performs the test: swaps test pages, sets expectation
+ * for 'workers' and issues TLB flush hypercalls.
+ */
+ vcpu_args_set(vcpu[0], 1, test_data_page);
+ vcpu_set_hv_cpuid(vcpu[0]);
+
+ /* Create worker vCPUs which check the contents of the test pages */
+ vcpu[1] = vm_vcpu_add(vm, WORKER_VCPU_ID_1, worker_guest_code);
+ vcpu_args_set(vcpu[1], 1, test_data_page);
+ vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_1);
+ vcpu_set_hv_cpuid(vcpu[1]);
+
+ vcpu[2] = vm_vcpu_add(vm, WORKER_VCPU_ID_2, worker_guest_code);
+ vcpu_args_set(vcpu[2], 1, test_data_page);
+ vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_2);
+ vcpu_set_hv_cpuid(vcpu[2]);
+
+ r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]);
+ TEST_ASSERT(!r, "pthread_create() failed");
+
+ r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]);
+ TEST_ASSERT(!r, "pthread_create() failed");
+
+ while (true) {
+ vcpu_run(vcpu[0]);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu[0], &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc.args[1] == stage,
+ "Unexpected stage: %ld (%d expected)",
+ uc.args[1], stage);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ stage++;
+ }
+
+done:
+ cancel_join_vcpu_thread(threads[0], vcpu[1]);
+ cancel_join_vcpu_thread(threads[1], vcpu[2]);
+ kvm_vm_free(vm);
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Google LLC.
+ *
+ * Tests for adjusting the KVM clock from userspace
+ */
+#include <asm/kvm_para.h>
+#include <asm/pvclock.h>
+#include <asm/pvclock-abi.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <time.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct test_case {
+ uint64_t kvmclock_base;
+ int64_t realtime_offset;
+};
+
+static struct test_case test_cases[] = {
+ { .kvmclock_base = 0 },
+ { .kvmclock_base = 180 * NSEC_PER_SEC },
+ { .kvmclock_base = 0, .realtime_offset = -180 * NSEC_PER_SEC },
+ { .kvmclock_base = 0, .realtime_offset = 180 * NSEC_PER_SEC },
+};
+
+#define GUEST_SYNC_CLOCK(__stage, __val) \
+ GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0)
+
+static void guest_main(vm_paddr_t pvti_pa, struct pvclock_vcpu_time_info *pvti)
+{
+ int i;
+
+ wrmsr(MSR_KVM_SYSTEM_TIME_NEW, pvti_pa | KVM_MSR_ENABLED);
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++)
+ GUEST_SYNC_CLOCK(i, __pvclock_read_cycles(pvti, rdtsc()));
+}
+
+#define EXPECTED_FLAGS (KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)
+
+static inline void assert_flags(struct kvm_clock_data *data)
+{
+ TEST_ASSERT((data->flags & EXPECTED_FLAGS) == EXPECTED_FLAGS,
+ "unexpected clock data flags: %x (want set: %x)",
+ data->flags, EXPECTED_FLAGS);
+}
+
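+/*
+ * The guest reads kvm-clock between the two host-side KVM_GET_CLOCK calls
+ * bracketing vcpu_run(), so the observed value must fall within
+ * [start->clock, end->clock].
+ */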
+static void handle_sync(struct ucall *uc, struct kvm_clock_data *start,
+ struct kvm_clock_data *end)
+{
+ uint64_t obs, exp_lo, exp_hi;
+
+ obs = uc->args[2];
+ exp_lo = start->clock;
+ exp_hi = end->clock;
+
+ assert_flags(start);
+ assert_flags(end);
+
+ TEST_ASSERT(exp_lo <= obs && obs <= exp_hi,
+ "unexpected kvm-clock value: %"PRIu64" expected range: [%"PRIu64", %"PRIu64"]",
+ obs, exp_lo, exp_hi);
+
+ pr_info("kvm-clock value: %"PRIu64" expected range [%"PRIu64", %"PRIu64"]\n",
+ obs, exp_lo, exp_hi);
+}
+
+static void handle_abort(struct ucall *uc)
+{
+ REPORT_GUEST_ASSERT(*uc);
+}
+
+static void setup_clock(struct kvm_vm *vm, struct test_case *test_case)
+{
+ struct kvm_clock_data data;
+
+ memset(&data, 0, sizeof(data));
+
+ data.clock = test_case->kvmclock_base;
+ if (test_case->realtime_offset) {
+ struct timespec ts;
+ int r;
+
+ data.flags |= KVM_CLOCK_REALTIME;
+ do {
+ r = clock_gettime(CLOCK_REALTIME, &ts);
+ if (!r)
+ break;
+ } while (errno == EINTR);
+
+ TEST_ASSERT(!r, "clock_gettime() failed: %d", r);
+
+ data.realtime = ts.tv_sec * NSEC_PER_SEC;
+ data.realtime += ts.tv_nsec;
+ data.realtime += test_case->realtime_offset;
+ }
+
+ vm_ioctl(vm, KVM_SET_CLOCK, &data);
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+ struct kvm_clock_data start, end;
+ struct kvm_vm *vm = vcpu->vm;
+ struct ucall uc;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ setup_clock(vm, &test_cases[i]);
+
+ vm_ioctl(vm, KVM_GET_CLOCK, &start);
+
+ vcpu_run(vcpu);
+ vm_ioctl(vm, KVM_GET_CLOCK, &end);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ handle_sync(&uc, &start, &end);
+ break;
+ case UCALL_ABORT:
+ handle_abort(&uc);
+ return;
+ default:
+ TEST_ASSERT(0, "unhandled ucall: %lu", uc.cmd);
+ }
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ vm_vaddr_t pvti_gva;
+ vm_paddr_t pvti_gpa;
+ struct kvm_vm *vm;
+ int flags;
+
+ flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK);
+ TEST_REQUIRE(flags & KVM_CLOCK_REALTIME);
+
+ TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+ pvti_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000);
+ pvti_gpa = addr_gva2gpa(vm, pvti_gva);
+ vcpu_args_set(vcpu, 2, pvti_gpa, pvti_gva);
+
+ enter_guest(vcpu);
+ kvm_vm_free(vm);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM paravirtual feature disablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct msr_data {
+ uint32_t idx;
+ const char *name;
+};
+
+#define TEST_MSR(msr) { .idx = msr, .name = #msr }
+#define UCALL_PR_MSR 0xdeadbeef
+#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr)
+
+/*
+ * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or
+ * written, as the KVM_CPUID_FEATURES leaf is cleared.
+ */
+static struct msr_data msrs_to_test[] = {
+ TEST_MSR(MSR_KVM_SYSTEM_TIME),
+ TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW),
+ TEST_MSR(MSR_KVM_WALL_CLOCK),
+ TEST_MSR(MSR_KVM_WALL_CLOCK_NEW),
+ TEST_MSR(MSR_KVM_ASYNC_PF_EN),
+ TEST_MSR(MSR_KVM_STEAL_TIME),
+ TEST_MSR(MSR_KVM_PV_EOI_EN),
+ TEST_MSR(MSR_KVM_POLL_CONTROL),
+ TEST_MSR(MSR_KVM_ASYNC_PF_INT),
+ TEST_MSR(MSR_KVM_ASYNC_PF_ACK),
+};
+
+static void test_msr(struct msr_data *msr)
+{
+ uint64_t ignored;
+ uint8_t vector;
+
+ PR_MSR(msr);
+
+ vector = rdmsr_safe(msr->idx, &ignored);
+ GUEST_ASSERT_EQ(vector, GP_VECTOR);
+
+ vector = wrmsr_safe(msr->idx, 0);
+ GUEST_ASSERT_EQ(vector, GP_VECTOR);
+}
+
+struct hcall_data {
+ uint64_t nr;
+ const char *name;
+};
+
+#define TEST_HCALL(hc) { .nr = hc, .name = #hc }
+#define UCALL_PR_HCALL 0xdeadc0de
+#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc)
+
+/*
+ * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding
+ * features have been cleared in KVM_CPUID_FEATURES.
+ */
+static struct hcall_data hcalls_to_test[] = {
+ TEST_HCALL(KVM_HC_KICK_CPU),
+ TEST_HCALL(KVM_HC_SEND_IPI),
+ TEST_HCALL(KVM_HC_SCHED_YIELD),
+};
+
+static void test_hcall(struct hcall_data *hc)
+{
+ uint64_t r;
+
+ PR_HCALL(hc);
+ r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
+ GUEST_ASSERT_EQ(r, -KVM_ENOSYS);
+}
+
+static void guest_main(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) {
+ test_msr(&msrs_to_test[i]);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) {
+ test_hcall(&hcalls_to_test[i]);
+ }
+
+ GUEST_DONE();
+}
+
+static void pr_msr(struct ucall *uc)
+{
+ struct msr_data *msr = (struct msr_data *)uc->args[0];
+
+ pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx);
+}
+
+static void pr_hcall(struct ucall *uc)
+{
+ struct hcall_data *hc = (struct hcall_data *)uc->args[0];
+
+ pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr);
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ while (true) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_PR_MSR:
+ pr_msr(&uc);
+ break;
+ case UCALL_PR_HCALL:
+ pr_hcall(&uc);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ return;
+ case UCALL_DONE:
+ return;
+ }
+ }
+}
+
+static void test_pv_unhalt(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_cpuid_entry2 *ent;
+ u32 kvm_sig_old;
+
+ pr_info("testing KVM_FEATURE_PV_UNHALT\n");
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_DISABLE_EXITS));
+
+ /* KVM_PV_UNHALT test */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
+
+ TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect");
+
+ /* Make sure KVM clears vcpu->arch.kvm_cpuid */
+ ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
+ kvm_sig_old = ent->ebx;
+ ent->ebx = 0xdeadbeef;
+ vcpu_set_cpuid(vcpu);
+
+ vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+ ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
+ ent->ebx = kvm_sig_old;
+ vcpu_set_cpuid(vcpu);
+
+ TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS");
+
+ /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */
+
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+ vcpu_enable_cap(vcpu, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 1);
+
+ vcpu_clear_cpuid_entry(vcpu, KVM_CPUID_FEATURES);
+
+ enter_guest(vcpu);
+ kvm_vm_free(vm);
+
+ test_pv_unhalt();
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * maximum APIC ID capability tests
+ *
+ * Copyright (C) 2022, Intel, Inc.
+ *
+ * Tests for getting/setting maximum APIC ID capability
+ */
+
+#include "kvm_util.h"
+
+#define MAX_VCPU_ID 2
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create_barebones();
+
+ /* Get KVM_CAP_MAX_VCPU_ID cap supported in KVM */
+ ret = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
+
+ /* Try to set KVM_CAP_MAX_VCPU_ID beyond KVM cap */
+ ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, ret + 1);
+ TEST_ASSERT(ret < 0,
+ "Setting KVM_CAP_MAX_VCPU_ID beyond KVM cap should fail");
+
+ /* Test BOOT_CPU_ID interaction (MAX_VCPU_ID cannot be lower) */
+ if (kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID)) {
+ vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)MAX_VCPU_ID);
+
+ /* Try setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID */
+ ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID - 1);
+ TEST_ASSERT(ret < 0,
+ "Setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID should fail");
+ }
+
+ /* Set KVM_CAP_MAX_VCPU_ID */
+ vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID);
+
+ /* Try to set KVM_CAP_MAX_VCPU_ID again */
+ ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID + 1);
+ TEST_ASSERT(ret < 0,
+ "Setting KVM_CAP_MAX_VCPU_ID multiple times should fail");
+
+ /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap */
+ ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)MAX_VCPU_ID);
+ TEST_ASSERT(ret < 0, "Creating vCPU with ID > MAX_VCPU_ID should fail");
+
+ /* Create vCPU with bits 63:32 != 0, but an otherwise valid id */
+ ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(1L << 32));
+ TEST_ASSERT(ret < 0, "Creating vCPU with ID[63:32] != 0 should fail");
+
+ /* Create vCPU with id within bounds */
+ ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)0);
+ TEST_ASSERT(ret >= 0, "Creating vCPU with ID 0 should succeed");
+
+ close(ret);
+ kvm_vm_free(vm);
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define CPUID_MWAIT (1u << 3)
+
+enum monitor_mwait_testcases {
+ MWAIT_QUIRK_DISABLED = BIT(0),
+ MISC_ENABLES_QUIRK_DISABLED = BIT(1),
+ MWAIT_DISABLED = BIT(2),
+};
+
+/*
+ * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD; in all
+ * other scenarios KVM should emulate them as nops.
+ */
+#define GUEST_ASSERT_MONITOR_MWAIT(insn, testcase, vector) \
+do { \
+ bool fault_wanted = ((testcase) & MWAIT_QUIRK_DISABLED) && \
+ ((testcase) & MWAIT_DISABLED); \
+ \
+ if (fault_wanted) \
+ __GUEST_ASSERT((vector) == UD_VECTOR, \
+ "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \
+ testcase, vector); \
+ else \
+ __GUEST_ASSERT(!(vector), \
+ "Expected success on " insn " for testcase '0x%x', got '0x%x'", \
+ testcase, vector); \
+} while (0)
+
+static void guest_monitor_wait(int testcase)
+{
+ u8 vector;
+
+ GUEST_SYNC(testcase);
+
+ /*
+ * Arbitrarily MONITOR this function; SVM performs fault checks before
+ * intercept checks, so the inputs for MONITOR and MWAIT must be valid.
+ */
+ vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0));
+ GUEST_ASSERT_MONITOR_MWAIT("MONITOR", testcase, vector);
+
+ vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
+ GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
+}
+
+static void guest_code(void)
+{
+ guest_monitor_wait(MWAIT_DISABLED);
+
+ guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
+
+ guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED);
+ guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED);
+
+ guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
+ guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ uint64_t disabled_quirks;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int testcase;
+
+ TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+
+ while (1) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ testcase = uc.args[1];
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ goto done;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ goto done;
+ }
+
+ disabled_quirks = 0;
+ if (testcase & MWAIT_QUIRK_DISABLED)
+ disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
+ if (testcase & MISC_ENABLES_QUIRK_DISABLED)
+ disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
+
+ /*
+ * If the MISC_ENABLES quirk (KVM neglects to update CPUID to
+ * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT
+ * bit in MISC_ENABLES accordingly. If the quirk is enabled,
+ * the only valid configuration is MWAIT disabled, as CPUID
+ * can't be manually changed after running the vCPU.
+ */
+ if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) {
+ TEST_ASSERT(testcase & MWAIT_DISABLED,
+ "Can't toggle CPUID features after running vCPU");
+ continue;
+ }
+
+ vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE,
+ (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT);
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+#define L2_GUEST_STACK_SIZE 256
+
+/*
+ * Arbitrary, never shoved into KVM/hardware, just need to avoid conflict with
+ * the "real" exceptions used, #SS/#GP/#DF (12/13/8).
+ */
+#define FAKE_TRIPLE_FAULT_VECTOR 0xaa
+
+/* Arbitrary 32-bit error code injected by this test. */
+#define SS_ERROR_CODE 0xdeadbeef
+
+/*
+ * Bit '0' is set on Intel if the exception occurs while delivering a previous
+ * event/exception. AMD's wording is ambiguous, but presumably the bit is set
+ * if the exception occurs while delivering an external event, e.g. NMI or INTR,
+ * but not for exceptions that occur when delivering other exceptions or
+ * software interrupts.
+ *
+ * Note, Intel's name for it, "External event", is misleading and much more
+ * aligned with AMD's behavior, but the SDM is quite clear on its behavior.
+ */
+#define ERROR_CODE_EXT_FLAG BIT(0)
+
+/*
+ * Bit '1' is set if the fault occurred when looking up a descriptor in the
+ * IDT, which is the case here as the IDT is empty/NULL.
+ */
+#define ERROR_CODE_IDT_FLAG BIT(1)
+
+/*
+ * The #GP that occurs when vectoring #SS should show the index into the IDT
+ * for #SS, plus have the "IDT flag" set.
+ */
+#define GP_ERROR_CODE_AMD ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG)
+#define GP_ERROR_CODE_INTEL ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG | ERROR_CODE_EXT_FLAG)
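+/* With SS_VECTOR == 12, these evaluate to 0x62 (AMD) and 0x63 (Intel). */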
+
+/*
+ * Intel and AMD both shove '0' into the error code on #DF, regardless of what
+ * led to the double fault.
+ */
+#define DF_ERROR_CODE 0
+
+#define INTERCEPT_SS (BIT_ULL(SS_VECTOR))
+#define INTERCEPT_SS_DF (INTERCEPT_SS | BIT_ULL(DF_VECTOR))
+#define INTERCEPT_SS_GP_DF (INTERCEPT_SS_DF | BIT_ULL(GP_VECTOR))
+
+static void l2_ss_pending_test(void)
+{
+ GUEST_SYNC(SS_VECTOR);
+}
+
+static void l2_ss_injected_gp_test(void)
+{
+ GUEST_SYNC(GP_VECTOR);
+}
+
+static void l2_ss_injected_df_test(void)
+{
+ GUEST_SYNC(DF_VECTOR);
+}
+
+static void l2_ss_injected_tf_test(void)
+{
+ GUEST_SYNC(FAKE_TRIPLE_FAULT_VECTOR);
+}
+
+static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector,
+ uint32_t error_code)
+{
+ struct vmcb *vmcb = svm->vmcb;
+ struct vmcb_control_area *ctrl = &vmcb->control;
+
+ vmcb->save.rip = (u64)l2_code;
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ if (vector == FAKE_TRIPLE_FAULT_VECTOR)
+ return;
+
+ GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector));
+ GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code);
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ struct vmcb_control_area *ctrl = &svm->vmcb->control;
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ svm->vmcb->save.idtr.limit = 0;
+ ctrl->intercept |= BIT_ULL(INTERCEPT_SHUTDOWN);
+
+ ctrl->intercept_exceptions = INTERCEPT_SS_GP_DF;
+ svm_run_l2(svm, l2_ss_pending_test, SS_VECTOR, SS_ERROR_CODE);
+ svm_run_l2(svm, l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_AMD);
+
+ ctrl->intercept_exceptions = INTERCEPT_SS_DF;
+ svm_run_l2(svm, l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
+
+ ctrl->intercept_exceptions = INTERCEPT_SS;
+ svm_run_l2(svm, l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
+ GUEST_ASSERT_EQ(ctrl->exit_code, SVM_EXIT_SHUTDOWN);
+
+ GUEST_DONE();
+}
+
+static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code)
+{
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_code));
+
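+ /*
+ * The first entry (the pending #SS test) must use VMLAUNCH; all
+ * subsequent entries into the same VMCS use VMRESUME.
+ */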
+ GUEST_ASSERT_EQ(vector == SS_VECTOR ? vmlaunch() : vmresume(), 0);
+
+ if (vector == FAKE_TRIPLE_FAULT_VECTOR)
+ return;
+
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
+ GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector);
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+
+ GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+
+ prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ GUEST_ASSERT_EQ(vmwrite(GUEST_IDTR_LIMIT, 0), 0);
+
+ /*
+ * VMX disallows injecting an exception with error_code[31:16] != 0,
+ * and hardware will never generate a VM-Exit with bits 31:16 set.
+ * KVM should likewise truncate the "bad" userspace value.
+ */
+ GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_GP_DF), 0);
+ vmx_run_l2(l2_ss_pending_test, SS_VECTOR, (u16)SS_ERROR_CODE);
+ vmx_run_l2(l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_INTEL);
+
+ GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_DF), 0);
+ vmx_run_l2(l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
+
+ GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS), 0);
+ vmx_run_l2(l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_TRIPLE_FAULT);
+
+ GUEST_DONE();
+}
+
+static void __attribute__((__flatten__)) l1_guest_code(void *test_data)
+{
+ if (this_cpu_has(X86_FEATURE_SVM))
+ l1_svm_code(test_data);
+ else
+ l1_vmx_code(test_data);
+}
+
+static void assert_ucall_vector(struct kvm_vcpu *vcpu, int vector)
+{
+ struct ucall uc;
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(vector == uc.args[1],
+ "Expected L2 to ask for %d, got %ld", vector, uc.args[1]);
+ break;
+ case UCALL_DONE:
+ TEST_ASSERT(vector == -1,
+ "Expected L2 to ask for %d, L2 says it's done", vector);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Expected L2 to ask for %d, got unexpected ucall %lu", vector, uc.cmd);
+ }
+}
+
+static void queue_ss_exception(struct kvm_vcpu *vcpu, bool inject)
+{
+ struct kvm_vcpu_events events;
+
+ vcpu_events_get(vcpu, &events);
+
+ TEST_ASSERT(!events.exception.pending,
+ "Vector %d unexpectedly pending", events.exception.nr);
+ TEST_ASSERT(!events.exception.injected,
+ "Vector %d unexpectedly injected", events.exception.nr);
+
+ events.flags = KVM_VCPUEVENT_VALID_PAYLOAD;
+ events.exception.pending = !inject;
+ events.exception.injected = inject;
+ events.exception.nr = SS_VECTOR;
+ events.exception.has_error_code = true;
+ events.exception.error_code = SS_ERROR_CODE;
+ vcpu_events_set(vcpu, &events);
+}
+
+/*
+ * Verify KVM_{G,S}ET_EVENTS play nice with pending vs. injected exceptions
+ * when an exception is being queued for L2. Specifically, verify that KVM
+ * honors L1 exception intercept controls when a #SS is pending/injected,
+ * triggers a #GP on vectoring the #SS, morphs to #DF if #GP isn't intercepted
+ * by L1, and finally causes (nested) SHUTDOWN if #DF isn't intercepted by L1.
+ */
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t nested_test_data_gva;
+ struct kvm_vcpu_events events;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXCEPTION_PAYLOAD));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul);
+
+ if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ /* Run L1 => L2. L2 should sync and request #SS. */
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, SS_VECTOR);
+
+ /* Pend #SS and request immediate exit. #SS should still be pending. */
+ queue_ss_exception(vcpu, false);
+ vcpu->run->immediate_exit = true;
+ vcpu_run_complete_io(vcpu);
+
+ /* Verify the pending event comes back out the same as it went in. */
+ vcpu_events_get(vcpu, &events);
+ TEST_ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD,
+ KVM_VCPUEVENT_VALID_PAYLOAD);
+ TEST_ASSERT_EQ(events.exception.pending, true);
+ TEST_ASSERT_EQ(events.exception.nr, SS_VECTOR);
+ TEST_ASSERT_EQ(events.exception.has_error_code, true);
+ TEST_ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE);
+
+ /*
+ * Run for real with the pending #SS, L1 should get a VM-Exit due to
+ * #SS interception and re-enter L2 to request #GP (via injected #SS).
+ */
+ vcpu->run->immediate_exit = false;
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, GP_VECTOR);
+
+ /*
+ * Inject #SS, the #SS should bypass interception and cause #GP, which
+ * L1 should intercept before KVM morphs it to #DF. L1 should then
+ * disable #GP interception and run L2 to request #DF (via #SS => #GP).
+ */
+ queue_ss_exception(vcpu, true);
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, DF_VECTOR);
+
+ /*
+ * Inject #SS, the #SS should bypass interception and cause #GP, which
+ * L1 is no longer intercepting, and so should see a #DF VM-Exit. L1
+ * should then signal that it is done.
+ */
+ queue_ss_exception(vcpu, true);
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, FAKE_TRIPLE_FAULT_VECTOR);
+
+ /*
+ * Inject #SS yet again. L1 is not intercepting #GP or #DF, and so
+ * should see nested TRIPLE_FAULT / SHUTDOWN.
+ */
+ queue_ss_exception(vcpu, true);
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, -1);
+
+ kvm_vm_free(vm);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Usage: to be run via nx_huge_page_test.sh, which does the necessary
+ * environment setup and teardown
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+#include <fcntl.h>
+#include <stdint.h>
+#include <time.h>
+
+#include <test_util.h>
+#include "kvm_util.h"
+#include "processor.h"
+
+#define HPAGE_SLOT 10
+#define HPAGE_GPA (4UL << 30) /* 4G prevents collision w/ slot 0 */
+#define HPAGE_GVA HPAGE_GPA /* GVA is arbitrary, so use GPA. */
+#define PAGES_PER_2MB_HUGE_PAGE 512
+#define HPAGE_SLOT_NPAGES (3 * PAGES_PER_2MB_HUGE_PAGE)
+
+/*
+ * Passed by nx_huge_pages_test.sh to provide an easy warning if this test is
+ * being run without it.
+ */
+#define MAGIC_TOKEN 887563923
+
+/*
+ * x86 opcode for the return instruction. Used to call into, and then
+ * immediately return from, memory backed with hugepages.
+ */
+#define RETURN_OPCODE 0xC3
+
+/* Call the specified memory address. */
+static void guest_do_CALL(uint64_t target)
+{
+ ((void (*)(void)) target)();
+}
+
+/*
+ * Exit the VM after each memory access so that the userspace component of the
+ * test can make assertions about the pages backing the VM.
+ *
+ * See below for an explanation of how each access should affect the
+ * backing mappings.
+ */
+void guest_code(void)
+{
+ uint64_t hpage_1 = HPAGE_GVA;
+ uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512);
+ uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512);
+
+ READ_ONCE(*(uint64_t *)hpage_1);
+ GUEST_SYNC(1);
+
+ READ_ONCE(*(uint64_t *)hpage_2);
+ GUEST_SYNC(2);
+
+ guest_do_CALL(hpage_1);
+ GUEST_SYNC(3);
+
+ guest_do_CALL(hpage_3);
+ GUEST_SYNC(4);
+
+ READ_ONCE(*(uint64_t *)hpage_1);
+ GUEST_SYNC(5);
+
+ READ_ONCE(*(uint64_t *)hpage_3);
+ GUEST_SYNC(6);
+}
+
+static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
+{
+ int actual_pages_2m;
+
+ actual_pages_2m = vm_get_stat(vm, "pages_2m");
+
+ TEST_ASSERT(actual_pages_2m == expected_pages_2m,
+ "Unexpected 2m page count. Expected %d, got %d",
+ expected_pages_2m, actual_pages_2m);
+}
+
+static void check_split_count(struct kvm_vm *vm, int expected_splits)
+{
+ int actual_splits;
+
+ actual_splits = vm_get_stat(vm, "nx_lpage_splits");
+
+ TEST_ASSERT(actual_splits == expected_splits,
+ "Unexpected NX huge page split count. Expected %d, got %d",
+ expected_splits, actual_splits);
+}
+
+static void wait_for_reclaim(int reclaim_period_ms)
+{
+ long reclaim_wait_ms;
+ struct timespec ts;
+
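+ /* Wait multiple reclaim periods to give the NX recovery worker ample time to run. */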
+ reclaim_wait_ms = reclaim_period_ms * 5;
+ ts.tv_sec = reclaim_wait_ms / 1000;
+ ts.tv_nsec = (reclaim_wait_ms - (ts.tv_sec * 1000)) * 1000000;
+ nanosleep(&ts, NULL);
+}
+
+void run_test(int reclaim_period_ms, bool disable_nx_huge_pages,
+ bool reboot_permissions)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ uint64_t nr_bytes;
+ void *hva;
+ int r;
+
+ vm = vm_create(1);
+
+ if (disable_nx_huge_pages) {
+ r = __vm_disable_nx_huge_pages(vm);
+ if (reboot_permissions) {
+ TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions");
+ } else {
+ TEST_ASSERT(r == -1 && errno == EPERM,
+ "This process should not have permission to disable NX huge pages");
+ return;
+ }
+ }
+
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB,
+ HPAGE_GPA, HPAGE_SLOT,
+ HPAGE_SLOT_NPAGES, 0);
+
+ nr_bytes = HPAGE_SLOT_NPAGES * vm->page_size;
+
+ /*
+ * Ensure that KVM can map HPAGE_SLOT with huge pages by mapping the
+ * region into the guest with 2MiB pages whenever TDP is disabled (i.e.
+ * whenever KVM is shadowing the guest page tables).
+ *
+ * When TDP is enabled, KVM should be able to map HPAGE_SLOT with huge
+ * pages irrespective of the guest page size, so map with 4KiB pages
+ * to test that that is the case.
+ */
+ if (kvm_is_tdp_enabled())
+ virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_4K);
+ else
+ virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_2M);
+
+ hva = addr_gpa2hva(vm, HPAGE_GPA);
+ memset(hva, RETURN_OPCODE, nr_bytes);
+
+ check_2m_page_count(vm, 0);
+ check_split_count(vm, 0);
+
+ /*
+ * The guest code will first read from the first hugepage, resulting
+ * in a huge page mapping being created.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, 1);
+ check_split_count(vm, 0);
+
+ /*
+ * Then the guest code will read from the second hugepage, resulting
+ * in another huge page mapping being created.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, 2);
+ check_split_count(vm, 0);
+
+ /*
+ * Next, the guest will execute from the first huge page, causing it
+ * to be remapped at 4k.
+ *
+ * If NX huge pages are disabled, this should have no effect.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, disable_nx_huge_pages ? 2 : 1);
+ check_split_count(vm, disable_nx_huge_pages ? 0 : 1);
+
+ /*
+ * Executing from the third huge page (previously unaccessed) will
+ * cause part of it to be mapped at 4k.
+ *
+ * If NX huge pages are disabled, it should be mapped at 2M.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+ check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
+
+ /* Reading from the first huge page again should have no effect. */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+ check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
+
+ /* Give recovery thread time to run. */
+ wait_for_reclaim(reclaim_period_ms);
+
+ /*
+ * Now that the reclaimer has run, all the split pages should be gone.
+ *
+ * If NX huge pages are disabled, the reclaimer will not run, so
+ * nothing should change from here on.
+ */
+ check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+ check_split_count(vm, 0);
+
+ /*
+ * The 4k mapping on hpage 3 should have been removed, so check that
+ * reading from it causes a huge page mapping to be installed.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 2);
+ check_split_count(vm, 0);
+
+ kvm_vm_free(vm);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-p period_ms] [-t token]\n", name);
+ puts("");
+ printf(" -p: The NX reclaim period in milliseconds.\n");
+ printf(" -t: The magic token to indicate environment setup is done.\n");
+ printf(" -r: The test has reboot permissions and can disable NX huge pages.\n");
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char **argv)
+{
+ int reclaim_period_ms = 0, token = 0, opt;
+ bool reboot_permissions = false;
+
+ while ((opt = getopt(argc, argv, "hp:t:r")) != -1) {
+ switch (opt) {
+ case 'p':
+ reclaim_period_ms = atoi_positive("Reclaim period", optarg);
+ break;
+ case 't':
+ token = atoi_paranoid(optarg);
+ break;
+ case 'r':
+ reboot_permissions = true;
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES));
+
+ __TEST_REQUIRE(token == MAGIC_TOKEN,
+ "This test must be run with the magic token via '-t %d'.\n"
+ "Running via nx_huge_pages_test.sh, which also handles "
+ "environment setup, is strongly recommended.", MAGIC_TOKEN);
+
+ run_test(reclaim_period_ms, false, reboot_permissions);
+ run_test(reclaim_period_ms, true, reboot_permissions);
+
+ return 0;
+}
--- /dev/null
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Wrapper script which performs setup and cleanup for nx_huge_pages_test.
+# Makes use of root privileges to set up huge pages and KVM module parameters.
+#
+# Copyright (C) 2022, Google LLC.
+
+set -e
+
+NX_HUGE_PAGES=$(cat /sys/module/kvm/parameters/nx_huge_pages)
+NX_HUGE_PAGES_RECOVERY_RATIO=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio)
+NX_HUGE_PAGES_RECOVERY_PERIOD=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms)
+HUGE_PAGES=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages)
+
+# If we're already root, the host might not have sudo.
+if [ $(whoami) == "root" ]; then
+ function do_sudo () {
+ "$@"
+ }
+else
+ function do_sudo () {
+ sudo "$@"
+ }
+fi
+
+set +e
+
+function sudo_echo () {
+ echo "$1" | do_sudo tee -a "$2" > /dev/null
+}
+
+NXECUTABLE="$(dirname $0)/nx_huge_pages_test"
+
+sudo_echo test /dev/null || exit 4 # KSFT_SKIP=4
+
+(
+ set -e
+
+ sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages
+ sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
+ sudo_echo 100 /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
+ sudo_echo "$(( $HUGE_PAGES + 3 ))" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
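+ # The test's memslot spans three 2MiB huge pages, hence the +3 above.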
+
+ # Test with reboot permissions
+ if [ $(whoami) == "root" ] || sudo setcap cap_sys_boot+ep $NXECUTABLE 2> /dev/null; then
+ echo Running test with CAP_SYS_BOOT enabled
+ $NXECUTABLE -t 887563923 -p 100 -r
+ test $(whoami) == "root" || sudo setcap cap_sys_boot-ep $NXECUTABLE
+ else
+ echo setcap failed, skipping nx_huge_pages_test with CAP_SYS_BOOT enabled
+ fi
+
+ # Test without reboot permissions
+ if [ $(whoami) != "root" ] ; then
+ echo Running test with CAP_SYS_BOOT disabled
+ $NXECUTABLE -t 887563923 -p 100
+ else
+ echo Running as root, skipping nx_huge_pages_test with CAP_SYS_BOOT disabled
+ fi
+)
+RET=$?
+
+sudo_echo "$NX_HUGE_PAGES" /sys/module/kvm/parameters/nx_huge_pages
+sudo_echo "$NX_HUGE_PAGES_RECOVERY_RATIO" /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
+sudo_echo "$NX_HUGE_PAGES_RECOVERY_PERIOD" /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
+sudo_echo "$HUGE_PAGES" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+exit $RET
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_CAP_MSR_PLATFORM_INFO
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies expected behavior of controlling guest access to
+ * MSR_PLATFORM_INFO.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
+
+static void guest_code(void)
+{
+ uint64_t msr_platform_info;
+ uint8_t vector;
+
+ GUEST_SYNC(true);
+ msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
+ GUEST_ASSERT_EQ(msr_platform_info & MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
+ MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+
+ GUEST_SYNC(false);
+ vector = rdmsr_safe(MSR_PLATFORM_INFO, &msr_platform_info);
+ GUEST_ASSERT_EQ(vector, GP_VECTOR);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ uint64_t msr_platform_info;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ msr_platform_info = vcpu_get_msr(vcpu, MSR_PLATFORM_INFO);
+ vcpu_set_msr(vcpu, MSR_PLATFORM_INFO,
+ msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+
+ for (;;) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ vm_enable_cap(vm, KVM_CAP_MSR_PLATFORM_INFO, uc.args[1]);
+ break;
+ case UCALL_DONE:
+ goto done;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unexpected ucall %lu", uc.cmd);
+ break;
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+#include <x86intrin.h>
+
+#include "pmu.h"
+#include "processor.h"
+
+/* Number of iterations of the loop for the guest measurement payload. */
+#define NUM_LOOPS 10
+
+/* Each iteration of the loop retires one branch instruction. */
+#define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS)
+
+/*
+ * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
+ * 1 LOOP.
+ */
+#define NUM_INSNS_PER_LOOP 3
+
+/*
+ * Number of "extra" instructions that will be counted, i.e. the number of
+ * instructions that are needed to set up the loop and then disable the
+ * counter. 2 MOV, 2 XOR, 1 WRMSR.
+ */
+#define NUM_EXTRA_INSNS 5
+
+/* Total number of instructions retired within the measured section. */
+#define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
+
+static uint8_t kvm_pmu_version;
+static bool kvm_has_perf_caps;
+
+static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
+ void *guest_code,
+ uint8_t pmu_version,
+ uint64_t perf_capabilities)
+{
+ struct kvm_vm *vm;
+
+ vm = vm_create_with_one_vcpu(vcpu, guest_code);
+ sync_global_to_guest(vm, kvm_pmu_version);
+
+ /*
+ * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
+ * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
+ */
+ if (kvm_has_perf_caps)
+ vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
+
+ vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
+ return vm;
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ do {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_PRINTF:
+ pr_info("%s", uc.buffer);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ } while (uc.cmd != UCALL_DONE);
+}
+
+static uint8_t guest_get_pmu_version(void)
+{
+ /*
+ * Return the effective PMU version, i.e. the minimum between what KVM
+ * supports and what is enumerated to the guest. The host deliberately
+ * advertises a PMU version to the guest beyond what is actually
+ * supported by KVM to verify KVM doesn't freak out and do something
+ * bizarre with an architecturally valid, but unsupported, version.
+ */
+ return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
+}
+
+/*
+ * If an architectural event is supported and guaranteed to generate at least
+ * one "hit", assert that its count is non-zero. If an event isn't supported or
+ * the test can't guarantee the associated action will occur, then all bets are
+ * off regarding the count, i.e. no checks can be done.
+ *
+ * Sanity check that in all cases, the event doesn't count when it's disabled,
+ * and that KVM correctly emulates the write of an arbitrary value.
+ */
+static void guest_assert_event_count(uint8_t idx,
+ struct kvm_x86_pmu_feature event,
+ uint32_t pmc, uint32_t pmc_msr)
+{
+ uint64_t count;
+
+ count = _rdpmc(pmc);
+ if (!this_pmu_has(event))
+ goto sanity_checks;
+
+ switch (idx) {
+ case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
+ GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
+ break;
+ case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
+ GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
+ break;
+ case INTEL_ARCH_LLC_REFERENCES_INDEX:
+ case INTEL_ARCH_LLC_MISSES_INDEX:
+ if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
+ !this_cpu_has(X86_FEATURE_CLFLUSH))
+ break;
+ fallthrough;
+ case INTEL_ARCH_CPU_CYCLES_INDEX:
+ case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
+ GUEST_ASSERT_NE(count, 0);
+ break;
+ case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
+ GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
+ break;
+ default:
+ break;
+ }
+
+sanity_checks:
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
+ GUEST_ASSERT_EQ(_rdpmc(pmc), count);
+
+ wrmsr(pmc_msr, 0xdead);
+ GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
+}
+
+/*
+ * Enable and disable the PMC in a monolithic asm blob to ensure that the
+ * compiler can't insert _any_ code into the measured sequence. Note, ECX
+ * doesn't need to be clobbered as the input value, @pmc_msr, is restored
+ * before the end of the sequence.
+ *
+ * If CLFLUSH{,OPT} is supported, flush the cacheline containing (at least) the
+ * CLFLUSH{,OPT} instruction on each loop iteration to force LLC references and
+ * misses, i.e. to allow testing that those events actually count.
+ *
+ * If forced emulation is enabled (and specified), force emulation on a subset
+ * of the measured code to verify that KVM correctly emulates instructions and
+ * branches retired events in conjunction with hardware also counting said
+ * events.
+ */
+#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP) \
+do { \
+ __asm__ __volatile__("wrmsr\n\t" \
+ " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \
+ "1:\n\t" \
+ clflush "\n\t" \
+ "mfence\n\t" \
+ FEP "loop 1b\n\t" \
+ FEP "mov %%edi, %%ecx\n\t" \
+ FEP "xor %%eax, %%eax\n\t" \
+ FEP "xor %%edx, %%edx\n\t" \
+ "wrmsr\n\t" \
+ :: "a"((uint32_t)_value), "d"(_value >> 32), \
+ "c"(_msr), "D"(_msr) \
+ ); \
+} while (0)
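+
+/*
+ * In the accounting above, the NUM_EXTRA_INSNS counted on top of the loop
+ * body are the leading "mov $NUM_LOOPS, %ecx", the trailing "mov %edi, %ecx",
+ * the two XORs and the final (disabling) WRMSR; the first WRMSR, which
+ * enables the counter, is not included in the count.
+ */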
+
+#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
+do { \
+ wrmsr(pmc_msr, 0); \
+ \
+ if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP); \
+ else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP); \
+ else \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \
+ \
+ guest_assert_event_count(_idx, _event, _pmc, _pmc_msr); \
+} while (0)
+
+static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
+ uint32_t pmc, uint32_t pmc_msr,
+ uint32_t ctrl_msr, uint64_t ctrl_msr_value)
+{
+ GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
+
+ if (is_forced_emulation_enabled)
+ GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
+}
+
+#define X86_PMU_FEATURE_NULL \
+({ \
+ struct kvm_x86_pmu_feature feature = {}; \
+ \
+ feature; \
+})
+
+static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
+{
+ return !(*(u64 *)&event);
+}
+
+static void guest_test_arch_event(uint8_t idx)
+{
+ const struct {
+ struct kvm_x86_pmu_feature gp_event;
+ struct kvm_x86_pmu_feature fixed_event;
+ } intel_event_to_feature[] = {
+ [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
+ [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
+ /*
+ * Note, the fixed counter for reference cycles is NOT the same
+ * as the general purpose architectural event. The fixed counter
+ * explicitly counts at the same frequency as the TSC, whereas
+ * the GP event counts at a fixed, but uarch specific, frequency.
+ * Bundle them here for simplicity.
+ */
+ [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
+ [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
+ };
+
+ uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+ uint32_t pmu_version = guest_get_pmu_version();
+ /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
+ bool guest_has_perf_global_ctrl = pmu_version >= 2;
+ struct kvm_x86_pmu_feature gp_event, fixed_event;
+ uint32_t base_pmc_msr;
+ unsigned int i;
+
+ /* The host side shouldn't invoke this without a guest PMU. */
+ GUEST_ASSERT(pmu_version);
+
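+ /*
+ * Use the full-width counter MSRs (MSR_IA32_PMC0 and friends) when the
+ * vCPU advertises full-width writes in PERF_CAPABILITIES, otherwise
+ * fall back to the legacy MSR_IA32_PERFCTR0 aliases.
+ */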
+ if (this_cpu_has(X86_FEATURE_PDCM) &&
+ rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
+ base_pmc_msr = MSR_IA32_PMC0;
+ else
+ base_pmc_msr = MSR_IA32_PERFCTR0;
+
+ gp_event = intel_event_to_feature[idx].gp_event;
+ GUEST_ASSERT_EQ(idx, gp_event.f.bit);
+
+ GUEST_ASSERT(nr_gp_counters);
+
+ for (i = 0; i < nr_gp_counters; i++) {
+ uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
+ ARCH_PERFMON_EVENTSEL_ENABLE |
+ intel_pmu_arch_events[idx];
+
+ wrmsr(MSR_P6_EVNTSEL0 + i, 0);
+ if (guest_has_perf_global_ctrl)
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
+
+ __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
+ MSR_P6_EVNTSEL0 + i, eventsel);
+ }
+
+ if (!guest_has_perf_global_ctrl)
+ return;
+
+ fixed_event = intel_event_to_feature[idx].fixed_event;
+ if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
+ return;
+
+ i = fixed_event.f.bit;
+
+ wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+
+ __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED,
+ MSR_CORE_PERF_FIXED_CTR0 + i,
+ MSR_CORE_PERF_GLOBAL_CTRL,
+ FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+}
+
+static void guest_test_arch_events(void)
+{
+ uint8_t i;
+
+ for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
+ guest_test_arch_event(i);
+
+ GUEST_DONE();
+}
+
+static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
+ uint8_t length, uint8_t unavailable_mask)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /* Testing arch events requires a vPMU (there are no negative tests). */
+ if (!pmu_version)
+ return;
+
+ vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
+ pmu_version, perf_capabilities);
+
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
+ length);
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
+ unavailable_mask);
+
+ run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Limit testing to MSRs that are actually defined by Intel (in the SDM). MSRs
+ * that aren't defined counter MSRs *probably* don't exist, but there's no
+ * guarantee that currently undefined MSR indices won't be used for something
+ * other than PMCs in the future.
+ */
+#define MAX_NR_GP_COUNTERS 8
+#define MAX_NR_FIXED_COUNTERS 3
+
+#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \
+__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \
+ "Expected %s on " #insn "(0x%x), got vector %u", \
+ expect_gp ? "#GP" : "no fault", msr, vector) \
+
+#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected_val) \
+ __GUEST_ASSERT(val == expected_val, \
+ "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \
+ msr, expected_val, val);
+
+static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
+ uint64_t expected_val)
+{
+ uint8_t vector;
+ uint64_t val;
+
+ vector = rdpmc_safe(rdpmc_idx, &val);
+ GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
+ if (expect_success)
+ GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
+
+ if (!is_forced_emulation_enabled)
+ return;
+
+ vector = rdpmc_safe_fep(rdpmc_idx, &val);
+ GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
+ if (expect_success)
+ GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
+}
+
+static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
+ uint8_t nr_counters, uint32_t or_mask)
+{
+ const bool pmu_has_fast_mode = !guest_get_pmu_version();
+ uint8_t i;
+
+ for (i = 0; i < nr_possible_counters; i++) {
+ /*
+ * TODO: Test a value that validates full-width writes and the
+ * width of the counters.
+ */
+ const uint64_t test_val = 0xffff;
+ const uint32_t msr = base_msr + i;
+
+ /*
+ * Fixed counters are supported if the counter is less than the
+ * number of enumerated contiguous counters *or* the counter is
+ * explicitly enumerated in the supported counters mask.
+ */
+ const bool expect_success = i < nr_counters || (or_mask & BIT(i));
+
+ /*
+ * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
+ * unsupported, i.e. doesn't #GP and reads back '0'.
+ */
+ const uint64_t expected_val = expect_success ? test_val : 0;
+ const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
+ msr != MSR_P6_PERFCTR1;
+ uint32_t rdpmc_idx;
+ uint8_t vector;
+ uint64_t val;
+
+ vector = wrmsr_safe(msr, test_val);
+ GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
+
+ vector = rdmsr_safe(msr, &val);
+ GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);
+
+ /* On #GP, the result of RDMSR is undefined. */
+ if (!expect_gp)
+ GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);
+
+ /*
+ * Redo the read tests with RDPMC, which has different indexing
+ * semantics and additional capabilities.
+ */
+ rdpmc_idx = i;
+ if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
+ rdpmc_idx |= INTEL_RDPMC_FIXED;
+
+ guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);
+
+ /*
+ * KVM doesn't support non-architectural PMUs, i.e. it should be
+ * impossible to have fast mode RDPMC. Verify that attempting
+ * to use fast RDPMC always #GPs.
+ */
+ GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
+ rdpmc_idx |= INTEL_RDPMC_FAST;
+ guest_test_rdpmc(rdpmc_idx, false, -1ull);
+
+ vector = wrmsr_safe(msr, 0);
+ GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
+ }
+}
+
+static void guest_test_gp_counters(void)
+{
+ uint8_t pmu_version = guest_get_pmu_version();
+ uint8_t nr_gp_counters = 0;
+ uint32_t base_msr;
+
+ if (pmu_version)
+ nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+
+ /*
+ * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is
+ * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number
+ * of GP counters. If there are no GP counters, require KVM to leave
+ * PERF_GLOBAL_CTRL '0'. This edge case isn't covered by the SDM, but
+ * follow the spirit of the architecture and only globally enable GP
+ * counters, of which there are none.
+ */
+ if (pmu_version > 1) {
+ uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);
+
+ if (nr_gp_counters)
+ GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
+ else
+ GUEST_ASSERT_EQ(global_ctrl, 0);
+ }
+
+ if (this_cpu_has(X86_FEATURE_PDCM) &&
+ rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
+ base_msr = MSR_IA32_PMC0;
+ else
+ base_msr = MSR_IA32_PERFCTR0;
+
+ guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
+ GUEST_DONE();
+}
+
+static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
+ uint8_t nr_gp_counters)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
+ pmu_version, perf_capabilities);
+
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
+ nr_gp_counters);
+
+ run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+static void guest_test_fixed_counters(void)
+{
+ uint64_t supported_bitmask = 0;
+ uint8_t nr_fixed_counters = 0;
+ uint8_t i;
+
+ /* Fixed counters require Architectural vPMU Version 2+. */
+ if (guest_get_pmu_version() >= 2)
+ nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+
+ /*
+ * The supported bitmask for fixed counters was introduced in PMU
+ * version 5.
+ */
+ if (guest_get_pmu_version() >= 5)
+ supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);
+
+ guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
+ nr_fixed_counters, supported_bitmask);
+
+ for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
+ uint8_t vector;
+ uint64_t val;
+
+ if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
+ vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
+ FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP for counter %u in FIXED_CTR_CTRL", i);
+
+ vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
+ FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
+ continue;
+ }
+
+ wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
+ wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
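+ /*
+ * "loop ." decrements ECX and branches back to itself, i.e. runs a
+ * one-instruction loop NUM_LOOPS times so that the enabled fixed
+ * counter accumulates a non-zero count before it is read back.
+ */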
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);
+
+ GUEST_ASSERT_NE(val, 0);
+ }
+ GUEST_DONE();
+}
+
+static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
+ uint8_t nr_fixed_counters,
+ uint32_t supported_bitmask)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
+ pmu_version, perf_capabilities);
+
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
+ supported_bitmask);
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
+ nr_fixed_counters);
+
+ run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+static void test_intel_counters(void)
+{
+ uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+ uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+ uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
+ unsigned int i;
+ uint8_t v, j;
+ uint32_t k;
+
+ const uint64_t perf_caps[] = {
+ 0,
+ PMU_CAP_FW_WRITES,
+ };
+
+ /*
+ * Test up to PMU v5, which is the current maximum version defined by
+ * Intel, i.e. the last version that is guaranteed to be backwards
+ * compatible with KVM's existing behavior.
+ */
+ uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
+
+ /*
+ * Detect the existence of events that aren't supported by selftests.
+ * This will (obviously) fail any time the kernel adds support for a
+ * new event, but it's worth paying that price to keep the test fresh.
+ */
+ TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
+ "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
+ nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
+
+ /*
+ * Force iterating over known arch events regardless of whether or not
+ * KVM/hardware supports a given event.
+ */
+ nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
+
+ for (v = 0; v <= max_pmu_version; v++) {
+ for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
+ if (!kvm_has_perf_caps && perf_caps[i])
+ continue;
+
+ pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
+ v, perf_caps[i]);
+ /*
+ * To keep the total runtime reasonable, test every
+ * possible non-zero, non-reserved bitmap combination
+ * only with the native PMU version and the full bit
+ * vector length.
+ */
+ if (v == pmu_version) {
+ for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
+ test_arch_events(v, perf_caps[i], nr_arch_events, k);
+ }
+ /*
+ * Test single bits for all PMU versions and lengths up
+ * to the number of events + 1 (to verify KVM doesn't do
+ * weird things if the guest length is greater than the
+ * host length). Explicitly test a mask of '0' and all
+ * ones, i.e. all events being available and unavailable.
+ */
+ for (j = 0; j <= nr_arch_events + 1; j++) {
+ test_arch_events(v, perf_caps[i], j, 0);
+ test_arch_events(v, perf_caps[i], j, 0xff);
+
+ for (k = 0; k < nr_arch_events; k++)
+ test_arch_events(v, perf_caps[i], j, BIT(k));
+ }
+
+ pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
+ v, perf_caps[i]);
+ for (j = 0; j <= nr_gp_counters; j++)
+ test_gp_counters(v, perf_caps[i], j);
+
+ pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
+ v, perf_caps[i]);
+ for (j = 0; j <= nr_fixed_counters; j++) {
+ for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
+ test_fixed_counters(v, perf_caps[i], j, k);
+ }
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_is_pmu_enabled());
+
+ TEST_REQUIRE(host_cpu_is_intel);
+ TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
+ TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
+
+ kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
+ kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);
+
+ test_intel_counters();
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_SET_PMU_EVENT_FILTER.
+ *
+ * Copyright (C) 2022, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies the expected behavior of allow lists and deny lists for
+ * virtual PMU events.
+ */
+#include "kvm_util.h"
+#include "pmu.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define NUM_BRANCHES 42
+#define MAX_TEST_EVENTS 10
+
+#define PMU_EVENT_FILTER_INVALID_ACTION (KVM_PMU_EVENT_DENY + 1)
+#define PMU_EVENT_FILTER_INVALID_FLAGS (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1)
+#define PMU_EVENT_FILTER_INVALID_NEVENTS (KVM_PMU_EVENT_FILTER_MAX_EVENTS + 1)
+
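+/*
+ * Local mirror of the uapi struct kvm_pmu_event_filter, but with a fixed-size
+ * events[] array (the uapi struct ends in a flexible array member) so that
+ * filters can be statically initialized and copied by value.
+ */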
+struct __kvm_pmu_event_filter {
+ __u32 action;
+ __u32 nevents;
+ __u32 fixed_counter_bitmap;
+ __u32 flags;
+ __u32 pad[4];
+ __u64 events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
+};
+
+/*
+ * This event list comprises Intel's known architectural events, plus AMD's
+ * Branch Instructions Retired for Zen CPUs. Note, AMD and Intel use the
+ * same encoding for Instructions Retired.
+ */
+kvm_static_assert(INTEL_ARCH_INSTRUCTIONS_RETIRED == AMD_ZEN_INSTRUCTIONS_RETIRED);
+
+static const struct __kvm_pmu_event_filter base_event_filter = {
+ .nevents = ARRAY_SIZE(base_event_filter.events),
+ .events = {
+ INTEL_ARCH_CPU_CYCLES,
+ INTEL_ARCH_INSTRUCTIONS_RETIRED,
+ INTEL_ARCH_REFERENCE_CYCLES,
+ INTEL_ARCH_LLC_REFERENCES,
+ INTEL_ARCH_LLC_MISSES,
+ INTEL_ARCH_BRANCHES_RETIRED,
+ INTEL_ARCH_BRANCHES_MISPREDICTED,
+ INTEL_ARCH_TOPDOWN_SLOTS,
+ AMD_ZEN_BRANCHES_RETIRED,
+ },
+};
+
+struct {
+ uint64_t loads;
+ uint64_t stores;
+ uint64_t loads_stores;
+ uint64_t branches_retired;
+ uint64_t instructions_retired;
+} pmc_results;
+
+/*
+ * If we encounter a #GP during the guest PMU sanity check, then the guest
+ * PMU is not functional. Inform the hypervisor via GUEST_SYNC(-EFAULT).
+ */
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ GUEST_SYNC(-EFAULT);
+}
+
+/*
+ * Check that we can write a new value to the given MSR and read it back.
+ * The caller should provide a non-empty set of bits that are safe to flip.
+ *
+ * Return on success. GUEST_SYNC(-EIO) on error.
+ */
+static void check_msr(uint32_t msr, uint64_t bits_to_flip)
+{
+ uint64_t v = rdmsr(msr) ^ bits_to_flip;
+
+ wrmsr(msr, v);
+ if (rdmsr(msr) != v)
+ GUEST_SYNC(-EIO);
+
+ v ^= bits_to_flip;
+ wrmsr(msr, v);
+ if (rdmsr(msr) != v)
+ GUEST_SYNC(-EIO);
+}
+
+static void run_and_measure_loop(uint32_t msr_base)
+{
+ const uint64_t branches_retired = rdmsr(msr_base + 0);
+ const uint64_t insn_retired = rdmsr(msr_base + 1);
+
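+ /*
+ * Execute a tight one-instruction loop; "loop ." retires NUM_BRANCHES
+ * branch instructions (and at least as many total instructions)
+ * between the two pairs of counter reads.
+ */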
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+
+ pmc_results.branches_retired = rdmsr(msr_base + 0) - branches_retired;
+ pmc_results.instructions_retired = rdmsr(msr_base + 1) - insn_retired;
+}
+
+static void intel_guest_code(void)
+{
+ check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
+ check_msr(MSR_P6_EVNTSEL0, 0xffff);
+ check_msr(MSR_IA32_PMC0, 0xffff);
+ GUEST_SYNC(0);
+
+ for (;;) {
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_BRANCHES_RETIRED);
+ wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_INSTRUCTIONS_RETIRED);
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
+
+ run_and_measure_loop(MSR_IA32_PMC0);
+ GUEST_SYNC(0);
+ }
+}
+
+/*
+ * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
+ * this code uses the always-available, legacy K7 PMU MSRs, which alias to
+ * the first four of the six extended core PMU MSRs.
+ */
+static void amd_guest_code(void)
+{
+ check_msr(MSR_K7_EVNTSEL0, 0xffff);
+ check_msr(MSR_K7_PERFCTR0, 0xffff);
+ GUEST_SYNC(0);
+
+ for (;;) {
+ wrmsr(MSR_K7_EVNTSEL0, 0);
+ wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BRANCHES_RETIRED);
+ wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_INSTRUCTIONS_RETIRED);
+
+ run_and_measure_loop(MSR_K7_PERFCTR0);
+ GUEST_SYNC(0);
+ }
+}
+
+/*
+ * Run the VM to the next GUEST_SYNC(value), and return the value passed
+ * to the sync. Any other exit from the guest is fatal.
+ */
+static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ get_ucall(vcpu, &uc);
+ TEST_ASSERT(uc.cmd == UCALL_SYNC,
+ "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
+ return uc.args[1];
+}
+
+static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu)
+{
+ uint64_t r;
+
+ memset(&pmc_results, 0, sizeof(pmc_results));
+ sync_global_to_guest(vcpu->vm, pmc_results);
+
+ r = run_vcpu_to_sync(vcpu);
+ TEST_ASSERT(!r, "Unexpected sync value: 0x%lx", r);
+
+ sync_global_from_guest(vcpu->vm, pmc_results);
+}
+
+/*
+ * In a nested environment or if the vPMU is disabled, the guest PMU
+ * might not work as architected (accessing the PMU MSRs may raise
+ * #GP, or writes could simply be discarded). In those situations,
+ * there is no point in running these tests. The guest code will perform
+ * a sanity check and then GUEST_SYNC(success). In the case of failure,
+ * the behavior of the guest on resumption is undefined.
+ */
+static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
+{
+ uint64_t r;
+
+ vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
+ r = run_vcpu_to_sync(vcpu);
+ vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL);
+
+ return !r;
+}
+
+/*
+ * Remove the first occurrence of 'event' (if any) from the filter's
+ * event list.
+ */
+static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
+{
+ bool found = false;
+ int i;
+
+ for (i = 0; i < f->nevents; i++) {
+ if (found)
+ f->events[i - 1] = f->events[i];
+ else
+ found = f->events[i] == event;
+ }
+ if (found)
+ f->nevents--;
+}
+
+#define ASSERT_PMC_COUNTING_INSTRUCTIONS() \
+do { \
+ uint64_t br = pmc_results.branches_retired; \
+ uint64_t ir = pmc_results.instructions_retired; \
+ \
+ if (br && br != NUM_BRANCHES) \
+ pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \
+ __func__, br, NUM_BRANCHES); \
+ TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \
+ __func__, br); \
+ TEST_ASSERT(ir, "%s: Instructions retired = %lu (expected > 0)", \
+ __func__, ir); \
+} while (0)
+
+#define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS() \
+do { \
+ uint64_t br = pmc_results.branches_retired; \
+ uint64_t ir = pmc_results.instructions_retired; \
+ \
+ TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)", \
+ __func__, br); \
+ TEST_ASSERT(!ir, "%s: Instructions retired = %lu (expected 0)", \
+ __func__, ir); \
+} while (0)
+
+static void test_without_filter(struct kvm_vcpu *vcpu)
+{
+ run_vcpu_and_sync_pmc_results(vcpu);
+
+ ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_with_filter(struct kvm_vcpu *vcpu,
+ struct __kvm_pmu_event_filter *__f)
+{
+ struct kvm_pmu_event_filter *f = (void *)__f;
+
+ vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
+ run_vcpu_and_sync_pmc_results(vcpu);
+}
+
+static void test_amd_deny_list(struct kvm_vcpu *vcpu)
+{
+ struct __kvm_pmu_event_filter f = {
+ .action = KVM_PMU_EVENT_DENY,
+ .nevents = 1,
+ .events = {
+ RAW_EVENT(0x1C2, 0),
+ },
+ };
+
+ test_with_filter(vcpu, &f);
+
+ ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_member_deny_list(struct kvm_vcpu *vcpu)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = KVM_PMU_EVENT_DENY;
+ test_with_filter(vcpu, &f);
+
+ ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
+}
+
+static void test_member_allow_list(struct kvm_vcpu *vcpu)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = KVM_PMU_EVENT_ALLOW;
+ test_with_filter(vcpu, &f);
+
+ ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = KVM_PMU_EVENT_DENY;
+
+ remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
+ remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
+ remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
+ test_with_filter(vcpu, &f);
+
+ ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = KVM_PMU_EVENT_ALLOW;
+
+ remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
+ remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
+ remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
+ test_with_filter(vcpu, &f);
+
+ ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
+}
+
+/*
+ * Verify that setting KVM_PMU_CAP_DISABLE prevents the use of the PMU.
+ *
+ * Note that KVM_CAP_PMU_CAPABILITY must be enabled prior to creating any vCPUs.
+ */
+static void test_pmu_config_disable(void (*guest_code)(void))
+{
+ struct kvm_vcpu *vcpu;
+ int r;
+ struct kvm_vm *vm;
+
+ r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY);
+ if (!(r & KVM_PMU_CAP_DISABLE))
+ return;
+
+ vm = vm_create(1);
+
+ vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE);
+
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+ TEST_ASSERT(!sanity_check_pmu(vcpu),
+ "Guest should not be able to use disabled PMU.");
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * On Intel, check for a non-zero PMU version, at least one general-purpose
+ * counter per logical processor, and support for counting the number of branch
+ * instructions retired.
+ */
+static bool use_intel_pmu(void)
+{
+ return host_cpu_is_intel &&
+ kvm_cpu_property(X86_PROPERTY_PMU_VERSION) &&
+ kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) &&
+ kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED);
+}
+
+/*
+ * On AMD, all Family 17h+ CPUs (Zen and its successors) use event encoding
+ * 0xc2,0 for Branch Instructions Retired.
+ */
+static bool use_amd_pmu(void)
+{
+ return host_cpu_is_amd && kvm_cpu_family() >= 0x17;
+}
+
+/*
+ * "MEM_INST_RETIRED.ALL_LOADS", "MEM_INST_RETIRED.ALL_STORES", and
+ * "MEM_INST_RETIRED.ANY" from https://perfmon-events.intel.com/
+ * supported on Intel Xeon processors:
+ * - Sapphire Rapids, Ice Lake, Cascade Lake, Skylake.
+ */
+#define MEM_INST_RETIRED 0xD0
+#define MEM_INST_RETIRED_LOAD RAW_EVENT(MEM_INST_RETIRED, 0x81)
+#define MEM_INST_RETIRED_STORE RAW_EVENT(MEM_INST_RETIRED, 0x82)
+#define MEM_INST_RETIRED_LOAD_STORE RAW_EVENT(MEM_INST_RETIRED, 0x83)
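+/* Umask 0x81 = ALL_LOADS, 0x82 = ALL_STORES, 0x83 = ANY (loads + stores). */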
+
+static bool supports_event_mem_inst_retired(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+ if (x86_family(eax) == 0x6) {
+ switch (x86_model(eax)) {
+ /* Sapphire Rapids */
+ case 0x8F:
+ /* Ice Lake */
+ case 0x6A:
+ /* Skylake */
+ /* Cascade Lake */
+ case 0x55:
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*
+ * "LS Dispatch", from Processor Programming Reference
+ * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
+ * Preliminary Processor Programming Reference (PPR) for AMD Family
+ * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
+ * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
+ * B1 Processors Volume 1 of 2.
+ */
+#define LS_DISPATCH 0x29
+#define LS_DISPATCH_LOAD RAW_EVENT(LS_DISPATCH, BIT(0))
+#define LS_DISPATCH_STORE RAW_EVENT(LS_DISPATCH, BIT(1))
+#define LS_DISPATCH_LOAD_STORE RAW_EVENT(LS_DISPATCH, BIT(2))
+
+#define INCLUDE_MASKED_ENTRY(event_select, mask, match) \
+ KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, false)
+#define EXCLUDE_MASKED_ENTRY(event_select, mask, match) \
+ KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, true)
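+
+/*
+ * Masked entry semantics (roughly, per the KVM_SET_PMU_EVENT_FILTER ABI): a
+ * guest event matches an entry if the event selects are equal and
+ * (guest umask & entry mask) == entry match.  The filter action applies if
+ * the event matches an include entry and no exclude entry, so exclude
+ * entries carve specific umasks back out of a broader include.
+ */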
+
+static void masked_events_guest_test(uint32_t msr_base)
+{
+ /*
+ * The actual values of the counters don't determine the outcome of
+ * the test, only whether they are zero or non-zero.
+ */
+ const uint64_t loads = rdmsr(msr_base + 0);
+ const uint64_t stores = rdmsr(msr_base + 1);
+ const uint64_t loads_stores = rdmsr(msr_base + 2);
+ int val;
+
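+ /*
+ * The sequence below performs an explicit store, an explicit load, and
+ * a read-modify-write, giving the load, store, and load+store counters
+ * something to count.
+ */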
+ __asm__ __volatile__("movl $0, %[v];"
+ "movl %[v], %%eax;"
+ "incl %[v];"
+ : [v]"+m"(val) :: "eax");
+
+ pmc_results.loads = rdmsr(msr_base + 0) - loads;
+ pmc_results.stores = rdmsr(msr_base + 1) - stores;
+ pmc_results.loads_stores = rdmsr(msr_base + 2) - loads_stores;
+}
+
+static void intel_masked_events_guest_code(void)
+{
+ for (;;) {
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+ wrmsr(MSR_P6_EVNTSEL0 + 0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD);
+ wrmsr(MSR_P6_EVNTSEL0 + 1, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_STORE);
+ wrmsr(MSR_P6_EVNTSEL0 + 2, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD_STORE);
+
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x7);
+
+ masked_events_guest_test(MSR_IA32_PMC0);
+ GUEST_SYNC(0);
+ }
+}
+
+static void amd_masked_events_guest_code(void)
+{
+ for (;;) {
+ wrmsr(MSR_K7_EVNTSEL0, 0);
+ wrmsr(MSR_K7_EVNTSEL1, 0);
+ wrmsr(MSR_K7_EVNTSEL2, 0);
+
+ wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD);
+ wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_STORE);
+ wrmsr(MSR_K7_EVNTSEL2, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD_STORE);
+
+ masked_events_guest_test(MSR_K7_PERFCTR0);
+ GUEST_SYNC(0);
+ }
+}
+
+static void run_masked_events_test(struct kvm_vcpu *vcpu,
+ const uint64_t masked_events[],
+ const int nmasked_events)
+{
+ struct __kvm_pmu_event_filter f = {
+ .nevents = nmasked_events,
+ .action = KVM_PMU_EVENT_ALLOW,
+ .flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+ };
+
+ memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events);
+ test_with_filter(vcpu, &f);
+}
+
+#define ALLOW_LOADS BIT(0)
+#define ALLOW_STORES BIT(1)
+#define ALLOW_LOADS_STORES BIT(2)
+
+struct masked_events_test {
+ uint64_t intel_events[MAX_TEST_EVENTS];
+ uint64_t intel_event_end;
+ uint64_t amd_events[MAX_TEST_EVENTS];
+ uint64_t amd_event_end;
+ const char *msg;
+ uint32_t flags;
+};
+
+/*
+ * These are the test cases for the masked events tests.
+ *
+ * For each test, the guest enables 3 PMU counters (loads, stores,
+ * loads + stores). The filter is then set in KVM with the masked events
+ * provided. The test then verifies that the counters agree with which
+ * ones should be counting and which ones should be filtered.
+ */
+const struct masked_events_test test_cases[] = {
+ {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x81),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)),
+ },
+ .msg = "Only allow loads.",
+ .flags = ALLOW_LOADS,
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)),
+ },
+ .msg = "Only allow stores.",
+ .flags = ALLOW_STORES,
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(2)),
+ },
+ .msg = "Only allow loads + stores.",
+ .flags = ALLOW_LOADS_STORES,
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
+ EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, ~(BIT(0) | BIT(1)), 0),
+ },
+ .msg = "Only allow loads and stores.",
+ .flags = ALLOW_LOADS | ALLOW_STORES,
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
+ EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
+ EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)),
+ },
+ .msg = "Only allow loads and loads + stores.",
+ .flags = ALLOW_LOADS | ALLOW_LOADS_STORES
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFE, 0x82),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
+ EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)),
+ },
+ .msg = "Only allow stores and loads + stores.",
+ .flags = ALLOW_STORES | ALLOW_LOADS_STORES
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
+ },
+ .msg = "Only allow loads, stores, and loads + stores.",
+ .flags = ALLOW_LOADS | ALLOW_STORES | ALLOW_LOADS_STORES
+ },
+};
+
+static int append_test_events(const struct masked_events_test *test,
+ uint64_t *events, int nevents)
+{
+ const uint64_t *evts;
+ int i;
+
+ evts = use_intel_pmu() ? test->intel_events : test->amd_events;
+ for (i = 0; i < MAX_TEST_EVENTS; i++) {
+ if (evts[i] == 0)
+ break;
+
+ events[nevents + i] = evts[i];
+ }
+
+ return nevents + i;
+}
+
+static bool bool_eq(bool a, bool b)
+{
+ return a == b;
+}
+
+static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events,
+ int nevents)
+{
+ int ntests = ARRAY_SIZE(test_cases);
+ int i, n;
+
+ for (i = 0; i < ntests; i++) {
+ const struct masked_events_test *test = &test_cases[i];
+
+ /* Do any test case events overflow MAX_TEST_EVENTS? */
+ assert(test->intel_event_end == 0);
+ assert(test->amd_event_end == 0);
+
+ n = append_test_events(test, events, nevents);
+
+ run_masked_events_test(vcpu, events, n);
+
+ TEST_ASSERT(bool_eq(pmc_results.loads, test->flags & ALLOW_LOADS) &&
+ bool_eq(pmc_results.stores, test->flags & ALLOW_STORES) &&
+ bool_eq(pmc_results.loads_stores,
+ test->flags & ALLOW_LOADS_STORES),
+ "%s loads: %lu, stores: %lu, loads + stores: %lu",
+ test->msg, pmc_results.loads, pmc_results.stores,
+ pmc_results.loads_stores);
+ }
+}
+
+static void add_dummy_events(uint64_t *events, int nevents)
+{
+ int i;
+
+ for (i = 0; i < nevents; i++) {
+ int event_select = i % 0xFF;
+ bool exclude = ((i % 4) == 0);
+
+ if (event_select == MEM_INST_RETIRED ||
+ event_select == LS_DISPATCH)
+ event_select++;
+
+ events[i] = KVM_PMU_ENCODE_MASKED_ENTRY(event_select, 0,
+ 0, exclude);
+ }
+}
+
+static void test_masked_events(struct kvm_vcpu *vcpu)
+{
+ int nevents = KVM_PMU_EVENT_FILTER_MAX_EVENTS - MAX_TEST_EVENTS;
+ uint64_t events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
+
+ /* Run the test cases against a sparse PMU event filter. */
+ run_masked_events_tests(vcpu, events, 0);
+
+ /* Run the test cases against a dense PMU event filter. */
+ add_dummy_events(events, KVM_PMU_EVENT_FILTER_MAX_EVENTS);
+ run_masked_events_tests(vcpu, events, nevents);
+}
+
+static int set_pmu_event_filter(struct kvm_vcpu *vcpu,
+ struct __kvm_pmu_event_filter *__f)
+{
+ struct kvm_pmu_event_filter *f = (void *)__f;
+
+ return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
+}
+
+static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event,
+ uint32_t flags, uint32_t action)
+{
+ struct __kvm_pmu_event_filter f = {
+ .nevents = 1,
+ .flags = flags,
+ .action = action,
+ .events = {
+ event,
+ },
+ };
+
+ return set_pmu_event_filter(vcpu, &f);
+}
+
+static void test_filter_ioctl(struct kvm_vcpu *vcpu)
+{
+ uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ struct __kvm_pmu_event_filter f;
+ uint64_t e = ~0ul;
+ int r;
+
+ /*
+ * Unfortunately, setting invalid bits (anything other than eventsel+umask)
+ * in the event data is expected to succeed when flags == 0.
+ */
+ r = set_pmu_single_event_filter(vcpu, e, 0, KVM_PMU_EVENT_ALLOW);
+ TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
+
+ r = set_pmu_single_event_filter(vcpu, e,
+ KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+ KVM_PMU_EVENT_ALLOW);
+ TEST_ASSERT(r != 0, "Invalid PMU Event Filter is expected to fail");
+
+ e = KVM_PMU_ENCODE_MASKED_ENTRY(0xff, 0xff, 0xff, 0xf);
+ r = set_pmu_single_event_filter(vcpu, e,
+ KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+ KVM_PMU_EVENT_ALLOW);
+ TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
+
+ f = base_event_filter;
+ f.action = PMU_EVENT_FILTER_INVALID_ACTION;
+ r = set_pmu_event_filter(vcpu, &f);
+ TEST_ASSERT(r, "Set invalid action is expected to fail");
+
+ f = base_event_filter;
+ f.flags = PMU_EVENT_FILTER_INVALID_FLAGS;
+ r = set_pmu_event_filter(vcpu, &f);
+ TEST_ASSERT(r, "Set invalid flags is expected to fail");
+
+ f = base_event_filter;
+ f.nevents = PMU_EVENT_FILTER_INVALID_NEVENTS;
+ r = set_pmu_event_filter(vcpu, &f);
+ TEST_ASSERT(r, "Exceeding the max number of filter events should fail");
+
+ f = base_event_filter;
+ f.fixed_counter_bitmap = ~GENMASK_ULL(nr_fixed_counters, 0);
+ r = set_pmu_event_filter(vcpu, &f);
+ TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed");
+}
+
+static void intel_run_fixed_counter_guest_code(uint8_t idx)
+{
+ for (;;) {
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsr(MSR_CORE_PERF_FIXED_CTR0 + idx, 0);
+
+ /* Only OS_EN bit is enabled for fixed counter[idx]. */
+ wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(idx, FIXED_PMC_KERNEL));
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(idx));
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+ GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + idx));
+ }
+}
+
+static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu,
+ uint32_t action, uint32_t bitmap)
+{
+ struct __kvm_pmu_event_filter f = {
+ .action = action,
+ .fixed_counter_bitmap = bitmap,
+ };
+ set_pmu_event_filter(vcpu, &f);
+
+ return run_vcpu_to_sync(vcpu);
+}
+
+static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu,
+ uint32_t action,
+ uint32_t bitmap)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = action;
+ f.fixed_counter_bitmap = bitmap;
+ set_pmu_event_filter(vcpu, &f);
+
+ return run_vcpu_to_sync(vcpu);
+}
+
+static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx,
+ uint8_t nr_fixed_counters)
+{
+ unsigned int i;
+ uint32_t bitmap;
+ uint64_t count;
+
+ TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8,
+ "Invalid nr_fixed_counters");
+
+ /*
+ * Check that the fixed performance counter counts normally when KVM
+ * userspace doesn't set any PMU event filter.
+ */
+ count = run_vcpu_to_sync(vcpu);
+ TEST_ASSERT(count, "Unexpected count value: %ld", count);
+
+ for (i = 0; i < BIT(nr_fixed_counters); i++) {
+ bitmap = BIT(i);
+ count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_ALLOW,
+ bitmap);
+ TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
+
+ count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_DENY,
+ bitmap);
+ TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
+
+ /*
+ * Check that fixed_counter_bitmap has higher priority than
+ * events[] when both are set.
+ */
+ count = test_set_gp_and_fixed_event_filter(vcpu,
+ KVM_PMU_EVENT_ALLOW,
+ bitmap);
+ TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
+
+ count = test_set_gp_and_fixed_event_filter(vcpu,
+ KVM_PMU_EVENT_DENY,
+ bitmap);
+ TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
+ }
+}
+
+static void test_fixed_counter_bitmap(void)
+{
+ uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ uint8_t idx;
+
+ /*
+ * Check that pmu_event_filter works as expected when it's applied to
+ * fixed performance counters.
+ */
+ for (idx = 0; idx < nr_fixed_counters; idx++) {
+ vm = vm_create_with_one_vcpu(&vcpu,
+ intel_run_fixed_counter_guest_code);
+ vcpu_args_set(vcpu, 1, idx);
+ __test_fixed_counter_bitmap(vcpu, idx, nr_fixed_counters);
+ kvm_vm_free(vm);
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ void (*guest_code)(void);
+ struct kvm_vcpu *vcpu, *vcpu2 = NULL;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_is_pmu_enabled());
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_MASKED_EVENTS));
+
+ TEST_REQUIRE(use_intel_pmu() || use_amd_pmu());
+ guest_code = use_intel_pmu() ? intel_guest_code : amd_guest_code;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ TEST_REQUIRE(sanity_check_pmu(vcpu));
+
+ if (use_amd_pmu())
+ test_amd_deny_list(vcpu);
+
+ test_without_filter(vcpu);
+ test_member_deny_list(vcpu);
+ test_member_allow_list(vcpu);
+ test_not_member_deny_list(vcpu);
+ test_not_member_allow_list(vcpu);
+
+ if (use_intel_pmu() &&
+ supports_event_mem_inst_retired() &&
+ kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) >= 3)
+ vcpu2 = vm_vcpu_add(vm, 2, intel_masked_events_guest_code);
+ else if (use_amd_pmu())
+ vcpu2 = vm_vcpu_add(vm, 2, amd_masked_events_guest_code);
+
+ if (vcpu2)
+ test_masked_events(vcpu2);
+ test_filter_ioctl(vcpu);
+
+ kvm_vm_free(vm);
+
+ test_pmu_config_disable(guest_code);
+ test_fixed_counter_bitmap();
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022, Google LLC.
+ */
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/kvm_para.h>
+#include <linux/memfd.h>
+#include <linux/sizes.h>
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define BASE_DATA_SLOT 10
+#define BASE_DATA_GPA ((uint64_t)(1ull << 32))
+#define PER_CPU_DATA_SIZE ((uint64_t)(SZ_2M + PAGE_SIZE))
+
+/* Horrific macro so that the line info is captured accurately :-( */
+#define memcmp_g(gpa, pattern, size) \
+do { \
+ uint8_t *mem = (uint8_t *)gpa; \
+ size_t i; \
+ \
+ for (i = 0; i < size; i++) \
+ __GUEST_ASSERT(mem[i] == pattern, \
+ "Guest expected 0x%x at offset %lu (gpa 0x%lx), got 0x%x", \
+ pattern, i, gpa + i, mem[i]); \
+} while (0)
+
+static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size)
+{
+ size_t i;
+
+ for (i = 0; i < size; i++)
+ TEST_ASSERT(mem[i] == pattern,
+ "Host expected 0x%x at gpa 0x%lx, got 0x%x",
+ pattern, gpa + i, mem[i]);
+}
+
+/*
+ * Run memory conversion tests with explicit conversion:
+ * Execute the KVM hypercall to map/unmap a gpa range, which causes an exit to
+ * userspace so that it can back/unback the private memory. Subsequent guest
+ * accesses to the gpa range do not cause exits to userspace.
+ *
+ * Test memory conversion scenarios with following steps:
+ * 1) Access private memory using private access and verify that memory contents
+ * are not visible to userspace.
+ * 2) Convert memory to shared using explicit conversions and ensure that
+ * userspace is able to access the shared regions.
+ * 3) Convert memory back to private using explicit conversions and ensure that
+ * userspace is again not able to access converted private regions.
+ */
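+/*
+ * Conversions are requested via the KVM_HC_MAP_GPA_RANGE hypercall, which
+ * (with KVM_CAP_EXIT_HYPERCALL enabled) exits to userspace.  The host side,
+ * handle_exit_hypercall(), then updates the range's
+ * KVM_MEMORY_ATTRIBUTE_PRIVATE attribute and optionally fallocate()s or
+ * punches a hole in the backing guest_memfd.
+ */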
+
+#define GUEST_STAGE(o, s) { .offset = o, .size = s }
+
+enum ucall_syncs {
+ SYNC_SHARED,
+ SYNC_PRIVATE,
+};
+
+static void guest_sync_shared(uint64_t gpa, uint64_t size,
+ uint8_t current_pattern, uint8_t new_pattern)
+{
+ GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern);
+}
+
+static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern)
+{
+ GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern);
+}
+
+/* Arbitrary values, KVM doesn't care about the attribute flags. */
+#define MAP_GPA_SET_ATTRIBUTES BIT(0)
+#define MAP_GPA_SHARED BIT(1)
+#define MAP_GPA_DO_FALLOCATE BIT(2)
+
+static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared,
+ bool do_fallocate)
+{
+ uint64_t flags = MAP_GPA_SET_ATTRIBUTES;
+
+ if (map_shared)
+ flags |= MAP_GPA_SHARED;
+ if (do_fallocate)
+ flags |= MAP_GPA_DO_FALLOCATE;
+ kvm_hypercall_map_gpa_range(gpa, size, flags);
+}
+
+static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate)
+{
+ guest_map_mem(gpa, size, true, do_fallocate);
+}
+
+static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate)
+{
+ guest_map_mem(gpa, size, false, do_fallocate);
+}
+
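+/*
+ * Test ranges mix PAGE_SIZE and SZ_2M offsets/sizes, covering whole 2MiB
+ * regions as well as single pages at various offsets.
+ */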
+struct {
+ uint64_t offset;
+ uint64_t size;
+} static const test_ranges[] = {
+ GUEST_STAGE(0, PAGE_SIZE),
+ GUEST_STAGE(0, SZ_2M),
+ GUEST_STAGE(PAGE_SIZE, PAGE_SIZE),
+ GUEST_STAGE(PAGE_SIZE, SZ_2M),
+ GUEST_STAGE(SZ_2M, PAGE_SIZE),
+};
+
+static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate)
+{
+ const uint8_t def_p = 0xaa;
+ const uint8_t init_p = 0xcc;
+ uint64_t j;
+ int i;
+
+ /* Memory should be shared by default. */
+ memset((void *)base_gpa, def_p, PER_CPU_DATA_SIZE);
+ memcmp_g(base_gpa, def_p, PER_CPU_DATA_SIZE);
+ guest_sync_shared(base_gpa, PER_CPU_DATA_SIZE, def_p, init_p);
+
+ memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
+
+ for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
+ uint64_t gpa = base_gpa + test_ranges[i].offset;
+ uint64_t size = test_ranges[i].size;
+ uint8_t p1 = 0x11;
+ uint8_t p2 = 0x22;
+ uint8_t p3 = 0x33;
+ uint8_t p4 = 0x44;
+
+ /*
+ * Set the test region to pattern one to differentiate it from
+ * the data range as a whole (contains the initial pattern).
+ */
+ memset((void *)gpa, p1, size);
+
+ /*
+ * Convert to private, set and verify the private data, and
+ * then verify that the rest of the data (map shared) still
+ * holds the initial pattern, and that the host always sees the
+ * shared memory (initial pattern). Unlike shared memory,
+ * punching a hole in private memory is destructive, i.e.
+ * previous values aren't guaranteed to be preserved.
+ */
+ guest_map_private(gpa, size, do_fallocate);
+
+ if (size > PAGE_SIZE) {
+ memset((void *)gpa, p2, PAGE_SIZE);
+ goto skip;
+ }
+
+ memset((void *)gpa, p2, size);
+ guest_sync_private(gpa, size, p1);
+
+ /*
+ * Verify that the private memory was set to pattern two, and
+ * that shared memory still holds the initial pattern.
+ */
+ memcmp_g(gpa, p2, size);
+ if (gpa > base_gpa)
+ memcmp_g(base_gpa, init_p, gpa - base_gpa);
+ if (gpa + size < base_gpa + PER_CPU_DATA_SIZE)
+ memcmp_g(gpa + size, init_p,
+ (base_gpa + PER_CPU_DATA_SIZE) - (gpa + size));
+
+ /*
+ * Convert odd-number page frames back to shared to verify KVM
+ * also correctly handles holes in private ranges.
+ */
+ for (j = 0; j < size; j += PAGE_SIZE) {
+ if ((j >> PAGE_SHIFT) & 1) {
+ guest_map_shared(gpa + j, PAGE_SIZE, do_fallocate);
+ guest_sync_shared(gpa + j, PAGE_SIZE, p1, p3);
+
+ memcmp_g(gpa + j, p3, PAGE_SIZE);
+ } else {
+ guest_sync_private(gpa + j, PAGE_SIZE, p1);
+ }
+ }
+
+skip:
+ /*
+ * Convert the entire region back to shared, explicitly write
+ * pattern three to fill in the even-number frames before
+ * asking the host to verify (and write pattern four).
+ */
+ guest_map_shared(gpa, size, do_fallocate);
+ memset((void *)gpa, p3, size);
+ guest_sync_shared(gpa, size, p3, p4);
+ memcmp_g(gpa, p4, size);
+
+ /* Reset the shared memory back to the initial pattern. */
+ memset((void *)gpa, init_p, size);
+
+ /*
+ * Free (via PUNCH_HOLE) *all* private memory so that the next
+ * iteration starts from a clean slate, e.g. with respect to
+ * whether or not there are pages/folios in guest_mem.
+ */
+ guest_map_shared(base_gpa, PER_CPU_DATA_SIZE, true);
+ }
+}
+
+static void guest_punch_hole(uint64_t gpa, uint64_t size)
+{
+ /* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */
+ uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE;
+
+ kvm_hypercall_map_gpa_range(gpa, size, flags);
+}
+
+/*
+ * Test that PUNCH_HOLE actually frees memory by punching holes without doing a
+ * proper conversion. Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating
+ * (subsequent fault) should zero memory.
+ */
+static void guest_test_punch_hole(uint64_t base_gpa, bool precise)
+{
+ const uint8_t init_p = 0xcc;
+ int i;
+
+ /*
+ * Convert the entire range to private, this testcase is all about
+ * punching holes in guest_memfd, i.e. shared mappings aren't needed.
+ */
+ guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false);
+
+ for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
+ uint64_t gpa = base_gpa + test_ranges[i].offset;
+ uint64_t size = test_ranges[i].size;
+
+ /*
+ * Free all memory before each iteration, even for the !precise
+ * case where the memory will be faulted back in. Freeing and
+ * reallocating should obviously work, and freeing all memory
+ * minimizes the probability of cross-testcase influence.
+ */
+ guest_punch_hole(base_gpa, PER_CPU_DATA_SIZE);
+
+ /* Fault-in and initialize memory, and verify the pattern. */
+ if (precise) {
+ memset((void *)gpa, init_p, size);
+ memcmp_g(gpa, init_p, size);
+ } else {
+ memset((void *)base_gpa, init_p, PER_CPU_DATA_SIZE);
+ memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
+ }
+
+ /*
+ * Punch a hole at the target range and verify that reads from
+ * the guest succeed and return zeroes.
+ */
+ guest_punch_hole(gpa, size);
+ memcmp_g(gpa, 0, size);
+ }
+}
+
+static void guest_code(uint64_t base_gpa)
+{
+ /*
+ * Run the conversion test twice, with and without doing fallocate() on
+ * the guest_memfd backing when converting between shared and private.
+ */
+ guest_test_explicit_conversion(base_gpa, false);
+ guest_test_explicit_conversion(base_gpa, true);
+
+ /*
+ * Run the PUNCH_HOLE test twice too, once with the entire guest_memfd
+ * faulted in, once with only the target range faulted in.
+ */
+ guest_test_punch_hole(base_gpa, false);
+ guest_test_punch_hole(base_gpa, true);
+ GUEST_DONE();
+}
+
+static void handle_exit_hypercall(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ uint64_t gpa = run->hypercall.args[0];
+ uint64_t size = run->hypercall.args[1] * PAGE_SIZE;
+ bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES;
+ bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED;
+ bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE;
+ struct kvm_vm *vm = vcpu->vm;
+
+ TEST_ASSERT(run->hypercall.nr == KVM_HC_MAP_GPA_RANGE,
+ "Wanted MAP_GPA_RANGE (%u), got '%llu'",
+ KVM_HC_MAP_GPA_RANGE, run->hypercall.nr);
+
+ if (do_fallocate)
+ vm_guest_mem_fallocate(vm, gpa, size, map_shared);
+
+ if (set_attributes)
+ vm_set_memory_attributes(vm, gpa, size,
+ map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE);
+ run->hypercall.ret = 0;
+}
+
+static bool run_vcpus;
+
+static void *__test_mem_conversions(void *__vcpu)
+{
+ struct kvm_vcpu *vcpu = __vcpu;
+ struct kvm_run *run = vcpu->run;
+ struct kvm_vm *vm = vcpu->vm;
+ struct ucall uc;
+
+ while (!READ_ONCE(run_vcpus))
+ ;
+
+ for ( ;; ) {
+ vcpu_run(vcpu);
+
+ if (run->exit_reason == KVM_EXIT_HYPERCALL) {
+ handle_exit_hypercall(vcpu);
+ continue;
+ }
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Wanted KVM_EXIT_IO, got exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC: {
+ uint64_t gpa = uc.args[1];
+ size_t size = uc.args[2];
+ size_t i;
+
+ TEST_ASSERT(uc.args[0] == SYNC_SHARED ||
+ uc.args[0] == SYNC_PRIVATE,
+ "Unknown sync command '%ld'", uc.args[0]);
+
+ for (i = 0; i < size; i += vm->page_size) {
+ size_t nr_bytes = min_t(size_t, vm->page_size, size - i);
+ uint8_t *hva = addr_gpa2hva(vm, gpa + i);
+
+ /* In all cases, the host should observe the shared data. */
+ memcmp_h(hva, gpa + i, uc.args[3], nr_bytes);
+
+ /* For shared, write the new pattern to guest memory. */
+ if (uc.args[0] == SYNC_SHARED)
+ memset(hva, uc.args[4], nr_bytes);
+ }
+ break;
+ }
+ case UCALL_DONE:
+ return NULL;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+ }
+}
+
+static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus,
+ uint32_t nr_memslots)
+{
+ /*
+ * Allocate enough memory so that each vCPU's chunk of memory can be
+ * naturally aligned with respect to the size of the backing store.
+ */
+ const size_t alignment = max_t(size_t, SZ_2M, get_backing_src_pagesz(src_type));
+ const size_t per_cpu_size = align_up(PER_CPU_DATA_SIZE, alignment);
+ const size_t memfd_size = per_cpu_size * nr_vcpus;
+ const size_t slot_size = memfd_size / nr_memslots;
+ struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+ pthread_t threads[KVM_MAX_VCPUS];
+ struct kvm_vm *vm;
+ int memfd, i, r;
+
+ const struct vm_shape shape = {
+ .mode = VM_MODE_DEFAULT,
+ .type = KVM_X86_SW_PROTECTED_VM,
+ };
+
+ TEST_ASSERT(slot_size * nr_memslots == memfd_size,
+ "The memfd size (0x%lx) needs to be cleanly divisible by the number of memslots (%u)",
+ memfd_size, nr_memslots);
+ vm = __vm_create_with_vcpus(shape, nr_vcpus, 0, guest_code, vcpus);
+
+ vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE));
+
+ memfd = vm_create_guest_memfd(vm, memfd_size, 0);
+
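+ /*
+ * Bind each memslot to a disjoint, slot_size chunk of the single
+ * guest_memfd, i.e. memslot 'i' is backed by guest_memfd offset
+ * slot_size * i.
+ */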
+ for (i = 0; i < nr_memslots; i++)
+ vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i,
+ BASE_DATA_SLOT + i, slot_size / vm->page_size,
+ KVM_MEM_GUEST_MEMFD, memfd, slot_size * i);
+
+ for (i = 0; i < nr_vcpus; i++) {
+ uint64_t gpa = BASE_DATA_GPA + i * per_cpu_size;
+
+ vcpu_args_set(vcpus[i], 1, gpa);
+
+ /*
+ * Map only what is needed so that an out-of-bounds access
+ * results in a #PF => SHUTDOWN instead of data corruption.
+ */
+ virt_map(vm, gpa, gpa, PER_CPU_DATA_SIZE / vm->page_size);
+
+ pthread_create(&threads[i], NULL, __test_mem_conversions, vcpus[i]);
+ }
+
+ WRITE_ONCE(run_vcpus, true);
+
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_join(threads[i], NULL);
+
+ kvm_vm_free(vm);
+
+ /*
+ * Allocate and free memory from the guest_memfd after closing the VM
+ * fd. The guest_memfd is gifted a reference to its owning VM, i.e.
+ * should prevent the VM from being fully destroyed until the last
+ * reference to the guest_memfd is also put.
+ */
+ r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
+ TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+
+ r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
+ TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+
+ close(memfd);
+}
+
+static void usage(const char *cmd)
+{
+ puts("");
+ printf("usage: %s [-h] [-m nr_memslots] [-s mem_type] [-n nr_vcpus]\n", cmd);
+ puts("");
+ backing_src_help("-s");
+ puts("");
+ puts(" -n: specify the number of vcpus (default: 1)");
+ puts("");
+ puts(" -m: specify the number of memslots (default: 1)");
+ puts("");
+}
+
+int main(int argc, char *argv[])
+{
+ enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC;
+ uint32_t nr_memslots = 1;
+ uint32_t nr_vcpus = 1;
+ int opt;
+
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
+
+ while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) {
+ switch (opt) {
+ case 's':
+ src_type = parse_backing_src_type(optarg);
+ break;
+ case 'n':
+ nr_vcpus = atoi_positive("nr_vcpus", optarg);
+ break;
+ case 'm':
+ nr_memslots = atoi_positive("nr_memslots", optarg);
+ break;
+ case 'h':
+ default:
+ usage(argv[0]);
+ exit(0);
+ }
+ }
+
+ test_mem_conversions(src_type, nr_vcpus, nr_memslots);
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023, Google LLC.
+ */
+#include <linux/kvm.h>
+#include <pthread.h>
+#include <stdint.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+/* Arbitrarily selected to avoid overlaps with anything else */
+#define EXITS_TEST_GVA 0xc0000000
+#define EXITS_TEST_GPA EXITS_TEST_GVA
+#define EXITS_TEST_NPAGES 1
+#define EXITS_TEST_SIZE (EXITS_TEST_NPAGES * PAGE_SIZE)
+#define EXITS_TEST_SLOT 10
+
+static uint64_t guest_repeatedly_read(void)
+{
+ volatile uint64_t value;
+
+ while (true)
+ value = *((uint64_t *) EXITS_TEST_GVA);
+
+ return value;
+}
+
+static uint32_t run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu)
+{
+ int r;
+
+ r = _vcpu_run(vcpu);
+ if (r) {
+ TEST_ASSERT(errno == EFAULT, KVM_IOCTL_ERROR(KVM_RUN, r));
+ TEST_ASSERT_EQ(vcpu->run->exit_reason, KVM_EXIT_MEMORY_FAULT);
+ }
+ return vcpu->run->exit_reason;
+}
+
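+/*
+ * Use the software-protected VM type so that private, guest_memfd-backed
+ * memory can be exercised without SEV/TDX hardware.
+ */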
+const struct vm_shape protected_vm_shape = {
+ .mode = VM_MODE_DEFAULT,
+ .type = KVM_X86_SW_PROTECTED_VM,
+};
+
+static void test_private_access_memslot_deleted(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ pthread_t vm_thread;
+ void *thread_return;
+ uint32_t exit_reason;
+
+ vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
+ guest_repeatedly_read);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ EXITS_TEST_GPA, EXITS_TEST_SLOT,
+ EXITS_TEST_NPAGES,
+ KVM_MEM_GUEST_MEMFD);
+
+ virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
+
+ /* Request to access page privately */
+ vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
+
+ pthread_create(&vm_thread, NULL,
+ (void *(*)(void *))run_vcpu_get_exit_reason,
+ (void *)vcpu);
+
+ vm_mem_region_delete(vm, EXITS_TEST_SLOT);
+
+ pthread_join(vm_thread, &thread_return);
+ exit_reason = (uint32_t)(uint64_t)thread_return;
+
+ TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
+
+ kvm_vm_free(vm);
+}
+
+static void test_private_access_memslot_not_private(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ uint32_t exit_reason;
+
+ vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
+ guest_repeatedly_read);
+
+ /* Add a non-private memslot (flags = 0) */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ EXITS_TEST_GPA, EXITS_TEST_SLOT,
+ EXITS_TEST_NPAGES, 0);
+
+ virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
+
+ /* Request to access page privately */
+ vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
+
+ exit_reason = run_vcpu_get_exit_reason(vcpu);
+
+ TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
+
+ test_private_access_memslot_deleted();
+ test_private_access_memslot_not_private();
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test edge cases and race conditions in kvm_recalculate_apic_map().
+ */
+
+#include <sys/ioctl.h>
+#include <pthread.h>
+#include <time.h>
+
+#include "processor.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "apic.h"
+
+#define TIMEOUT 5 /* seconds */
+
+#define LAPIC_DISABLED 0
+#define LAPIC_X2APIC (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)
+#define MAX_XAPIC_ID 0xff
+
+static void *race(void *arg)
+{
+ struct kvm_lapic_state lapic = {};
+ struct kvm_vcpu *vcpu = arg;
+
+ while (1) {
+ /* Trigger kvm_recalculate_apic_map(). */
+ vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
+ pthread_testcancel();
+ }
+
+ return NULL;
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+ struct kvm_vcpu *vcpuN;
+ struct kvm_vm *vm;
+ pthread_t thread;
+ time_t t;
+ int i;
+
+ kvm_static_assert(KVM_MAX_VCPUS > MAX_XAPIC_ID);
+
+ /*
+ * Create the max number of vCPUs supported by selftests so that KVM
+ * has a decent amount of work to do when recalculating the map, i.e. to
+ * make the problematic window large enough to hit.
+ */
+ vm = vm_create_with_vcpus(KVM_MAX_VCPUS, NULL, vcpus);
+
+ /*
+ * Enable x2APIC on all vCPUs so that KVM doesn't bail from the recalc
+ * due to vCPUs having aliased xAPIC IDs (truncated to 8 bits).
+ */
+ for (i = 0; i < KVM_MAX_VCPUS; i++)
+ vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC);
+
+ TEST_ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0);
+
+ vcpuN = vcpus[KVM_MAX_VCPUS - 1];
+ for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
+ vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_X2APIC);
+ vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED);
+ }
+
+ TEST_ASSERT_EQ(pthread_cancel(thread), 0);
+ TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that KVM_SET_BOOT_CPU_ID works as intended
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "apic.h"
+
+static void guest_bsp_vcpu(void *arg)
+{
+ GUEST_SYNC(1);
+
+ GUEST_ASSERT_NE(get_bsp_flag(), 0);
+
+ GUEST_DONE();
+}
+
+static void guest_not_bsp_vcpu(void *arg)
+{
+ GUEST_SYNC(1);
+
+ GUEST_ASSERT_EQ(get_bsp_flag(), 0);
+
+ GUEST_DONE();
+}
+
+static void test_set_invalid_bsp(struct kvm_vm *vm)
+{
+ unsigned long max_vcpu_id = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
+ int r;
+
+ if (max_vcpu_id) {
+ r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(max_vcpu_id + 1));
+ TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID > MAX should fail");
+ }
+
+ r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(1L << 32));
+ TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID[63:32]!=0 should fail");
+}
+
+static void test_set_bsp_busy(struct kvm_vcpu *vcpu, const char *msg)
+{
+ int r = __vm_ioctl(vcpu->vm, KVM_SET_BOOT_CPU_ID,
+ (void *)(unsigned long)vcpu->id);
+
+ TEST_ASSERT(r == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set %s", msg);
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+ int stage;
+
+ for (stage = 0; stage < 2; stage++) {
+
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage + 1,
+ "Stage %d: Unexpected register values vmexit, got %lx",
+ stage + 1, (ulong)uc.args[1]);
+ test_set_bsp_busy(vcpu, "while running vm");
+ break;
+ case UCALL_DONE:
+ TEST_ASSERT(stage == 1,
+ "Expected GUEST_DONE in stage 2, got stage %d",
+ stage);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_ASSERT(false, "Unexpected exit: %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ }
+ }
+}
+
+static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id,
+ struct kvm_vcpu *vcpus[])
+{
+ struct kvm_vm *vm;
+ uint32_t i;
+
+ vm = vm_create(nr_vcpus);
+
+ test_set_invalid_bsp(vm);
+
+ vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(unsigned long)bsp_vcpu_id);
+
+ for (i = 0; i < nr_vcpus; i++)
+ vcpus[i] = vm_vcpu_add(vm, i, i == bsp_vcpu_id ? guest_bsp_vcpu :
+ guest_not_bsp_vcpu);
+ return vm;
+}
+
+static void run_vm_bsp(uint32_t bsp_vcpu_id)
+{
+ struct kvm_vcpu *vcpus[2];
+ struct kvm_vm *vm;
+
+ vm = create_vm(ARRAY_SIZE(vcpus), bsp_vcpu_id, vcpus);
+
+ run_vcpu(vcpus[0]);
+ run_vcpu(vcpus[1]);
+
+ kvm_vm_free(vm);
+}
+
+static void check_set_bsp_busy(void)
+{
+ struct kvm_vcpu *vcpus[2];
+ struct kvm_vm *vm;
+
+ vm = create_vm(ARRAY_SIZE(vcpus), 0, vcpus);
+
+ test_set_bsp_busy(vcpus[1], "after adding vcpu");
+
+ run_vcpu(vcpus[0]);
+ run_vcpu(vcpus[1]);
+
+ test_set_bsp_busy(vcpus[1], "to a terminated vcpu");
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID));
+
+ run_vm_bsp(0);
+ run_vm_bsp(1);
+ run_vm_bsp(0);
+
+ check_set_bsp_busy();
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM_SET_SREGS tests
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This is a regression test for the bug fixed by the following commit:
+ * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values")
+ *
+ * That bug allowed a user-mode program that called the KVM_SET_SREGS
+ * ioctl to put a VCPU's local APIC into an invalid state.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define TEST_INVALID_CR_BIT(vcpu, cr, orig, bit) \
+do { \
+ struct kvm_sregs new; \
+ int rc; \
+ \
+ /* Skip the sub-test, the feature/bit is supported. */ \
+ if (orig.cr & bit) \
+ break; \
+ \
+ memcpy(&new, &orig, sizeof(sregs)); \
+ new.cr |= bit; \
+ \
+ rc = _vcpu_sregs_set(vcpu, &new); \
+ TEST_ASSERT(rc, "KVM allowed invalid " #cr " bit (0x%lx)", bit); \
+ \
+ /* Sanity check that KVM didn't change anything. */ \
+ vcpu_sregs_get(vcpu, &new); \
+ TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs"); \
+} while (0)
+
+static uint64_t calc_supported_cr4_feature_bits(void)
+{
+ uint64_t cr4;
+
+ cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE |
+ X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE |
+ X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT;
+ if (kvm_cpu_has(X86_FEATURE_UMIP))
+ cr4 |= X86_CR4_UMIP;
+ if (kvm_cpu_has(X86_FEATURE_LA57))
+ cr4 |= X86_CR4_LA57;
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ cr4 |= X86_CR4_VMXE;
+ if (kvm_cpu_has(X86_FEATURE_SMX))
+ cr4 |= X86_CR4_SMXE;
+ if (kvm_cpu_has(X86_FEATURE_FSGSBASE))
+ cr4 |= X86_CR4_FSGSBASE;
+ if (kvm_cpu_has(X86_FEATURE_PCID))
+ cr4 |= X86_CR4_PCIDE;
+ if (kvm_cpu_has(X86_FEATURE_XSAVE))
+ cr4 |= X86_CR4_OSXSAVE;
+ if (kvm_cpu_has(X86_FEATURE_SMEP))
+ cr4 |= X86_CR4_SMEP;
+ if (kvm_cpu_has(X86_FEATURE_SMAP))
+ cr4 |= X86_CR4_SMAP;
+ if (kvm_cpu_has(X86_FEATURE_PKU))
+ cr4 |= X86_CR4_PKE;
+
+ return cr4;
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_sregs sregs;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ uint64_t cr4;
+ int rc, i;
+
+ /*
+ * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
+ * use it to verify all supported CR4 bits can be set prior to defining
+ * the vCPU model, i.e. without doing KVM_SET_CPUID2.
+ */
+ vm = vm_create_barebones();
+ vcpu = __vm_vcpu_add(vm, 0);
+
+ vcpu_sregs_get(vcpu, &sregs);
+
+ sregs.cr0 = 0;
+ sregs.cr4 |= calc_supported_cr4_feature_bits();
+ cr4 = sregs.cr4;
+
+ rc = _vcpu_sregs_set(vcpu, &sregs);
+ TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4);
+
+ vcpu_sregs_get(vcpu, &sregs);
+ TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)",
+ sregs.cr4, cr4);
+
+ /* Verify all unsupported features are rejected by KVM. */
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMXE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_FSGSBASE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PCIDE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_OSXSAVE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMEP);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMAP);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PKE);
+
+ for (i = 32; i < 64; i++)
+ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, BIT(i));
+
+ /* NW without CD is illegal, as is PG without PE. */
+ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW);
+ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG);
+
+ kvm_vm_free(vm);
+
+ /* Create a "real" VM and verify APIC_BASE can be set. */
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ vcpu_sregs_get(vcpu, &sregs);
+ sregs.apic_base = 1 << 10;
+ rc = _vcpu_sregs_set(vcpu, &sregs);
+ TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
+ sregs.apic_base);
+ sregs.apic_base = 1 << 11;
+ rc = _vcpu_sregs_set(vcpu, &sregs);
+ TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
+ sregs.apic_base);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kvm.h>
+#include <linux/psp-sev.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
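+/* The SEV-ES "DebugSwap" VMSA feature, i.e. BIT(5) of the VMSA features field. */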
+#define SVM_SEV_FEAT_DEBUG_SWAP 32u
+
+/*
+ * Some features may have hidden dependencies, or may only work
+ * for certain VM types. Err on the side of safety and don't
+ * expect that all supported features can be passed one by one
+ * to KVM_SEV_INIT2.
+ *
+ * (Well, right now there's only one...)
+ */
+#define KNOWN_FEATURES SVM_SEV_FEAT_DEBUG_SWAP
+
+int kvm_fd;
+u64 supported_vmsa_features;
+bool have_sev_es;
+
+static int __sev_ioctl(int vm_fd, int cmd_id, void *data)
+{
+ struct kvm_sev_cmd cmd = {
+ .id = cmd_id,
+ .data = (uint64_t)data,
+ .sev_fd = open_sev_dev_path_or_exit(),
+ };
+ int ret;
+
+ ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
+ TEST_ASSERT(ret < 0 || cmd.error == SEV_RET_SUCCESS,
+ "%d failed: fw error: %d\n",
+ cmd_id, cmd.error);
+
+ return ret;
+}
+
+static void test_init2(unsigned long vm_type, struct kvm_sev_init *init)
+{
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create_barebones_type(vm_type);
+ ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init);
+ TEST_ASSERT(ret == 0,
+ "KVM_SEV_INIT2 return code is %d (expected 0), errno: %d",
+ ret, errno);
+ kvm_vm_free(vm);
+}
+
+static void test_init2_invalid(unsigned long vm_type, struct kvm_sev_init *init, const char *msg)
+{
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create_barebones_type(vm_type);
+ ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "KVM_SEV_INIT2 should fail, %s.",
+ msg);
+ kvm_vm_free(vm);
+}
+
+void test_vm_types(void)
+{
+ test_init2(KVM_X86_SEV_VM, &(struct kvm_sev_init){});
+
+ /*
+ * TODO: check that unsupported types cannot be created. Probably
+ * a separate selftest.
+ */
+ if (have_sev_es)
+ test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){});
+
+ test_init2_invalid(0, &(struct kvm_sev_init){},
+ "VM type is KVM_X86_DEFAULT_VM");
+ if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))
+ test_init2_invalid(KVM_X86_SW_PROTECTED_VM, &(struct kvm_sev_init){},
+ "VM type is KVM_X86_SW_PROTECTED_VM");
+}
+
+void test_flags(uint32_t vm_type)
+{
+ int i;
+
+ for (i = 0; i < 32; i++)
+ test_init2_invalid(vm_type,
+ &(struct kvm_sev_init){ .flags = BIT(i) },
+ "invalid flag");
+}
+
+void test_features(uint32_t vm_type, uint64_t supported_features)
+{
+ int i;
+
+ for (i = 0; i < 64; i++) {
+ if (!(supported_features & BIT_ULL(i)))
+ test_init2_invalid(vm_type,
+ &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) },
+ "unknown feature");
+ else if (KNOWN_FEATURES & BIT_ULL(i))
+ test_init2(vm_type,
+ &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) });
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ int kvm_fd = open_kvm_dev_path_or_exit();
+ bool have_sev;
+
+ TEST_REQUIRE(__kvm_has_device_attr(kvm_fd, KVM_X86_GRP_SEV,
+ KVM_X86_SEV_VMSA_FEATURES) == 0);
+ kvm_device_attr_get(kvm_fd, KVM_X86_GRP_SEV,
+ KVM_X86_SEV_VMSA_FEATURES,
+ &supported_vmsa_features);
+
+ have_sev = kvm_cpu_has(X86_FEATURE_SEV);
+ TEST_ASSERT(have_sev == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)),
+ "sev: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
+ kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_VM);
+
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM));
+ have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
+
+ TEST_ASSERT(have_sev_es == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_ES_VM)),
+ "sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
+ kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM);
+
+ test_vm_types();
+
+ test_flags(KVM_X86_SEV_VM);
+ if (have_sev_es)
+ test_flags(KVM_X86_SEV_ES_VM);
+
+ test_features(KVM_X86_SEV_VM, 0);
+ if (have_sev_es)
+ test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features);
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kvm.h>
+#include <linux/psp-sev.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "sev.h"
+#include "kselftest.h"
+
+#define NR_MIGRATE_TEST_VCPUS 4
+#define NR_MIGRATE_TEST_VMS 3
+#define NR_LOCK_TESTING_THREADS 3
+#define NR_LOCK_TESTING_ITERATIONS 10000
+
+bool have_sev_es;
+
+static struct kvm_vm *sev_vm_create(bool es)
+{
+ struct kvm_vm *vm;
+ int i;
+
+ vm = vm_create_barebones();
+ if (!es)
+ sev_vm_init(vm);
+ else
+ sev_es_vm_init(vm);
+
+ for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+ __vm_vcpu_add(vm, i);
+
+ sev_vm_launch(vm, es ? SEV_POLICY_ES : 0);
+
+ if (es)
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+ return vm;
+}
+
+static struct kvm_vm *aux_vm_create(bool with_vcpus)
+{
+ struct kvm_vm *vm;
+ int i;
+
+ vm = vm_create_barebones();
+ if (!with_vcpus)
+ return vm;
+
+ for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+ __vm_vcpu_add(vm, i);
+
+ return vm;
+}
+
+static int __sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
+{
+ return __vm_enable_cap(dst, KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, src->fd);
+}
+
+static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
+{
+ int ret;
+
+ ret = __sev_migrate_from(dst, src);
+ TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d", ret, errno);
+}
+
+static void test_sev_migrate_from(bool es)
+{
+ struct kvm_vm *src_vm;
+ struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS];
+ int i, ret;
+
+ src_vm = sev_vm_create(es);
+ for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
+ dst_vms[i] = aux_vm_create(true);
+
+ /* Initial migration from the src to the first dst. */
+ sev_migrate_from(dst_vms[0], src_vm);
+
+ for (i = 1; i < NR_MIGRATE_TEST_VMS; i++)
+ sev_migrate_from(dst_vms[i], dst_vms[i - 1]);
+
+ /* Migrate the guest back to the original VM. */
+ ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]);
+ TEST_ASSERT(ret == -1 && errno == EIO,
+ "VM that was migrated from should be dead. ret %d, errno: %d", ret,
+ errno);
+
+ kvm_vm_free(src_vm);
+ for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
+ kvm_vm_free(dst_vms[i]);
+}
+
+struct locking_thread_input {
+ struct kvm_vm *vm;
+ struct kvm_vm *source_vms[NR_LOCK_TESTING_THREADS];
+};
+
+static void *locking_test_thread(void *arg)
+{
+ int i, j;
+ struct locking_thread_input *input = (struct locking_thread_input *)arg;
+
+ for (i = 0; i < NR_LOCK_TESTING_ITERATIONS; ++i) {
+ j = i % NR_LOCK_TESTING_THREADS;
+ __sev_migrate_from(input->vm, input->source_vms[j]);
+ }
+
+ return NULL;
+}
+
+static void test_sev_migrate_locking(void)
+{
+ struct locking_thread_input input[NR_LOCK_TESTING_THREADS];
+ pthread_t pt[NR_LOCK_TESTING_THREADS];
+ int i;
+
+ for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) {
+ input[i].vm = sev_vm_create(/* es= */ false);
+ input[0].source_vms[i] = input[i].vm;
+ }
+ for (i = 1; i < NR_LOCK_TESTING_THREADS; ++i)
+ memcpy(input[i].source_vms, input[0].source_vms,
+ sizeof(input[i].source_vms));
+
+ for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+ pthread_create(&pt[i], NULL, locking_test_thread, &input[i]);
+
+ for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+ pthread_join(pt[i], NULL);
+ for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+ kvm_vm_free(input[i].vm);
+}
+
+static void test_sev_migrate_parameters(void)
+{
+ struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_no_sev,
+ *sev_es_vm_no_vmsa;
+ int ret;
+
+ vm_no_vcpu = vm_create_barebones();
+ vm_no_sev = aux_vm_create(true);
+ ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "Migrations require SEV enabled. ret %d, errno: %d", ret,
+ errno);
+
+ if (!have_sev_es)
+ goto out;
+
+ sev_vm = sev_vm_create(/* es= */ false);
+ sev_es_vm = sev_vm_create(/* es= */ true);
+ sev_es_vm_no_vmsa = vm_create_barebones();
+ sev_es_vm_init(sev_es_vm_no_vmsa);
+ __vm_vcpu_add(sev_es_vm_no_vmsa, 1);
+
+ ret = __sev_migrate_from(sev_vm, sev_es_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "Should not be able migrate to SEV enabled VM. ret: %d, errno: %d",
+ ret, errno);
+
+ ret = __sev_migrate_from(sev_es_vm, sev_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d",
+ ret, errno);
+
+ ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d",
+ ret, errno);
+
+ ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d",
+ ret, errno);
+
+ kvm_vm_free(sev_vm);
+ kvm_vm_free(sev_es_vm);
+ kvm_vm_free(sev_es_vm_no_vmsa);
+out:
+ kvm_vm_free(vm_no_vcpu);
+ kvm_vm_free(vm_no_sev);
+}
+
+static int __sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
+{
+ return __vm_enable_cap(dst, KVM_CAP_VM_COPY_ENC_CONTEXT_FROM, src->fd);
+}
+
+static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
+{
+ int ret;
+
+ ret = __sev_mirror_create(dst, src);
+ TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d", ret, errno);
+}
+
+static void verify_mirror_allowed_cmds(struct kvm_vm *vm)
+{
+ struct kvm_sev_guest_status status;
+ int cmd_id;
+
+ for (cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) {
+ int ret;
+
+ /*
+ * These commands are allowed for mirror VMs, all others are
+ * not.
+ */
+ switch (cmd_id) {
+ case KVM_SEV_LAUNCH_UPDATE_VMSA:
+ case KVM_SEV_GUEST_STATUS:
+ case KVM_SEV_DBG_DECRYPT:
+ case KVM_SEV_DBG_ENCRYPT:
+ continue;
+ default:
+ break;
+ }
+
+ /*
+ * These commands should be disallowed before the data
+ * parameter is examined so NULL is OK here.
+ */
+ ret = __vm_sev_ioctl(vm, cmd_id, NULL);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "Should not be able call command: %d. ret: %d, errno: %d",
+ cmd_id, ret, errno);
+ }
+
+ vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+}
+
+static void test_sev_mirror(bool es)
+{
+ struct kvm_vm *src_vm, *dst_vm;
+ int i;
+
+ src_vm = sev_vm_create(es);
+ dst_vm = aux_vm_create(false);
+
+ sev_mirror_create(dst_vm, src_vm);
+
+ /* Check that we can complete creation of the mirror VM. */
+ for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+ __vm_vcpu_add(dst_vm, i);
+
+ if (es)
+ vm_sev_ioctl(dst_vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+
+ verify_mirror_allowed_cmds(dst_vm);
+
+ kvm_vm_free(src_vm);
+ kvm_vm_free(dst_vm);
+}
+
+static void test_sev_mirror_parameters(void)
+{
+ struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_with_vcpu;
+ int ret;
+
+ sev_vm = sev_vm_create(/* es= */ false);
+ vm_with_vcpu = aux_vm_create(true);
+ vm_no_vcpu = aux_vm_create(false);
+
+ ret = __sev_mirror_create(sev_vm, sev_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "Should not be able copy context to self. ret: %d, errno: %d",
+ ret, errno);
+
+ ret = __sev_mirror_create(vm_no_vcpu, vm_with_vcpu);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "Copy context requires SEV enabled. ret %d, errno: %d", ret,
+ errno);
+
+ ret = __sev_mirror_create(vm_with_vcpu, sev_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d",
+ ret, errno);
+
+ if (!have_sev_es)
+ goto out;
+
+ sev_es_vm = sev_vm_create(/* es= */ true);
+ ret = __sev_mirror_create(sev_vm, sev_es_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d",
+ ret, errno);
+
+ ret = __sev_mirror_create(sev_es_vm, sev_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d",
+ ret, errno);
+
+ kvm_vm_free(sev_es_vm);
+
+out:
+ kvm_vm_free(sev_vm);
+ kvm_vm_free(vm_with_vcpu);
+ kvm_vm_free(vm_no_vcpu);
+}
+
+static void test_sev_move_copy(void)
+{
+ struct kvm_vm *dst_vm, *dst2_vm, *dst3_vm, *sev_vm, *mirror_vm,
+ *dst_mirror_vm, *dst2_mirror_vm, *dst3_mirror_vm;
+
+ sev_vm = sev_vm_create(/* es= */ false);
+ dst_vm = aux_vm_create(true);
+ dst2_vm = aux_vm_create(true);
+ dst3_vm = aux_vm_create(true);
+ mirror_vm = aux_vm_create(false);
+ dst_mirror_vm = aux_vm_create(false);
+ dst2_mirror_vm = aux_vm_create(false);
+ dst3_mirror_vm = aux_vm_create(false);
+
+ sev_mirror_create(mirror_vm, sev_vm);
+
+ sev_migrate_from(dst_mirror_vm, mirror_vm);
+ sev_migrate_from(dst_vm, sev_vm);
+
+ sev_migrate_from(dst2_vm, dst_vm);
+ sev_migrate_from(dst2_mirror_vm, dst_mirror_vm);
+
+ sev_migrate_from(dst3_mirror_vm, dst2_mirror_vm);
+ sev_migrate_from(dst3_vm, dst2_vm);
+
+ kvm_vm_free(dst_vm);
+ kvm_vm_free(sev_vm);
+ kvm_vm_free(dst2_vm);
+ kvm_vm_free(dst3_vm);
+ kvm_vm_free(mirror_vm);
+ kvm_vm_free(dst_mirror_vm);
+ kvm_vm_free(dst2_mirror_vm);
+ kvm_vm_free(dst3_mirror_vm);
+
+ /*
+ * Run a similar test but destroy mirrors before the mirrored VMs to
+ * ensure destruction is done safely.
+ */
+ sev_vm = sev_vm_create(/* es= */ false);
+ dst_vm = aux_vm_create(true);
+ mirror_vm = aux_vm_create(false);
+ dst_mirror_vm = aux_vm_create(false);
+
+ sev_mirror_create(mirror_vm, sev_vm);
+
+ sev_migrate_from(dst_mirror_vm, mirror_vm);
+ sev_migrate_from(dst_vm, sev_vm);
+
+ kvm_vm_free(mirror_vm);
+ kvm_vm_free(dst_mirror_vm);
+ kvm_vm_free(dst_vm);
+ kvm_vm_free(sev_vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM));
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+ have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
+
+ if (kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) {
+ test_sev_migrate_from(/* es= */ false);
+ if (have_sev_es)
+ test_sev_migrate_from(/* es= */ true);
+ test_sev_migrate_locking();
+ test_sev_migrate_parameters();
+ if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM))
+ test_sev_move_copy();
+ }
+ if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) {
+ test_sev_mirror(/* es= */ false);
+ if (have_sev_es)
+ test_sev_mirror(/* es= */ true);
+ test_sev_mirror_parameters();
+ }
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <math.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "linux/psp-sev.h"
+#include "sev.h"
+
+#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)
+
+static void guest_sev_es_code(void)
+{
+ /* TODO: Check CPUID after GHCB-based hypercall support is added. */
+ GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
+ GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED);
+
+ /*
+ * TODO: Add GHCB and ucall support for SEV-ES guests. For now, simply
+ * force "termination" to signal "done" via the GHCB MSR protocol.
+ */
+ wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
+ __asm__ __volatile__("rep; vmmcall");
+}
+
+static void guest_sev_code(void)
+{
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV));
+ GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
+
+ GUEST_DONE();
+}
+
+/* Stash state passed via VMSA before any compiled code runs. */
+extern void guest_code_xsave(void);
+asm("guest_code_xsave:\n"
+ "mov $" __stringify(XFEATURE_MASK_X87_AVX) ", %eax\n"
+ "xor %edx, %edx\n"
+ "xsave (%rdi)\n"
+ "jmp guest_sev_es_code");
+
+static void compare_xsave(u8 *from_host, u8 *from_guest)
+{
+ int i;
+ bool bad = false;
+ for (i = 0; i < 4095; i++) {
+ if (from_host[i] != from_guest[i]) {
+ printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]);
+ bad = true;
+ }
+ }
+
+ if (bad)
+ abort();
+}
+
+static void test_sync_vmsa(uint32_t policy)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_vaddr_t gva;
+ void *hva;
+
+ double x87val = M_PI;
+ struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 };
+
+ vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu);
+ gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR,
+ MEM_REGION_TEST_DATA);
+ hva = addr_gva2hva(vm, gva);
+
+ vcpu_args_set(vcpu, 1, gva);
+
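+ /*
+ * Build a host-side XSAVE image with non-init x87 and AVX state (pi on
+ * the x87 stack, ymm4 set to all ones) and load it into the vCPU, so it
+ * can be compared against the state the guest stashes after launch.
+ */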
+ asm("fninit\n"
+ "vpcmpeqb %%ymm4, %%ymm4, %%ymm4\n"
+ "fldl %3\n"
+ "xsave (%2)\n"
+ "fstp %%st\n"
+ : "=m"(xsave)
+ : "A"(XFEATURE_MASK_X87_AVX), "r"(&xsave), "m" (x87val)
+ : "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");
+ vcpu_xsave_set(vcpu, &xsave);
+
+ vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL);
+
+ /* This page is shared, so make it decrypted. */
+ memset(hva, 0, 4096);
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
+ "Wanted SYSTEM_EVENT, got %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM);
+ TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1);
+ TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ);
+
+ compare_xsave((u8 *)&xsave, (u8 *)hva);
+
+ kvm_vm_free(vm);
+}
+
+static void test_sev(void *guest_code, uint64_t policy)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
+
+ vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu);
+
+ /* TODO: Validate the measurement is as expected. */
+ vm_sev_launch(vm, policy, NULL);
+
+ for (;;) {
+ vcpu_run(vcpu);
+
+ if (policy & SEV_POLICY_ES) {
+ TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
+ "Wanted SYSTEM_EVENT, got %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM);
+ TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1);
+ TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ);
+ break;
+ }
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ continue;
+ case UCALL_DONE:
+ goto done;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unexpected exit: %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+}
+
+static void guest_shutdown_code(void)
+{
+ struct desc_ptr idt;
+
+ /* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. */
+ memset(&idt, 0, sizeof(idt));
+ __asm__ __volatile__("lidt %0" :: "m"(idt));
+
+ __asm__ __volatile__("ud2");
+}
+
+static void test_sev_es_shutdown(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ uint32_t type = KVM_X86_SEV_ES_VM;
+
+ vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu);
+
+ vm_sev_launch(vm, SEV_POLICY_ES, NULL);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN,
+ "Wanted SHUTDOWN, got %s",
+ exit_reason_str(vcpu->run->exit_reason));
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ const u64 xf_mask = XFEATURE_MASK_X87_AVX;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+ test_sev(guest_sev_code, SEV_POLICY_NO_DBG);
+ test_sev(guest_sev_code, 0);
+
+ if (kvm_cpu_has(X86_FEATURE_SEV_ES)) {
+ test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG);
+ test_sev(guest_sev_es_code, SEV_POLICY_ES);
+
+ test_sev_es_shutdown();
+
+ if (kvm_has_cap(KVM_CAP_XCRS) &&
+ (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) {
+ test_sync_vmsa(0);
+ test_sync_vmsa(SEV_POLICY_NO_DBG);
+ }
+ }
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Test that KVM emulates instructions in response to EPT violations when
+ * allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR.
+ */
+#include "flds_emulation.h"
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define MAXPHYADDR 36
+
+#define MEM_REGION_GVA 0x0000123456789000
+#define MEM_REGION_GPA 0x0000000700000000
+#define MEM_REGION_SLOT 10
+#define MEM_REGION_SIZE PAGE_SIZE
+
+static void guest_code(bool tdp_enabled)
+{
+ uint64_t error_code;
+ uint64_t vector;
+
+ vector = kvm_asm_safe_ec(FLDS_MEM_EAX, error_code, "a"(MEM_REGION_GVA));
+
+ /*
+ * When TDP is enabled, flds will trigger an emulation failure, exit to
+ * userspace, and then the selftest host "VMM" skips the instruction.
+ *
+ * When TDP is disabled, no instruction emulation is required so flds
+ * should generate #PF(RSVD).
+ */
+ if (tdp_enabled) {
+ GUEST_ASSERT(!vector);
+ } else {
+ GUEST_ASSERT_EQ(vector, PF_VECTOR);
+ GUEST_ASSERT(error_code & PFERR_RSVD_MASK);
+ }
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ uint64_t *pte;
+ uint64_t *hva;
+ uint64_t gpa;
+ int rc;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled());
+
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_MAX_PHY_ADDR, MAXPHYADDR);
+
+ rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE);
+ TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable");
+ vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ MEM_REGION_GPA, MEM_REGION_SLOT,
+ MEM_REGION_SIZE / PAGE_SIZE, 0);
+ gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE,
+ MEM_REGION_GPA, MEM_REGION_SLOT);
+ TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc");
+ virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1);
+ hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+ memset(hva, 0, PAGE_SIZE);
+
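+ /*
+ * Set a PTE bit at position MAXPHYADDR, which is reserved from the
+ * guest's point of view but is a legal physical address bit for the
+ * underlying hardware.
+ */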
+ pte = vm_get_page_table_entry(vm, MEM_REGION_GVA);
+ *pte |= BIT_ULL(MAXPHYADDR);
+
+ vcpu_run(vcpu);
+
+ /*
+ * When TDP is enabled, KVM must emulate the access to a guest physical
+ * address that is illegal from the guest's perspective but legal from
+ * hardware's perspective. This should result in an emulation
+ * failure exit to userspace since KVM doesn't support emulating flds.
+ */
+ if (kvm_is_tdp_enabled()) {
+ handle_flds_emulation_failure_exit(vcpu);
+ vcpu_run(vcpu);
+ }
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unrecognized ucall: %lu", uc.cmd);
+ }
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for SMM.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+
+#include "vmx.h"
+#include "svm_util.h"
+
+#define SMRAM_SIZE 65536
+#define SMRAM_MEMSLOT ((1 << 16) | 1)
+#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
+#define SMRAM_GPA 0x1000000
+#define SMRAM_STAGE 0xfe
+
+#define STR(x) #x
+#define XSTR(s) STR(s)
+
+#define SYNC_PORT 0xe
+#define DONE 0xff
+
+/*
+ * This is compiled as normal 64-bit code; the SMI handler, however, is
+ * executed in real-address mode. To keep things simple we limit ourselves
+ * to a mode-independent subset of asm here.
+ * The SMI handler always reports back the fixed stage SMRAM_STAGE.
+ */
+uint8_t smi_handler[] = {
+ 0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */
+ 0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */
+ 0x0f, 0xaa, /* rsm */
+};
+
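+/*
+ * Report the current stage to the host: the "in" from SYNC_PORT triggers a
+ * KVM_EXIT_IO, and the host reads the stage value back from the guest's RAX.
+ */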
+static inline void sync_with_host(uint64_t phase)
+{
+ asm volatile("in $" XSTR(SYNC_PORT)", %%al \n"
+ : "+a" (phase));
+}
+
+static void self_smi(void)
+{
+ x2apic_write_reg(APIC_ICR,
+ APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
+}
+
+static void l2_guest_code(void)
+{
+ sync_with_host(8);
+
+ sync_with_host(10);
+
+ vmcall();
+}
+
+static void guest_code(void *arg)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
+ struct svm_test_data *svm = arg;
+ struct vmx_pages *vmx_pages = arg;
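+ /*
+ * 'arg' points at either SVM or VMX nested-state pages; which
+ * interpretation applies is chosen below via this_cpu_has(X86_FEATURE_SVM).
+ */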
+
+ sync_with_host(1);
+
+ wrmsr(MSR_IA32_APICBASE, apicbase | X2APIC_ENABLE);
+
+ sync_with_host(2);
+
+ self_smi();
+
+ sync_with_host(4);
+
+ if (arg) {
+ if (this_cpu_has(X86_FEATURE_SVM)) {
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ } else {
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ }
+
+ sync_with_host(5);
+
+ self_smi();
+
+ sync_with_host(7);
+
+ if (this_cpu_has(X86_FEATURE_SVM)) {
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ } else {
+ vmlaunch();
+ vmresume();
+ }
+
+ /* Stages 8-11 are eaten by SMM (SMRAM_STAGE reported instead) */
+ sync_with_host(12);
+ }
+
+ sync_with_host(DONE);
+}
+
+void inject_smi(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events;
+
+ vcpu_events_get(vcpu, &events);
+
+ events.smi.pending = 1;
+ events.flags |= KVM_VCPUEVENT_VALID_SMM;
+
+ vcpu_events_set(vcpu, &events);
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t nested_gva = 0;
+
+ struct kvm_vcpu *vcpu;
+ struct kvm_regs regs;
+ struct kvm_vm *vm;
+ struct kvm_x86_state *state;
+ int stage, stage_reported;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM));
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
+ SMRAM_MEMSLOT, SMRAM_PAGES, 0);
+ TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT)
+ == SMRAM_GPA, "could not allocate guest physical addresses?");
+
+ memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE);
+ memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler,
+ sizeof(smi_handler));
+
+ vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA);
+
+ if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
+ if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_gva);
+ else if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &nested_gva);
+ }
+
+ if (!nested_gva)
+ pr_info("will skip SMM test with VMX enabled\n");
+
+ vcpu_args_set(vcpu, 1, nested_gva);
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ memset(&regs, 0, sizeof(regs));
+ vcpu_regs_get(vcpu, &regs);
+
+ stage_reported = regs.rax & 0xff;
+
+ if (stage_reported == DONE)
+ goto done;
+
+ TEST_ASSERT(stage_reported == stage ||
+ stage_reported == SMRAM_STAGE,
+ "Unexpected stage: #%x, got %x",
+ stage, stage_reported);
+
+ /*
+ * Enter SMM during L2 execution and check that we correctly
+ * return from it. Do not perform save/restore while in SMM yet.
+ */
+ if (stage == 8) {
+ inject_smi(vcpu);
+ continue;
+ }
+
+ /*
+ * Perform save/restore while the guest is in SMM triggered
+ * during L2 execution.
+ */
+ if (stage == 10)
+ inject_smi(vcpu);
+
+ state = vcpu_save_state(vcpu);
+ kvm_vm_release(vm);
+
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
+ }
+
+done:
+ kvm_vm_free(vm);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM_GET/SET_* tests
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for vCPU state save/restore, including nested guest state.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+#define L2_GUEST_STACK_SIZE 256
+
+void svm_l2_guest_code(void)
+{
+ GUEST_SYNC(4);
+ /* Exit to L1 */
+ vmcall();
+ GUEST_SYNC(6);
+ /* Done, exit to L1 and never come back. */
+ vmcall();
+}
+
+static void svm_l1_guest_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ GUEST_ASSERT(svm->vmcb_gpa);
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm, svm_l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_SYNC(3);
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_SYNC(5);
+ vmcb->save.rip += 3;
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_SYNC(7);
+}
+
+void vmx_l2_guest_code(void)
+{
+ GUEST_SYNC(6);
+
+ /* Exit to L1 */
+ vmcall();
+
+ /* L1 has now set up a shadow VMCS for us. */
+ GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+ GUEST_SYNC(10);
+ GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
+ GUEST_SYNC(11);
+ GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
+ GUEST_SYNC(12);
+
+ /* Done, exit to L1 and never come back. */
+ vmcall();
+}
+
+static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT(vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_SYNC(3);
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+ GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+ GUEST_SYNC(4);
+ GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+ prepare_vmcs(vmx_pages, vmx_l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_SYNC(5);
+ GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ /* Check that the launched state is preserved. */
+ GUEST_ASSERT(vmlaunch());
+
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ GUEST_SYNC(7);
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);
+
+ vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
+ vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa);
+
+ GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
+ GUEST_ASSERT(vmlaunch());
+ GUEST_SYNC(8);
+ GUEST_ASSERT(vmlaunch());
+ GUEST_ASSERT(vmresume());
+
+ vmwrite(GUEST_RIP, 0xc0ffee);
+ GUEST_SYNC(9);
+ GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+
+ GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
+ GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
+ GUEST_ASSERT(vmlaunch());
+ GUEST_ASSERT(vmresume());
+ GUEST_SYNC(13);
+ GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
+ GUEST_ASSERT(vmlaunch());
+ GUEST_ASSERT(vmresume());
+}
+
+static void __attribute__((__flatten__)) guest_code(void *arg)
+{
+ GUEST_SYNC(1);
+
+ if (this_cpu_has(X86_FEATURE_XSAVE)) {
+ uint64_t supported_xcr0 = this_cpu_supported_xcr0();
+ uint8_t buffer[4096];
+
+ memset(buffer, 0xcc, sizeof(buffer));
+
+ /*
+ * Modify state for all supported xfeatures to take them out of
+ * their "init" state, i.e. to make them show up in XSTATE_BV.
+ *
+ * Note off-by-default features, e.g. AMX, are out of scope for
+ * this particular testcase as they have a different ABI.
+ */
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
+ asm volatile ("fincstp");
+
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
+ asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));
+
+ if (supported_xcr0 & XFEATURE_MASK_YMM)
+ asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));
+
+ if (supported_xcr0 & XFEATURE_MASK_AVX512) {
+ asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
+ asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
+ asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
+ }
+
+ if (this_cpu_has(X86_FEATURE_MPX)) {
+ uint64_t bounds[2] = { 10, 0xffffffffull };
+ uint64_t output[2] = { };
+
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);
+
+ /*
+ * Don't bother trying to get BNDCSR into the INUSE
+ * state. MSR_IA32_BNDCFGS doesn't count as it isn't
+ * managed via XSAVE/XRSTOR, and BNDCFGU can only be
+ * modified by XRSTOR. Stuffing XSTATE_BV in the host
+ * is simpler than doing XRSTOR here in the guest.
+ *
+ * However, temporarily enable MPX in BNDCFGS so that
+ * BNDMOV actually loads BND1. If MPX isn't *fully*
+ * enabled, all MPX instructions are treated as NOPs.
+ *
+ * Hand encode "bndmov (%rax),%bnd1" as support for MPX
+ * mnemonics/registers has been removed from gcc and
+ * clang (and was never fully supported by clang).
+ */
+ wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
+ asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));
+ /*
+ * Hand encode "bndmov %bnd1, (%rax)" to sanity check
+ * that BND1 actually got loaded.
+ */
+ asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
+ wrmsr(MSR_IA32_BNDCFGS, 0);
+
+ GUEST_ASSERT_EQ(bounds[0], output[0]);
+ GUEST_ASSERT_EQ(bounds[1], output[1]);
+ }
+ if (this_cpu_has(X86_FEATURE_PKU)) {
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
+ set_cr4(get_cr4() | X86_CR4_PKE);
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));
+
+ wrpkru(-1u);
+ }
+ }
+
+ GUEST_SYNC(2);
+
+ if (arg) {
+ if (this_cpu_has(X86_FEATURE_SVM))
+ svm_l1_guest_code(arg);
+ else
+ vmx_l1_guest_code(arg);
+ }
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ uint64_t *xstate_bv, saved_xstate_bv;
+ vm_vaddr_t nested_gva = 0;
+ struct kvm_cpuid2 empty_cpuid = {};
+ struct kvm_regs regs1, regs2;
+ struct kvm_vcpu *vcpu, *vcpuN;
+ struct kvm_vm *vm;
+ struct kvm_x86_state *state;
+ struct ucall uc;
+ int stage;
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vcpu_regs_get(vcpu, &regs1);
+
+ if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
+ if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_gva);
+ else if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &nested_gva);
+ }
+
+ if (!nested_gva)
+ pr_info("will skip nested state checks\n");
+
+ vcpu_args_set(vcpu, 1, nested_gva);
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ /* UCALL_SYNC is handled here. */
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+ stage, (ulong)uc.args[1]);
+
+ state = vcpu_save_state(vcpu);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vcpu, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+
+ /*
+ * Restore XSAVE state in a dummy vCPU, first without doing
+ * KVM_SET_CPUID2, and then with an empty guest CPUID. Except
+ * for off-by-default xfeatures, e.g. AMX, KVM is supposed to
+ * allow KVM_SET_XSAVE regardless of guest CPUID. Manually
+ * load only XSAVE state, MSRs in particular have a much more
+ * convoluted ABI.
+ *
+ * Load two versions of XSAVE state: one with the actual guest
+ * XSAVE state, and one with all supported features forced "on"
+ * in xstate_bv, e.g. to ensure that KVM allows loading all
+ * supported features, even if something goes awry in saving
+ * the original snapshot.
+ */
+ xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
+ saved_xstate_bv = *xstate_bv;
+
+ vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
+ vcpu_xsave_set(vcpuN, state->xsave);
+ *xstate_bv = kvm_cpu_supported_xcr0();
+ vcpu_xsave_set(vcpuN, state->xsave);
+
+ vcpu_init_cpuid(vcpuN, &empty_cpuid);
+ vcpu_xsave_set(vcpuN, state->xsave);
+ *xstate_bv = saved_xstate_bv;
+ vcpu_xsave_set(vcpuN, state->xsave);
+
+ kvm_x86_state_cleanup(state);
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vcpu, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+ }
+
+done:
+ kvm_vm_free(vm);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_int_ctl_test
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Nested SVM testing: test simultaneous use of V_IRQ from L1 and L0.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "apic.h"
+
+bool vintr_irq_called;
+bool intr_irq_called;
+
+#define VINTR_IRQ_NUMBER 0x20
+#define INTR_IRQ_NUMBER 0x30
+
+static void vintr_irq_handler(struct ex_regs *regs)
+{
+ vintr_irq_called = true;
+}
+
+static void intr_irq_handler(struct ex_regs *regs)
+{
+ x2apic_write_reg(APIC_EOI, 0x00);
+ intr_irq_called = true;
+}
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+ /* This code raises interrupt INTR_IRQ_NUMBER in L1's LAPIC,
+ * and since L1 didn't enable virtual interrupt masking,
+ * L2 should receive it and not L1.
+ *
+ * L2 also has virtual interrupt 'VINTR_IRQ_NUMBER' pending in V_IRQ
+ * so it should also receive it after the following 'sti'.
+ */
+ x2apic_write_reg(APIC_ICR,
+ APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
+
+ __asm__ __volatile__(
+ "sti\n"
+ "nop\n"
+ );
+
+ GUEST_ASSERT(vintr_irq_called);
+ GUEST_ASSERT(intr_irq_called);
+
+ __asm__ __volatile__(
+ "vmcall\n"
+ );
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ x2apic_enable();
+
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* No virtual interrupt masking */
+ vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+
+ /* No intercepts for real and virtual interrupts */
+ vmcb->control.intercept &= ~(BIT(INTERCEPT_INTR) | BIT(INTERCEPT_VINTR));
+
+ /* Make a virtual interrupt VINTR_IRQ_NUMBER pending */
+ vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
+ vmcb->control.int_vector = VINTR_IRQ_NUMBER;
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ vm_vaddr_t svm_gva;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler);
+ vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler);
+
+ vcpu_alloc_svm(vm, &svm_gva);
+ vcpu_args_set(vcpu, 1, svm_gva);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ /* NOT REACHED */
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_nested_shutdown_test
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ * Nested SVM testing: test that unintercepted shutdown in L2 doesn't crash the host
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+ __asm__ __volatile__("ud2");
+}
+
+static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
+
+ idt[6].p = 0; // #UD is intercepted but its injection will cause #NP
+ idt[11].p = 0; // #NP is not intercepted and will cause another
+ // #NP that will be converted to #DF
+ idt[8].p = 0; // #DF will cause #NP which will cause SHUTDOWN
+
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ /* should not reach here */
+ GUEST_ASSERT(0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ vm_vaddr_t svm_gva;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vcpu_alloc_svm(vm, &svm_gva);
+
+ vcpu_args_set(vcpu, 2, svm_gva, vm->arch.idt);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+
+ kvm_vm_free(vm);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 Oracle and/or its affiliates.
+ *
+ * Based on:
+ * svm_int_ctl_test
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ */
+#include <stdatomic.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "test_util.h"
+
+#define INT_NR 0x20
+
+static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless");
+
+static unsigned int bp_fired;
+static void guest_bp_handler(struct ex_regs *regs)
+{
+ bp_fired++;
+}
+
+static unsigned int int_fired;
+static void l2_guest_code_int(void);
+
+static void guest_int_handler(struct ex_regs *regs)
+{
+ int_fired++;
+ GUEST_ASSERT_EQ(regs->rip, (unsigned long)l2_guest_code_int);
+}
+
+static void l2_guest_code_int(void)
+{
+ GUEST_ASSERT_EQ(int_fired, 1);
+
+ /*
+ * Same as the vmmcall() function, but with a ud2 sneaked after the
+ * vmmcall. The caller injects an exception with the return address
+ * increased by 2, so the "pop rbp" must be after the ud2 and we cannot
+ * use vmmcall() directly.
+ */
+ __asm__ __volatile__("push %%rbp; vmmcall; ud2; pop %%rbp"
+ : : "a"(0xdeadbeef), "c"(0xbeefdead)
+ : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+ "r10", "r11", "r12", "r13", "r14", "r15");
+
+ GUEST_ASSERT_EQ(bp_fired, 1);
+ hlt();
+}
+
+static atomic_int nmi_stage;
+#define nmi_stage_get() atomic_load_explicit(&nmi_stage, memory_order_acquire)
+#define nmi_stage_inc() atomic_fetch_add_explicit(&nmi_stage, 1, memory_order_acq_rel)
+static void guest_nmi_handler(struct ex_regs *regs)
+{
+ nmi_stage_inc();
+
+ if (nmi_stage_get() == 1) {
+ vmmcall();
+ GUEST_FAIL("Unexpected resume after VMMCALL");
+ } else {
+ GUEST_ASSERT_EQ(nmi_stage_get(), 3);
+ GUEST_DONE();
+ }
+}
+
+static void l2_guest_code_nmi(void)
+{
+ ud2();
+}
+
+static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t idt_alt)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ if (is_nmi)
+ x2apic_enable();
+
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm,
+ is_nmi ? l2_guest_code_nmi : l2_guest_code_int,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ vmcb->control.intercept_exceptions |= BIT(PF_VECTOR) | BIT(UD_VECTOR);
+ vmcb->control.intercept |= BIT(INTERCEPT_NMI) | BIT(INTERCEPT_HLT);
+
+ if (is_nmi) {
+ vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
+ } else {
+ vmcb->control.event_inj = INT_NR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_SOFT;
+ /* The return address to push on the stack */
+ vmcb->control.next_rip = vmcb->save.rip;
+ }
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
+ "Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'",
+ vmcb->control.exit_code,
+ vmcb->control.exit_info_1, vmcb->control.exit_info_2);
+
+ if (is_nmi) {
+ clgi();
+ x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI);
+
+ GUEST_ASSERT_EQ(nmi_stage_get(), 1);
+ nmi_stage_inc();
+
+ stgi();
+ /* self-NMI happens here */
+ while (true)
+ cpu_relax();
+ }
+
+ /* Skip over VMMCALL */
+ vmcb->save.rip += 3;
+
+ /* Switch to alternate IDT to cause intervening NPF again */
+ vmcb->save.idtr.base = idt_alt;
+ vmcb->control.clean = 0; /* &= ~BIT(VMCB_DT) would be enough */
+
+ vmcb->control.event_inj = BP_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
+ /* The return address to push on the stack; skip over the UD2 */
+ vmcb->control.next_rip = vmcb->save.rip + 2;
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
+ "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'",
+ vmcb->control.exit_code,
+ vmcb->control.exit_info_1, vmcb->control.exit_info_2);
+
+ GUEST_DONE();
+}
+
+static void run_test(bool is_nmi)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_vaddr_t svm_gva;
+ vm_vaddr_t idt_alt_vm;
+ struct kvm_guest_debug debug;
+
+ pr_info("Running %s test\n", is_nmi ? "NMI" : "soft int");
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
+ vm_install_exception_handler(vm, BP_VECTOR, guest_bp_handler);
+ vm_install_exception_handler(vm, INT_NR, guest_int_handler);
+
+ vcpu_alloc_svm(vm, &svm_gva);
+
+ if (!is_nmi) {
+ void *idt, *idt_alt;
+
+ idt_alt_vm = vm_vaddr_alloc_page(vm);
+ idt_alt = addr_gva2hva(vm, idt_alt_vm);
+ idt = addr_gva2hva(vm, vm->arch.idt);
+ memcpy(idt_alt, idt, getpagesize());
+ } else {
+ idt_alt_vm = 0;
+ }
+ vcpu_args_set(vcpu, 3, svm_gva, (uint64_t)is_nmi, (uint64_t)idt_alt_vm);
+
+ memset(&debug, 0, sizeof(debug));
+ vcpu_guest_debug_set(vcpu, &debug);
+
+ struct ucall uc;
+
+ alarm(2);
+ vcpu_run(vcpu);
+ alarm(0);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ /* NOT REACHED */
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+done:
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+ TEST_ASSERT(kvm_cpu_has(X86_FEATURE_NRIPS),
+ "KVM with nSVM is supposed to unconditionally advertise nRIP Save");
+
+ atomic_init(&nmi_stage, 0);
+
+ run_test(false);
+ run_test(true);
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_vmcall_test
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ *
+ * Nested SVM testing: VMCALL
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+ __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ vm_vaddr_t svm_gva;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ vcpu_alloc_svm(vm, &svm_gva);
+ vcpu_args_set(vcpu, 1, svm_gva);
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+ }
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for x86 KVM_CAP_SYNC_REGS
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * Verifies the expected behavior of x86 KVM_CAP_SYNC_REGS functionality,
+ * including requesting an invalid register set and updates to/from the
+ * values in kvm_run.s.regs as kvm_valid_regs and kvm_dirty_regs are toggled.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <pthread.h>
+
+#include "kvm_test_harness.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define UCALL_PIO_PORT ((uint16_t)0x1000)
+
+struct ucall uc_none = {
+ .cmd = UCALL_NONE,
+};
+
+/*
+ * The ucall is embedded here to protect against the compiler reshuffling
+ * registers before calling a function. In this test we only need to get a
+ * KVM_EXIT_IO vmexit and preserve RBX; no additional information is needed.
+ */
+void guest_code(void)
+{
+ asm volatile("1: in %[port], %%al\n"
+ "add $0x1, %%rbx\n"
+ "jmp 1b"
+ : : [port] "d" (UCALL_PIO_PORT), "D" (&uc_none)
+ : "rax", "rbx");
+}
+
+KVM_ONE_VCPU_TEST_SUITE(sync_regs_test);
+
+static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
+{
+#define REG_COMPARE(reg) \
+ TEST_ASSERT(left->reg == right->reg, \
+ "Register " #reg \
+ " values did not match: 0x%llx, 0x%llx", \
+ left->reg, right->reg)
+ REG_COMPARE(rax);
+ REG_COMPARE(rbx);
+ REG_COMPARE(rcx);
+ REG_COMPARE(rdx);
+ REG_COMPARE(rsi);
+ REG_COMPARE(rdi);
+ REG_COMPARE(rsp);
+ REG_COMPARE(rbp);
+ REG_COMPARE(r8);
+ REG_COMPARE(r9);
+ REG_COMPARE(r10);
+ REG_COMPARE(r11);
+ REG_COMPARE(r12);
+ REG_COMPARE(r13);
+ REG_COMPARE(r14);
+ REG_COMPARE(r15);
+ REG_COMPARE(rip);
+ REG_COMPARE(rflags);
+#undef REG_COMPARE
+}
+
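+/*
+ * Only the general-purpose registers are compared field by field; the sregs
+ * and vcpu_events comparisons below are currently empty placeholders.
+ */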
+static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right)
+{
+}
+
+static void compare_vcpu_events(struct kvm_vcpu_events *left,
+ struct kvm_vcpu_events *right)
+{
+}
+
+#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
+#define INVALID_SYNC_FIELD 0x80000000
+
+/*
+ * Set an exception as pending *and* injected while KVM is processing events.
+ * KVM is supposed to ignore/drop pending exceptions if userspace is also
+ * requesting that an exception be injected.
+ */
+static void *race_events_inj_pen(void *arg)
+{
+ struct kvm_run *run = (struct kvm_run *)arg;
+ struct kvm_vcpu_events *events = &run->s.regs.events;
+
+ WRITE_ONCE(events->exception.nr, UD_VECTOR);
+
+ for (;;) {
+ WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
+ WRITE_ONCE(events->flags, 0);
+ WRITE_ONCE(events->exception.injected, 1);
+ WRITE_ONCE(events->exception.pending, 1);
+
+ pthread_testcancel();
+ }
+
+ return NULL;
+}
+
+/*
+ * Set an invalid exception vector while KVM is processing events. KVM is
+ * supposed to reject any vector >= 32, as well as NMIs (vector 2).
+ */
+static void *race_events_exc(void *arg)
+{
+ struct kvm_run *run = (struct kvm_run *)arg;
+ struct kvm_vcpu_events *events = &run->s.regs.events;
+
+ for (;;) {
+ WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
+ WRITE_ONCE(events->flags, 0);
+ WRITE_ONCE(events->exception.nr, UD_VECTOR);
+ WRITE_ONCE(events->exception.pending, 1);
+ WRITE_ONCE(events->exception.nr, 255);
+
+ pthread_testcancel();
+ }
+
+ return NULL;
+}
+
+/*
+ * Toggle CR4.PAE while KVM is processing SREGS; EFER.LME=1 with CR4.PAE=0 is
+ * illegal, and KVM's MMU heavily relies on vCPU state being valid.
+ */
+static noinline void *race_sregs_cr4(void *arg)
+{
+ struct kvm_run *run = (struct kvm_run *)arg;
+ __u64 *cr4 = &run->s.regs.sregs.cr4;
+ __u64 pae_enabled = *cr4;
+ __u64 pae_disabled = *cr4 & ~X86_CR4_PAE;
+
+ for (;;) {
+ WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_SREGS);
+ WRITE_ONCE(*cr4, pae_enabled);
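+ /* Spin briefly so the PAE=1 value stays visible before it is cleared. */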
+ asm volatile(".rept 512\n\t"
+ "nop\n\t"
+ ".endr");
+ WRITE_ONCE(*cr4, pae_disabled);
+
+ pthread_testcancel();
+ }
+
+ return NULL;
+}
+
+static void race_sync_regs(struct kvm_vcpu *vcpu, void *racer)
+{
+ const time_t TIMEOUT = 2; /* seconds, roughly */
+ struct kvm_x86_state *state;
+ struct kvm_translation tr;
+ struct kvm_run *run;
+ pthread_t thread;
+ time_t t;
+
+ run = vcpu->run;
+
+ run->kvm_valid_regs = KVM_SYNC_X86_SREGS;
+ vcpu_run(vcpu);
+ run->kvm_valid_regs = 0;
+
+ /* Save state *before* spawning the thread that mucks with vCPU state. */
+ state = vcpu_save_state(vcpu);
+
+ /*
+ * Selftests run 64-bit guests by default, so both EFER.LME and CR4.PAE
+ * should already be set in the guest state.
+ */
+ TEST_ASSERT((run->s.regs.sregs.cr4 & X86_CR4_PAE) &&
+ (run->s.regs.sregs.efer & EFER_LME),
+ "vCPU should be in long mode, CR4.PAE=%d, EFER.LME=%d",
+ !!(run->s.regs.sregs.cr4 & X86_CR4_PAE),
+ !!(run->s.regs.sregs.efer & EFER_LME));
+
+ TEST_ASSERT_EQ(pthread_create(&thread, NULL, racer, (void *)run), 0);
+
+ for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
+ /*
+ * Reload known good state if the vCPU triple faults, e.g. due
+ * to the unhandled #GPs being injected. VMX preserves state
+ * on shutdown, but SVM synthesizes an INIT as the VMCB state
+ * is architecturally undefined on triple fault.
+ */
+ if (!__vcpu_run(vcpu) && run->exit_reason == KVM_EXIT_SHUTDOWN)
+ vcpu_load_state(vcpu, state);
+
+ if (racer == race_sregs_cr4) {
+ tr = (struct kvm_translation) { .linear_address = 0 };
+ __vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr);
+ }
+ }
+
+ TEST_ASSERT_EQ(pthread_cancel(thread), 0);
+ TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
+
+ kvm_x86_state_cleanup(state);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, read_invalid, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ int rv;
+
+ /* Request reading invalid register set from VCPU. */
+ run->kvm_valid_regs = INVALID_SYNC_FIELD;
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
+ rv);
+ run->kvm_valid_regs = 0;
+
+ run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
+ rv);
+ run->kvm_valid_regs = 0;
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, set_invalid, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ int rv;
+
+ /* Request setting invalid register set into VCPU. */
+ run->kvm_dirty_regs = INVALID_SYNC_FIELD;
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
+ rv);
+ run->kvm_dirty_regs = 0;
+
+ run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
+ rv);
+ run->kvm_dirty_regs = 0;
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, req_and_verify_all_valid, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_vcpu_events events;
+ struct kvm_sregs sregs;
+ struct kvm_regs regs;
+
+ /* Request and verify all valid register sets. */
+ /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ vcpu_regs_get(vcpu, ®s);
+ compare_regs(®s, &run->s.regs.regs);
+
+ vcpu_sregs_get(vcpu, &sregs);
+ compare_sregs(&sregs, &run->s.regs.sregs);
+
+ vcpu_events_get(vcpu, &events);
+ compare_vcpu_events(&events, &run->s.regs.events);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, set_and_verify_various, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_vcpu_events events;
+ struct kvm_sregs sregs;
+ struct kvm_regs regs;
+
+ /* Run once to get register set */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ /* Set and verify various register values. */
+ run->s.regs.regs.rbx = 0xBAD1DEA;
+ run->s.regs.sregs.apic_base = 1 << 11;
+ /* TODO run->s.regs.events.XYZ = ABC; */
+
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(run->s.regs.regs.rbx == 0xBAD1DEA + 1,
+ "rbx sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.rbx);
+ TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11,
+ "apic_base sync regs value incorrect 0x%llx.",
+ run->s.regs.sregs.apic_base);
+
+ vcpu_regs_get(vcpu, ®s);
+ compare_regs(®s, &run->s.regs.regs);
+
+ vcpu_sregs_get(vcpu, &sregs);
+ compare_sregs(&sregs, &run->s.regs.sregs);
+
+ vcpu_events_get(vcpu, &events);
+ compare_vcpu_events(&events, &run->s.regs.events);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_dirty_regs_bits, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+
+ /* Clear kvm_dirty_regs bits, verify new s.regs values are
+ * overwritten with existing guest values.
+ */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ run->kvm_dirty_regs = 0;
+ run->s.regs.regs.rbx = 0xDEADBEEF;
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(run->s.regs.regs.rbx != 0xDEADBEEF,
+ "rbx sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.rbx);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_and_dirty_regs, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_regs regs;
+
+ /* Run once to get register set */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+	/* Clear kvm_valid_regs bits and kvm_dirty_regs bits.
+ * Verify s.regs values are not overwritten with existing guest values
+ * and that guest values are not overwritten with kvm_sync_regs values.
+ */
+ run->kvm_valid_regs = 0;
+ run->kvm_dirty_regs = 0;
+ run->s.regs.regs.rbx = 0xAAAA;
+	vcpu_regs_get(vcpu, &regs);
+	regs.rbx = 0xBAC0;
+	vcpu_regs_set(vcpu, &regs);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA,
+ "rbx sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.rbx);
+	vcpu_regs_get(vcpu, &regs);
+ TEST_ASSERT(regs.rbx == 0xBAC0 + 1,
+ "rbx guest value incorrect 0x%llx.",
+ regs.rbx);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_regs_bits, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_regs regs;
+
+ /* Run once to get register set */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten
+ * with existing guest values but that guest values are overwritten
+ * with kvm_sync_regs values.
+ */
+ run->kvm_valid_regs = 0;
+ run->kvm_dirty_regs = TEST_SYNC_FIELDS;
+ run->s.regs.regs.rbx = 0xBBBB;
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB,
+ "rbx sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.rbx);
+	vcpu_regs_get(vcpu, &regs);
+ TEST_ASSERT(regs.rbx == 0xBBBB + 1,
+ "rbx guest value incorrect 0x%llx.",
+ regs.rbx);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, race_cr4, guest_code)
+{
+ race_sync_regs(vcpu, race_sregs_cr4);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, race_exc, guest_code)
+{
+ race_sync_regs(vcpu, race_events_exc);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, race_inj_pen, guest_code)
+{
+ race_sync_regs(vcpu, race_events_inj_pen);
+}
+
+int main(int argc, char *argv[])
+{
+ int cap;
+
+ cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
+ TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS);
+ TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD));
+
+ return test_harness_run(argc, argv);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define ARBITRARY_IO_PORT 0x2000
+
+/* The virtual machine object. */
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+ asm volatile("inb %%dx, %%al"
+ : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
+}
+
+#define L2_GUEST_STACK_SIZE 64
+unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+void l1_guest_code_vmx(struct vmx_pages *vmx)
+{
+ GUEST_ASSERT(vmx->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ GUEST_ASSERT(load_vmcs(vmx));
+
+ prepare_vmcs(vmx, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_ASSERT(!vmlaunch());
+	/* L2 should triple fault after a triple fault event is injected. */
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
+ GUEST_DONE();
+}
+
+void l1_guest_code_svm(struct svm_test_data *svm)
+{
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	/* Don't intercept shutdown to test the case where SVM allows it. */
+ vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
+
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ /* should not reach here, L1 should crash */
+ GUEST_ASSERT(0);
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vcpu_events events;
+ struct ucall uc;
+
+ bool has_vmx = kvm_cpu_has(X86_FEATURE_VMX);
+ bool has_svm = kvm_cpu_has(X86_FEATURE_SVM);
+
+ TEST_REQUIRE(has_vmx || has_svm);
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT));
+
+ if (has_vmx) {
+ vm_vaddr_t vmx_pages_gva;
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx);
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+ } else {
+ vm_vaddr_t svm_gva;
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm);
+ vcpu_alloc_svm(vm, &svm_gva);
+ vcpu_args_set(vcpu, 1, svm_gva);
+ }
+
+ vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1);
+ run = vcpu->run;
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
+ "Expected IN from port %d from L2, got port %d",
+ ARBITRARY_IO_PORT, run->io.port);
+ vcpu_events_get(vcpu, &events);
+ events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
+ events.triple_fault.pending = true;
+ vcpu_events_set(vcpu, &events);
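+	/*
+	 * Complete the in-flight IN emulation without re-entering the guest;
+	 * the injected triple fault should still be pending on the next run.
+	 */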
+ run->immediate_exit = true;
+ vcpu_run_complete_io(vcpu);
+
+ vcpu_events_get(vcpu, &events);
+ TEST_ASSERT(events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT,
+ "Triple fault event invalid");
+ TEST_ASSERT(events.triple_fault.pending,
+ "No triple fault pending");
+ vcpu_run(vcpu);
+
+ if (has_svm) {
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+ } else {
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ }
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests for MSR_IA32_TSC and MSR_IA32_TSC_ADJUST.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <string.h>
+#include "kvm_util.h"
+#include "processor.h"
+
+#define UNITY (1ull << 30)
+#define HOST_ADJUST (UNITY * 64)
+#define GUEST_STEP (UNITY * 4)
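+/*
+ * Round to the nearest multiple of UNITY (2^30); comparisons are then
+ * insensitive to the cycles that elapse between individual reads.
+ */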
+#define ROUND(x) ((x + UNITY / 2) & -UNITY)
+#define rounded_rdmsr(x) ROUND(rdmsr(x))
+#define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vcpu, x))
+
+static void guest_code(void)
+{
+ u64 val = 0;
+
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /* Guest: writes to MSR_IA32_TSC affect both MSRs. */
+ val = 1ull * GUEST_STEP;
+ wrmsr(MSR_IA32_TSC, val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */
+ GUEST_SYNC(2);
+ val = 2ull * GUEST_STEP;
+ wrmsr(MSR_IA32_TSC_ADJUST, val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /* Host: setting the TSC offset. */
+ GUEST_SYNC(3);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /*
+ * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
+ * host-side offset and affect both MSRs.
+ */
+ GUEST_SYNC(4);
+ val = 3ull * GUEST_STEP;
+ wrmsr(MSR_IA32_TSC_ADJUST, val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /*
+ * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
+ * offset is now visible in MSR_IA32_TSC_ADJUST.
+ */
+ GUEST_SYNC(5);
+ val = 4ull * GUEST_STEP;
+ wrmsr(MSR_IA32_TSC, val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+
+ GUEST_DONE();
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ if (!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage + 1)
+ ksft_test_result_pass("stage %d passed\n", stage + 1);
+ else
+ ksft_test_result_fail(
+ "stage %d: Unexpected register values vmexit, got %lx",
+ stage + 1, (ulong)uc.args[1]);
+ return;
+ case UCALL_DONE:
+ ksft_test_result_pass("stage %d passed\n", stage + 1);
+ return;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_ASSERT(false, "Unexpected exit: %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ uint64_t val;
+
+ ksft_print_header();
+ ksft_set_plan(5);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ val = 0;
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /* Guest: writes to MSR_IA32_TSC affect both MSRs. */
+ run_vcpu(vcpu, 1);
+ val = 1ull * GUEST_STEP;
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */
+ run_vcpu(vcpu, 2);
+ val = 2ull * GUEST_STEP;
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /*
+ * Host: writes to MSR_IA32_TSC set the host-side offset
+ * and therefore do not change MSR_IA32_TSC_ADJUST.
+ */
+ vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+ run_vcpu(vcpu, 3);
+
+ /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC. */
+ vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456);
+
+ /* Restore previous value. */
+ vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /*
+ * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
+ * host-side offset and affect both MSRs.
+ */
+ run_vcpu(vcpu, 4);
+ val = 3ull * GUEST_STEP;
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /*
+ * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
+ * offset is now visible in MSR_IA32_TSC_ADJUST.
+ */
+ run_vcpu(vcpu, 5);
+ val = 4ull * GUEST_STEP;
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+
+ kvm_vm_free(vm);
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2021 Amazon.com, Inc. or its affiliates.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <stdint.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+
+#define NR_TEST_VCPUS 20
+
+static struct kvm_vm *vm;
+pthread_spinlock_t create_lock;
+
+#define TEST_TSC_KHZ 2345678UL
+#define TEST_TSC_OFFSET 200000000
+
+uint64_t tsc_sync;
+static void guest_code(void)
+{
+ uint64_t start_tsc, local_tsc, tmp;
+
+ start_tsc = rdtsc();
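+	/*
+	 * Each vCPU repeatedly publishes its TSC reading and checks that its
+	 * own reading never falls behind the last value published by any
+	 * vCPU; a backwards step means the TSCs are not synchronized.
+	 */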
+ do {
+ tmp = READ_ONCE(tsc_sync);
+ local_tsc = rdtsc();
+ WRITE_ONCE(tsc_sync, local_tsc);
+ if (unlikely(local_tsc < tmp))
+ GUEST_SYNC_ARGS(0, local_tsc, tmp, 0, 0);
+
+ } while (local_tsc - start_tsc < 5000 * TEST_TSC_KHZ);
+
+ GUEST_DONE();
+}
+
+static void *run_vcpu(void *_cpu_nr)
+{
+ unsigned long vcpu_id = (unsigned long)_cpu_nr;
+ unsigned long failures = 0;
+ static bool first_cpu_done;
+ struct kvm_vcpu *vcpu;
+
+ /* The kernel is fine, but vm_vcpu_add() needs locking */
+ pthread_spin_lock(&create_lock);
+
+ vcpu = vm_vcpu_add(vm, vcpu_id, guest_code);
+
+ if (!first_cpu_done) {
+ first_cpu_done = true;
+ vcpu_set_msr(vcpu, MSR_IA32_TSC, TEST_TSC_OFFSET);
+ }
+
+ pthread_spin_unlock(&create_lock);
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ goto out;
+
+ case UCALL_SYNC:
+ printf("Guest %d sync %lx %lx %ld\n", vcpu->id,
+ uc.args[2], uc.args[3], uc.args[2] - uc.args[3]);
+ failures++;
+ break;
+
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+ out:
+ return (void *)failures;
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_TSC_CONTROL));
+
+ vm = vm_create(NR_TEST_VCPUS);
+ vm_ioctl(vm, KVM_SET_TSC_KHZ, (void *) TEST_TSC_KHZ);
+
+ pthread_spin_init(&create_lock, PTHREAD_PROCESS_PRIVATE);
+ pthread_t cpu_threads[NR_TEST_VCPUS];
+ unsigned long cpu;
+ for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++)
+ pthread_create(&cpu_threads[cpu], NULL, run_vcpu, (void *)cpu);
+
+ unsigned long failures = 0;
+ for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++) {
+ void *this_cpu_failures;
+ pthread_join(cpu_threads[cpu], &this_cpu_failures);
+ failures += (unsigned long)this_cpu_failures;
+ }
+
+ TEST_ASSERT(!failures, "TSC sync failed");
+ pthread_spin_destroy(&create_lock);
+ kvm_vm_free(vm);
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucna_injection_test
+ *
+ * Copyright (C) 2022, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Test that user space can inject UnCorrectable No Action required (UCNA)
+ * memory errors into the guest.
+ *
+ * The test starts one vCPU with MCG_CMCI_P enabled. It verifies that
+ * proper UCNA errors can be injected into a vCPU with MCG_CMCI_P and the
+ * corresponding per-bank control register (MCI_CTL2) bit enabled.
+ * The test also checks that the UCNA errors get recorded in the
+ * Machine Check bank registers regardless of whether the error signal
+ * interrupts are delivered to the guest.
+ *
+ */
+#include <pthread.h>
+#include <inttypes.h>
+#include <string.h>
+#include <time.h>
+
+#include "kvm_util.h"
+#include "mce.h"
+#include "processor.h"
+#include "test_util.h"
+#include "apic.h"
+
+#define SYNC_FIRST_UCNA 9
+#define SYNC_SECOND_UCNA 10
+#define SYNC_GP 11
+#define FIRST_UCNA_ADDR 0xdeadbeef
+#define SECOND_UCNA_ADDR 0xcafeb0ba
+
+/*
+ * Vector for the CMCI interrupt.
+ * Value is arbitrary. Any value in 0x20-0xFF should work:
+ * https://wiki.osdev.org/Interrupt_Vector_Table
+ */
+#define CMCI_VECTOR 0xa9
+
+#define UCNA_BANK 0x7 // IMC0 bank
+
+#define MCI_CTL2_RESERVED_BIT BIT_ULL(29)
+
+static uint64_t supported_mcg_caps;
+
+/*
+ * Record state about the injected UCNA.
+ * Variables with the 'i_' prefix are recorded in the interrupt handler;
+ * variables without the prefix are recorded in the guest's main
+ * execution thread.
+ */
+static volatile uint64_t i_ucna_rcvd;
+static volatile uint64_t i_ucna_addr;
+static volatile uint64_t ucna_addr;
+static volatile uint64_t ucna_addr2;
+
+struct thread_params {
+ struct kvm_vcpu *vcpu;
+ uint64_t *p_i_ucna_rcvd;
+ uint64_t *p_i_ucna_addr;
+ uint64_t *p_ucna_addr;
+ uint64_t *p_ucna_addr2;
+};
+
+static void verify_apic_base_addr(void)
+{
+ uint64_t msr = rdmsr(MSR_IA32_APICBASE);
+ uint64_t base = GET_APIC_BASE(msr);
+
+ GUEST_ASSERT(base == APIC_DEFAULT_GPA);
+}
+
+static void ucna_injection_guest_code(void)
+{
+ uint64_t ctl2;
+ verify_apic_base_addr();
+ xapic_enable();
+
+	/* Sets up the interrupt vector and enables per-bank CMCI signaling. */
+ xapic_write_reg(APIC_LVTCMCI, CMCI_VECTOR | APIC_DM_FIXED);
+ ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
+
+ /* Enables interrupt in guest. */
+ asm volatile("sti");
+
+ /* Let user space inject the first UCNA */
+ GUEST_SYNC(SYNC_FIRST_UCNA);
+
+ ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+
+ /* Disables the per-bank CMCI signaling. */
+ ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 & ~MCI_CTL2_CMCI_EN);
+
+ /* Let the user space inject the second UCNA */
+ GUEST_SYNC(SYNC_SECOND_UCNA);
+
+ ucna_addr2 = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+ GUEST_DONE();
+}
+
+static void cmci_disabled_guest_code(void)
+{
+ uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
+
+ GUEST_DONE();
+}
+
+static void cmci_enabled_guest_code(void)
+{
+ uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_RESERVED_BIT);
+
+ GUEST_DONE();
+}
+
+static void guest_cmci_handler(struct ex_regs *regs)
+{
+ i_ucna_rcvd++;
+ i_ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+ xapic_write_reg(APIC_EOI, 0);
+}
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ GUEST_SYNC(SYNC_GP);
+}
+
+static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC,
+ "Expect UCALL_SYNC");
+ TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected.");
+ printf("vCPU received GP in guest.\n");
+}
+
+static void inject_ucna(struct kvm_vcpu *vcpu, uint64_t addr)
+{
+ /*
+ * A UCNA error is indicated with VAL=1, UC=1, PCC=0, S=0 and AR=0 in
+ * the IA32_MCi_STATUS register.
+ * MSCOD=1 (BIT[16] - MscodDataRdErr).
+ * MCACOD=0x0090 (Memory controller error format, channel 0)
+ */
+ uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
+ MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090;
+ struct kvm_x86_mce mce = {};
+ mce.status = status;
+ mce.mcg_status = 0;
+ /*
+ * MCM_ADDR_PHYS indicates the reported address is a physical address.
+	 * The lowest 6 bits are the recoverable address LSB, i.e., the injected MCE
+ * is at 4KB granularity.
+ */
+ mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
+ mce.addr = addr;
+ mce.bank = UCNA_BANK;
+
+ vcpu_ioctl(vcpu, KVM_X86_SET_MCE, &mce);
+}
+
+static void *run_ucna_injection(void *arg)
+{
+ struct thread_params *params = (struct thread_params *)arg;
+ struct ucall uc;
+ int old;
+ int r;
+
+ r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+ TEST_ASSERT(r == 0,
+ "pthread_setcanceltype failed with errno=%d",
+ r);
+
+ vcpu_run(params->vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
+ "Expect UCALL_SYNC");
+ TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA.");
+
+ printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR);
+
+ inject_ucna(params->vcpu, FIRST_UCNA_ADDR);
+ vcpu_run(params->vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
+ "Expect UCALL_SYNC");
+ TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA.");
+
+ printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR);
+
+ inject_ucna(params->vcpu, SECOND_UCNA_ADDR);
+ vcpu_run(params->vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
+ if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) {
+ TEST_ASSERT(false, "vCPU assertion failure: %s.",
+ (const char *)uc.args[0]);
+ }
+
+ return NULL;
+}
+
+static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *params)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ params->vcpu = vcpu;
+ params->p_i_ucna_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_rcvd);
+ params->p_i_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_addr);
+ params->p_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr);
+ params->p_ucna_addr2 = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr2);
+
+ run_ucna_injection(params);
+
+	TEST_ASSERT(*params->p_i_ucna_rcvd == 1, "Only the first UCNA should be signaled.");
+	TEST_ASSERT(*params->p_i_ucna_addr == FIRST_UCNA_ADDR,
+		    "Only the first UCNA's reported address should be recorded via the interrupt.");
+	TEST_ASSERT(*params->p_ucna_addr == FIRST_UCNA_ADDR,
+		    "The first injected UCNA should be exposed via registers.");
+	TEST_ASSERT(*params->p_ucna_addr2 == SECOND_UCNA_ADDR,
+		    "The second injected UCNA should be exposed via registers.");
+
+ printf("Test successful.\n"
+ "UCNA CMCI interrupts received: %ld\n"
+ "Last UCNA address received via CMCI: %lx\n"
+ "First UCNA address in vCPU thread: %lx\n"
+ "Second UCNA address in vCPU thread: %lx\n",
+ *params->p_i_ucna_rcvd, *params->p_i_ucna_addr,
+ *params->p_ucna_addr, *params->p_ucna_addr2);
+}
+
+static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p)
+{
+ uint64_t mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS;
+ if (enable_cmci_p)
+ mcg_caps |= MCG_CMCI_P;
+
+ mcg_caps &= supported_mcg_caps | MCG_CAP_BANKS_MASK;
+ vcpu_ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_caps);
+}
+
+static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, uint32_t vcpuid,
+ bool enable_cmci_p, void *guest_code)
+{
+ struct kvm_vcpu *vcpu = vm_vcpu_add(vm, vcpuid, guest_code);
+ setup_mce_cap(vcpu, enable_cmci_p);
+ return vcpu;
+}
+
+int main(int argc, char *argv[])
+{
+ struct thread_params params;
+ struct kvm_vm *vm;
+ struct kvm_vcpu *ucna_vcpu;
+ struct kvm_vcpu *cmcidis_vcpu;
+ struct kvm_vcpu *cmci_vcpu;
+
+ kvm_check_cap(KVM_CAP_MCE);
+
+ vm = __vm_create(VM_SHAPE_DEFAULT, 3, 0);
+
+ kvm_ioctl(vm->kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED,
+ &supported_mcg_caps);
+
+ if (!(supported_mcg_caps & MCG_CMCI_P)) {
+ print_skip("MCG_CMCI_P is not supported");
+ exit(KSFT_SKIP);
+ }
+
+ ucna_vcpu = create_vcpu_with_mce_cap(vm, 0, true, ucna_injection_guest_code);
+ cmcidis_vcpu = create_vcpu_with_mce_cap(vm, 1, false, cmci_disabled_guest_code);
+ cmci_vcpu = create_vcpu_with_mce_cap(vm, 2, true, cmci_enabled_guest_code);
+
+ vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+	test_ucna_injection(ucna_vcpu, &params);
+ run_vcpu_expect_gp(cmcidis_vcpu);
+ run_vcpu_expect_gp(cmci_vcpu);
+
+ kvm_vm_free(vm);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+static void guest_ins_port80(uint8_t *buffer, unsigned int count)
+{
+ unsigned long end;
+
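+	/*
+	 * Mirror the RCX rewrites done by main(): a count of 2 is shrunk to
+	 * a single byte, and a count of 3 is grown to 8192 bytes.
+	 */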
+ if (count == 2)
+ end = (unsigned long)buffer + 1;
+ else
+ end = (unsigned long)buffer + 8192;
+
+ asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory");
+ GUEST_ASSERT_EQ(count, 0);
+ GUEST_ASSERT_EQ((unsigned long)buffer, end);
+}
+
+static void guest_code(void)
+{
+ uint8_t buffer[8192];
+ int i;
+
+ /*
+ * Special case tests. main() will adjust RCX 2 => 1 and 3 => 8192 to
+ * test that KVM doesn't explode when userspace modifies the "count" on
+ * a userspace I/O exit. KVM isn't required to play nice with the I/O
+ * itself as KVM doesn't support manipulating the count, it just needs
+ * to not explode or overflow a buffer.
+ */
+ guest_ins_port80(buffer, 2);
+ guest_ins_port80(buffer, 3);
+
+ /* Verify KVM fills the buffer correctly when not stuffing RCX. */
+ memset(buffer, 0, sizeof(buffer));
+ guest_ins_port80(buffer, 8192);
+ for (i = 0; i < 8192; i++)
+ __GUEST_ASSERT(buffer[i] == 0xaa,
+ "Expected '0xaa', got '0x%x' at buffer[%u]",
+ buffer[i], i);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_regs regs;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
+
+	memset(&regs, 0, sizeof(regs));
+
+ while (1) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ if (get_ucall(vcpu, &uc))
+ break;
+
+ TEST_ASSERT(run->io.port == 0x80,
+ "Expected I/O at port 0x80, got port 0x%x", run->io.port);
+
+ /*
+ * Modify the rep string count in RCX: 2 => 1 and 3 => 8192.
+ * Note, this abuses KVM's batching of rep string I/O to avoid
+ * getting stuck in an infinite loop. That behavior isn't in
+ * scope from a testing perspective as it's not ABI in any way,
+ * i.e. it really is abusing internal KVM knowledge.
+ */
+		vcpu_regs_get(vcpu, &regs);
+ if (regs.rcx == 2)
+ regs.rcx = 1;
+ if (regs.rcx == 3)
+ regs.rcx = 8192;
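+		/* Fill the kvm_run I/O data buffer with 0xaa so the guest can verify the data. */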
+ memset((void *)run + run->io.data_offset, 0xaa, 4096);
+		vcpu_regs_set(vcpu, &regs);
+ }
+
+ switch (uc.cmd) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ kvm_vm_free(vm);
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for exiting into userspace on registered MSRs
+ */
+#include <sys/ioctl.h>
+
+#include "kvm_test_harness.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define MSR_NON_EXISTENT 0x474f4f00
+
+static u64 deny_bits = 0;
+struct kvm_msr_filter filter_allow = {
+ .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+ .ranges = {
+ {
+ .flags = KVM_MSR_FILTER_READ |
+ KVM_MSR_FILTER_WRITE,
+ .nmsrs = 1,
+ /* Test an MSR the kernel knows about. */
+ .base = MSR_IA32_XSS,
+ .bitmap = (uint8_t*)&deny_bits,
+ }, {
+ .flags = KVM_MSR_FILTER_READ |
+ KVM_MSR_FILTER_WRITE,
+ .nmsrs = 1,
+ /* Test an MSR the kernel doesn't know about. */
+ .base = MSR_IA32_FLUSH_CMD,
+ .bitmap = (uint8_t*)&deny_bits,
+ }, {
+ .flags = KVM_MSR_FILTER_READ |
+ KVM_MSR_FILTER_WRITE,
+ .nmsrs = 1,
+ /* Test a fabricated MSR that no one knows about. */
+ .base = MSR_NON_EXISTENT,
+ .bitmap = (uint8_t*)&deny_bits,
+ },
+ },
+};
+
+struct kvm_msr_filter filter_fs = {
+ .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+ .ranges = {
+ {
+ .flags = KVM_MSR_FILTER_READ,
+ .nmsrs = 1,
+ .base = MSR_FS_BASE,
+ .bitmap = (uint8_t*)&deny_bits,
+ },
+ },
+};
+
+struct kvm_msr_filter filter_gs = {
+ .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+ .ranges = {
+ {
+ .flags = KVM_MSR_FILTER_READ,
+ .nmsrs = 1,
+ .base = MSR_GS_BASE,
+ .bitmap = (uint8_t*)&deny_bits,
+ },
+ },
+};
+
+static uint64_t msr_non_existent_data;
+static int guest_exception_count;
+static u32 msr_reads, msr_writes;
+
+static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_deadbeef[1] = { 0x1 };
+
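+/* Clearing an MSR's bit in a filter bitmap denies (traps) access to that MSR. */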
+static void deny_msr(uint8_t *bitmap, u32 msr)
+{
+ u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1);
+
+ bitmap[idx / 8] &= ~(1 << (idx % 8));
+}
+
+static void prepare_bitmaps(void)
+{
+ memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000));
+ memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write));
+ memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000));
+ memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000));
+ memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read));
+
+ deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL);
+ deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK);
+ deny_msr(bitmap_c0000000_read, MSR_GS_BASE);
+}
+
+struct kvm_msr_filter filter_deny = {
+ .flags = KVM_MSR_FILTER_DEFAULT_DENY,
+ .ranges = {
+ {
+ .flags = KVM_MSR_FILTER_READ,
+ .base = 0x00000000,
+ .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+ .bitmap = bitmap_00000000,
+ }, {
+ .flags = KVM_MSR_FILTER_WRITE,
+ .base = 0x00000000,
+ .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+ .bitmap = bitmap_00000000_write,
+ }, {
+ .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE,
+ .base = 0x40000000,
+ .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+ .bitmap = bitmap_40000000,
+ }, {
+ .flags = KVM_MSR_FILTER_READ,
+ .base = 0xc0000000,
+ .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+ .bitmap = bitmap_c0000000_read,
+ }, {
+ .flags = KVM_MSR_FILTER_WRITE,
+ .base = 0xc0000000,
+ .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+ .bitmap = bitmap_c0000000,
+ }, {
+ .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ,
+ .base = 0xdeadbeef,
+ .nmsrs = 1,
+ .bitmap = bitmap_deadbeef,
+ },
+ },
+};
+
+struct kvm_msr_filter no_filter_deny = {
+ .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+};
+
+/*
+ * Note: Force test_rdmsr() to not be inlined to prevent the labels,
+ * rdmsr_start and rdmsr_end, from being defined multiple times.
+ */
+static noinline uint64_t test_rdmsr(uint32_t msr)
+{
+ uint32_t a, d;
+
+ guest_exception_count = 0;
+
+ __asm__ __volatile__("rdmsr_start: rdmsr; rdmsr_end:" :
+ "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+ return a | ((uint64_t) d << 32);
+}
+
+/*
+ * Note: Force test_wrmsr() to not be inlined to prevent the labels,
+ * wrmsr_start and wrmsr_end, from being defined multiple times.
+ */
+static noinline void test_wrmsr(uint32_t msr, uint64_t value)
+{
+ uint32_t a = value;
+ uint32_t d = value >> 32;
+
+ guest_exception_count = 0;
+
+ __asm__ __volatile__("wrmsr_start: wrmsr; wrmsr_end:" ::
+ "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+extern char rdmsr_start, rdmsr_end;
+extern char wrmsr_start, wrmsr_end;
+
+/*
+ * Note: Force test_em_rdmsr() to not be inlined to prevent the labels,
+ * em_rdmsr_start and em_rdmsr_end, from being defined multiple times.
+ */
+static noinline uint64_t test_em_rdmsr(uint32_t msr)
+{
+ uint32_t a, d;
+
+ guest_exception_count = 0;
+
+ __asm__ __volatile__(KVM_FEP "em_rdmsr_start: rdmsr; em_rdmsr_end:" :
+ "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+ return a | ((uint64_t) d << 32);
+}
+
+/*
+ * Note: Force test_em_wrmsr() to not be inlined to prevent the labels,
+ * em_wrmsr_start and em_wrmsr_end, from being defined multiple times.
+ */
+static noinline void test_em_wrmsr(uint32_t msr, uint64_t value)
+{
+ uint32_t a = value;
+ uint32_t d = value >> 32;
+
+ guest_exception_count = 0;
+
+ __asm__ __volatile__(KVM_FEP "em_wrmsr_start: wrmsr; em_wrmsr_end:" ::
+ "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+extern char em_rdmsr_start, em_rdmsr_end;
+extern char em_wrmsr_start, em_wrmsr_end;
+
+static void guest_code_filter_allow(void)
+{
+ uint64_t data;
+
+ /*
+ * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_XSS.
+ *
+ * A GP is thrown if anything other than 0 is written to
+ * MSR_IA32_XSS.
+ */
+ data = test_rdmsr(MSR_IA32_XSS);
+ GUEST_ASSERT(data == 0);
+ GUEST_ASSERT(guest_exception_count == 0);
+
+ test_wrmsr(MSR_IA32_XSS, 0);
+ GUEST_ASSERT(guest_exception_count == 0);
+
+ test_wrmsr(MSR_IA32_XSS, 1);
+ GUEST_ASSERT(guest_exception_count == 1);
+
+ /*
+ * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_FLUSH_CMD.
+ *
+ * A GP is thrown if MSR_IA32_FLUSH_CMD is read
+ * from or if a value other than 1 is written to it.
+ */
+ test_rdmsr(MSR_IA32_FLUSH_CMD);
+ GUEST_ASSERT(guest_exception_count == 1);
+
+ test_wrmsr(MSR_IA32_FLUSH_CMD, 0);
+ GUEST_ASSERT(guest_exception_count == 1);
+
+ test_wrmsr(MSR_IA32_FLUSH_CMD, 1);
+ GUEST_ASSERT(guest_exception_count == 0);
+
+ /*
+ * Test userspace intercepting rdmsr / wrmsr for MSR_NON_EXISTENT.
+ *
+ * Test that a fabricated MSR can pass through the kernel
+ * and be handled in userspace.
+ */
+ test_wrmsr(MSR_NON_EXISTENT, 2);
+ GUEST_ASSERT(guest_exception_count == 0);
+
+ data = test_rdmsr(MSR_NON_EXISTENT);
+ GUEST_ASSERT(data == 2);
+ GUEST_ASSERT(guest_exception_count == 0);
+
+ if (is_forced_emulation_enabled) {
+ /* Let userspace know we aren't done. */
+ GUEST_SYNC(0);
+
+ /*
+ * Now run the same tests with the instruction emulator.
+ */
+ data = test_em_rdmsr(MSR_IA32_XSS);
+ GUEST_ASSERT(data == 0);
+ GUEST_ASSERT(guest_exception_count == 0);
+ test_em_wrmsr(MSR_IA32_XSS, 0);
+ GUEST_ASSERT(guest_exception_count == 0);
+ test_em_wrmsr(MSR_IA32_XSS, 1);
+ GUEST_ASSERT(guest_exception_count == 1);
+
+ test_em_rdmsr(MSR_IA32_FLUSH_CMD);
+ GUEST_ASSERT(guest_exception_count == 1);
+ test_em_wrmsr(MSR_IA32_FLUSH_CMD, 0);
+ GUEST_ASSERT(guest_exception_count == 1);
+ test_em_wrmsr(MSR_IA32_FLUSH_CMD, 1);
+ GUEST_ASSERT(guest_exception_count == 0);
+
+ test_em_wrmsr(MSR_NON_EXISTENT, 2);
+ GUEST_ASSERT(guest_exception_count == 0);
+ data = test_em_rdmsr(MSR_NON_EXISTENT);
+ GUEST_ASSERT(data == 2);
+ GUEST_ASSERT(guest_exception_count == 0);
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_msr_calls(bool trapped)
+{
+ /* This goes into the in-kernel emulation */
+ wrmsr(MSR_SYSCALL_MASK, 0);
+
+ if (trapped) {
+ /* This goes into user space emulation */
+ GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK);
+ GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE);
+ } else {
+ GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK);
+ GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE);
+ }
+
+ /* If trapped == true, this goes into user space emulation */
+ wrmsr(MSR_IA32_POWER_CTL, 0x1234);
+
+ /* This goes into the in-kernel emulation */
+ rdmsr(MSR_IA32_POWER_CTL);
+
+ /* Invalid MSR, should always be handled by user space exit */
+ GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef);
+ wrmsr(0xdeadbeef, 0x1234);
+}
+
+static void guest_code_filter_deny(void)
+{
+ guest_msr_calls(true);
+
+ /*
+ * Disable msr filtering, so that the kernel
+ * handles everything in the next round
+ */
+ GUEST_SYNC(0);
+
+ guest_msr_calls(false);
+
+ GUEST_DONE();
+}
+
+static void guest_code_permission_bitmap(void)
+{
+ uint64_t data;
+
+ data = test_rdmsr(MSR_FS_BASE);
+ GUEST_ASSERT(data == MSR_FS_BASE);
+ data = test_rdmsr(MSR_GS_BASE);
+ GUEST_ASSERT(data != MSR_GS_BASE);
+
+ /* Let userspace know to switch the filter */
+ GUEST_SYNC(0);
+
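+	/*
+	 * With the GS_BASE filter installed, FS_BASE reads are now handled
+	 * in-kernel while GS_BASE reads exit to userspace, which echoes the
+	 * MSR index back as the data.
+	 */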
+ data = test_rdmsr(MSR_FS_BASE);
+ GUEST_ASSERT(data != MSR_FS_BASE);
+ data = test_rdmsr(MSR_GS_BASE);
+ GUEST_ASSERT(data == MSR_GS_BASE);
+
+ GUEST_DONE();
+}
+
+static void __guest_gp_handler(struct ex_regs *regs,
+ char *r_start, char *r_end,
+ char *w_start, char *w_end)
+{
+ if (regs->rip == (uintptr_t)r_start) {
+ regs->rip = (uintptr_t)r_end;
+ regs->rax = 0;
+ regs->rdx = 0;
+ } else if (regs->rip == (uintptr_t)w_start) {
+ regs->rip = (uintptr_t)w_end;
+ } else {
+ GUEST_ASSERT(!"RIP is at an unknown location!");
+ }
+
+ ++guest_exception_count;
+}
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ __guest_gp_handler(regs, &rdmsr_start, &rdmsr_end,
+ &wrmsr_start, &wrmsr_end);
+}
+
+static void guest_fep_gp_handler(struct ex_regs *regs)
+{
+ __guest_gp_handler(regs, &em_rdmsr_start, &em_rdmsr_end,
+ &em_wrmsr_start, &em_wrmsr_end);
+}
+
+static void check_for_guest_assert(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (vcpu->run->exit_reason == KVM_EXIT_IO &&
+ get_ucall(vcpu, &uc) == UCALL_ABORT) {
+ REPORT_GUEST_ASSERT(uc);
+ }
+}
+
+static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
+{
+ struct kvm_run *run = vcpu->run;
+
+ check_for_guest_assert(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_RDMSR);
+ TEST_ASSERT(run->msr.index == msr_index,
+ "Unexpected msr (0x%04x), expected 0x%04x",
+ run->msr.index, msr_index);
+
+ switch (run->msr.index) {
+ case MSR_IA32_XSS:
+ run->msr.data = 0;
+ break;
+ case MSR_IA32_FLUSH_CMD:
+ run->msr.error = 1;
+ break;
+ case MSR_NON_EXISTENT:
+ run->msr.data = msr_non_existent_data;
+ break;
+ case MSR_FS_BASE:
+ run->msr.data = MSR_FS_BASE;
+ break;
+ case MSR_GS_BASE:
+ run->msr.data = MSR_GS_BASE;
+ break;
+ default:
+ TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
+ }
+}
+
+static void process_wrmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
+{
+ struct kvm_run *run = vcpu->run;
+
+ check_for_guest_assert(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_WRMSR);
+ TEST_ASSERT(run->msr.index == msr_index,
+ "Unexpected msr (0x%04x), expected 0x%04x",
+ run->msr.index, msr_index);
+
+ switch (run->msr.index) {
+ case MSR_IA32_XSS:
+ if (run->msr.data != 0)
+ run->msr.error = 1;
+ break;
+ case MSR_IA32_FLUSH_CMD:
+ if (run->msr.data != 1)
+ run->msr.error = 1;
+ break;
+ case MSR_NON_EXISTENT:
+ msr_non_existent_data = run->msr.data;
+ break;
+ default:
+ TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
+ }
+}
+
+static void process_ucall_done(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ check_for_guest_assert(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE,
+ "Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
+ uc.cmd, UCALL_DONE);
+}
+
+static uint64_t process_ucall(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc = {};
+
+ check_for_guest_assert(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ break;
+ case UCALL_ABORT:
+ check_for_guest_assert(vcpu);
+ break;
+ case UCALL_DONE:
+ process_ucall_done(vcpu);
+ break;
+ default:
+ TEST_ASSERT(false, "Unexpected ucall");
+ }
+
+ return uc.cmd;
+}
+
+static void run_guest_then_process_rdmsr(struct kvm_vcpu *vcpu,
+ uint32_t msr_index)
+{
+ vcpu_run(vcpu);
+ process_rdmsr(vcpu, msr_index);
+}
+
+static void run_guest_then_process_wrmsr(struct kvm_vcpu *vcpu,
+ uint32_t msr_index)
+{
+ vcpu_run(vcpu);
+ process_wrmsr(vcpu, msr_index);
+}
+
+static uint64_t run_guest_then_process_ucall(struct kvm_vcpu *vcpu)
+{
+ vcpu_run(vcpu);
+ return process_ucall(vcpu);
+}
+
+static void run_guest_then_process_ucall_done(struct kvm_vcpu *vcpu)
+{
+ vcpu_run(vcpu);
+ process_ucall_done(vcpu);
+}
+
+KVM_ONE_VCPU_TEST_SUITE(user_msr);
+
+KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ uint64_t cmd;
+ int rc;
+
+ rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+ TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+ vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
+
+ rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+ TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+ vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
+
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+
+ /* Process guest code userspace exits. */
+ run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+
+ run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+
+ run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
+ run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
+
+ vcpu_run(vcpu);
+ cmd = process_ucall(vcpu);
+
+ if (is_forced_emulation_enabled) {
+ TEST_ASSERT_EQ(cmd, UCALL_SYNC);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
+
+ /* Process emulated rdmsr and wrmsr instructions. */
+ run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+
+ run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+
+ run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
+ run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
+
+ /* Confirm the guest completed without issues. */
+ run_guest_then_process_ucall_done(vcpu);
+ } else {
+ TEST_ASSERT_EQ(cmd, UCALL_DONE);
+ printf("To run the instruction emulated tests set the module parameter 'kvm.force_emulation_prefix=1'\n");
+ }
+}
+
+static int handle_ucall(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_SYNC:
+ vm_ioctl(vcpu->vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny);
+ break;
+ case UCALL_DONE:
+ return 1;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ return 0;
+}
+
+static void handle_rdmsr(struct kvm_run *run)
+{
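+	/* Echo the MSR index back as the data so the guest can detect a userspace-handled read. */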
+ run->msr.data = run->msr.index;
+ msr_reads++;
+
+ if (run->msr.index == MSR_SYSCALL_MASK ||
+ run->msr.index == MSR_GS_BASE) {
+ TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+ "MSR read trap w/o access fault");
+ }
+
+ if (run->msr.index == 0xdeadbeef) {
+ TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+ "MSR deadbeef read trap w/o inval fault");
+ }
+}
+
+static void handle_wrmsr(struct kvm_run *run)
+{
+ /* ignore */
+ msr_writes++;
+
+ if (run->msr.index == MSR_IA32_POWER_CTL) {
+ TEST_ASSERT(run->msr.data == 0x1234,
+ "MSR data for MSR_IA32_POWER_CTL incorrect");
+ TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+ "MSR_IA32_POWER_CTL trap w/o access fault");
+ }
+
+ if (run->msr.index == 0xdeadbeef) {
+ TEST_ASSERT(run->msr.data == 0x1234,
+ "MSR data for deadbeef incorrect");
+ TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+ "deadbeef trap w/o inval fault");
+ }
+}
+
+KVM_ONE_VCPU_TEST(user_msr, msr_filter_deny, guest_code_filter_deny)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ struct kvm_run *run = vcpu->run;
+ int rc;
+
+ rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+ TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+ vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_INVAL |
+ KVM_MSR_EXIT_REASON_UNKNOWN |
+ KVM_MSR_EXIT_REASON_FILTER);
+
+ rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+ TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+ prepare_bitmaps();
+ vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_deny);
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ switch (run->exit_reason) {
+ case KVM_EXIT_X86_RDMSR:
+ handle_rdmsr(run);
+ break;
+ case KVM_EXIT_X86_WRMSR:
+ handle_wrmsr(run);
+ break;
+ case KVM_EXIT_IO:
+ if (handle_ucall(vcpu))
+ goto done;
+ break;
+ }
+
+ }
+
+done:
+ TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space");
+ TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space");
+}
+
+KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ int rc;
+
+ rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+ TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+ vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
+
+ rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+ TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+ vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs);
+ run_guest_then_process_rdmsr(vcpu, MSR_FS_BASE);
+ TEST_ASSERT(run_guest_then_process_ucall(vcpu) == UCALL_SYNC,
+ "Expected ucall state to be UCALL_SYNC.");
+ vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
+ run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE);
+ run_guest_then_process_ucall_done(vcpu);
+}
+
+#define test_user_exit_msr_ioctl(vm, cmd, arg, flag, valid_mask) \
+({ \
+ int r = __vm_ioctl(vm, cmd, arg); \
+ \
+ if (flag & valid_mask) \
+ TEST_ASSERT(!r, __KVM_IOCTL_ERROR(#cmd, r)); \
+ else \
+ TEST_ASSERT(r == -1 && errno == EINVAL, \
+ "Wanted EINVAL for %s with flag = 0x%llx, got rc: %i errno: %i (%s)", \
+ #cmd, flag, r, errno, strerror(errno)); \
+})
+
+static void run_user_space_msr_flag_test(struct kvm_vm *vm)
+{
+ struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_USER_SPACE_MSR };
+ int nflags = sizeof(cap.args[0]) * BITS_PER_BYTE;
+ int rc;
+ int i;
+
+ rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+ TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+
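+	/*
+	 * Try each single-bit flag; only bits within
+	 * KVM_MSR_EXIT_REASON_VALID_MASK should be accepted, anything else
+	 * should fail with EINVAL.
+	 */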
+ for (i = 0; i < nflags; i++) {
+ cap.args[0] = BIT_ULL(i);
+ test_user_exit_msr_ioctl(vm, KVM_ENABLE_CAP, &cap,
+ BIT_ULL(i), KVM_MSR_EXIT_REASON_VALID_MASK);
+ }
+}
+
+static void run_msr_filter_flag_test(struct kvm_vm *vm)
+{
+ u64 deny_bits = 0;
+ struct kvm_msr_filter filter = {
+ .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+ .ranges = {
+ {
+ .flags = KVM_MSR_FILTER_READ,
+ .nmsrs = 1,
+ .base = 0,
+ .bitmap = (uint8_t *)&deny_bits,
+ },
+ },
+ };
+ int nflags;
+ int rc;
+ int i;
+
+ rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+ TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+ nflags = sizeof(filter.flags) * BITS_PER_BYTE;
+ for (i = 0; i < nflags; i++) {
+ filter.flags = BIT_ULL(i);
+ test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter,
+ BIT_ULL(i), KVM_MSR_FILTER_VALID_MASK);
+ }
+
+ filter.flags = KVM_MSR_FILTER_DEFAULT_ALLOW;
+ nflags = sizeof(filter.ranges[0].flags) * BITS_PER_BYTE;
+ for (i = 0; i < nflags; i++) {
+ filter.ranges[0].flags = BIT_ULL(i);
+ test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter,
+ BIT_ULL(i), KVM_MSR_FILTER_RANGE_VALID_MASK);
+ }
+}
+
+/* Test that attempts to write to the unused bits in a flag fails. */
+KVM_ONE_VCPU_TEST(user_msr, user_exit_msr_flags, NULL)
+{
+ struct kvm_vm *vm = vcpu->vm;
+
+ /* Test flags for KVM_CAP_X86_USER_SPACE_MSR. */
+ run_user_space_msr_flag_test(vm);
+
+ /* Test flags and range flags for KVM_X86_SET_MSR_FILTER. */
+ run_msr_filter_flag_test(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness_run(argc, argv);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_apic_access_test
+ *
+ * Copyright (C) 2020, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * The first subtest simply checks to see that an L2 guest can be
+ * launched with a valid APIC-access address that is backed by a
+ * page of L1 physical memory.
+ *
+ * The second subtest sets the APIC-access address to a (valid) L1
+ * physical address that is not backed by memory. KVM can't handle
+ * this situation, so resuming L2 should result in a KVM exit for
+ * internal error (emulation). This is not an architectural
+ * requirement. It is just a shortcoming of KVM. The internal error
+ * is unfortunate, but it's better than what used to happen!
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+static void l2_guest_code(void)
+{
+ /* Exit to L1 */
+ __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint32_t control;
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+ control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
+ control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
+ vmwrite(APIC_ACCESS_ADDR, vmx_pages->apic_access_gpa);
+
+ /* Try to launch L2 with the memory-backed APIC-access address. */
+ GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ vmwrite(APIC_ACCESS_ADDR, high_gpa);
+
+ /* Try to resume L2 with the unbacked APIC-access address. */
+ GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ unsigned long apic_access_addr = ~0ul;
+ vm_vaddr_t vmx_pages_gva;
+ unsigned long high_gpa;
+ struct vmx_pages *vmx;
+ bool done = false;
+
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ high_gpa = (vm->max_gfn - 1) << vm->page_shift;
+
+ vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ prepare_virtualize_apic_accesses(vmx, vm);
+ vcpu_args_set(vcpu, 2, vmx_pages_gva, high_gpa);
+
+ while (!done) {
+ volatile struct kvm_run *run = vcpu->run;
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ if (apic_access_addr == high_gpa) {
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+ TEST_ASSERT(run->internal.suberror ==
+ KVM_INTERNAL_ERROR_EMULATION,
+ "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u",
+ run->internal.suberror);
+ break;
+ }
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ apic_access_addr = uc.args[1];
+ break;
+ case UCALL_DONE:
+ done = true;
+ break;
+ default:
+ TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd);
+ }
+ }
+ kvm_vm_free(vm);
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_close_while_nested
+ *
+ * Copyright (C) 2019, Red Hat, Inc.
+ *
+ * Verify that nothing bad happens if a KVM user exits with open
+ * file descriptors while executing a nested guest.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+enum {
+ PORT_L0_EXIT = 0x2000,
+};
+
+static void l2_guest_code(void)
+{
+ /* Exit to L0 */
+ asm volatile("inb %%dx, %%al"
+ : : [port] "d" (PORT_L0_EXIT) : "rax");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(0);
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ /* Allocate VMX pages and shared descriptors (vmx_pages). */
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ for (;;) {
+ volatile struct kvm_run *run = vcpu->run;
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ if (run->io.port == PORT_L0_EXIT)
+ break;
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging test
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+/* The memory slot index to track dirty pages */
+#define TEST_MEM_SLOT_INDEX 1
+#define TEST_MEM_PAGES 3
+
+/* L1 guest test virtual memory offset */
+#define GUEST_TEST_MEM 0xc0000000
+
+/* L2 guest test virtual memory offset */
+#define NESTED_TEST_MEM1 0xc0001000
+#define NESTED_TEST_MEM2 0xc0002000
+
+static void l2_guest_code(u64 *a, u64 *b)
+{
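+	/*
+	 * The boolean passed to GUEST_SYNC tells the host whether page 0 of
+	 * the test memslot is expected to be dirty for that iteration.
+	 */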
+ READ_ONCE(*a);
+ WRITE_ONCE(*a, 1);
+ GUEST_SYNC(true);
+ GUEST_SYNC(false);
+
+ WRITE_ONCE(*b, 1);
+ GUEST_SYNC(true);
+ WRITE_ONCE(*b, 1);
+ GUEST_SYNC(true);
+ GUEST_SYNC(false);
+
+ /* Exit to L1 and never come back. */
+ vmcall();
+}
+
+static void l2_guest_code_ept_enabled(void)
+{
+ l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2);
+}
+
+static void l2_guest_code_ept_disabled(void)
+{
+ /* Access the same L1 GPAs as l2_guest_code_ept_enabled() */
+ l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM);
+}
+
+void l1_guest_code(struct vmx_pages *vmx)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ void *l2_rip;
+
+ GUEST_ASSERT(vmx->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ GUEST_ASSERT(load_vmcs(vmx));
+
+ if (vmx->eptp_gpa)
+ l2_rip = l2_guest_code_ept_enabled;
+ else
+ l2_rip = l2_guest_code_ept_disabled;
+
+ prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_SYNC(false);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_SYNC(false);
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_DONE();
+}
+
+static void test_vmx_dirty_log(bool enable_ept)
+{
+ vm_vaddr_t vmx_pages_gva = 0;
+ struct vmx_pages *vmx;
+ unsigned long *bmap;
+ uint64_t *host_test_mem;
+
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ bool done = false;
+
+ pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled");
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ /* Add an extra memory slot for testing dirty logging */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ GUEST_TEST_MEM,
+ TEST_MEM_SLOT_INDEX,
+ TEST_MEM_PAGES,
+ KVM_MEM_LOG_DIRTY_PAGES);
+
+ /*
+ * Add an identity map for GVA range [0xc0000000, 0xc0002000). This
+ * affects both L1 and L2. However...
+ */
+ virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES);
+
+ /*
+ * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
+ * 0xc0000000.
+ *
+	 * Note that prepare_eptp should be called only after L1's GPA map is done,
+ * meaning after the last call to virt_map.
+ *
+ * When EPT is disabled, the L2 guest code will still access the same L1
+ * GPAs as the EPT enabled case.
+ */
+ if (enable_ept) {
+ prepare_eptp(vmx, vm, 0);
+ nested_map_memslot(vmx, vm, 0);
+ nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
+ nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
+ }
+
+ bmap = bitmap_zalloc(TEST_MEM_PAGES);
+ host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
+
+ while (!done) {
+ memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ /*
+ * The nested guest wrote at offset 0x1000 in the memslot, but the
+ * dirty bitmap must be filled in according to L1 GPA, not L2.
+ */
+ kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+ if (uc.args[1]) {
+ TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean");
+ TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest");
+ } else {
+ TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty");
+ TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest");
+ }
+
+ TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty");
+ TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
+ TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty");
+ TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
+ break;
+ case UCALL_DONE:
+ done = true;
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ test_vmx_dirty_log(/*enable_ept=*/false);
+
+ if (kvm_cpu_has_ept())
+ test_vmx_dirty_log(/*enable_ept=*/true);
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <signal.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include "kselftest.h"
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+ /* Loop on the ud2 until guest state is made invalid. */
+}
+
+static void guest_code(void)
+{
+ asm volatile("ud2");
+}
+
+static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+ TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+ "Expected emulation failure, got %d",
+ run->emulation_failure.suberror);
+}
+
+static void run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Always run twice to verify KVM handles the case where _KVM_ queues
+ * an exception with invalid state and then exits to userspace, i.e.
+ * that KVM doesn't explode if userspace ignores the initial error.
+ */
+ __run_vcpu_with_invalid_state(vcpu);
+ __run_vcpu_with_invalid_state(vcpu);
+}
+
+static void set_timer(void)
+{
+ struct itimerval timer;
+
+ timer.it_value.tv_sec = 0;
+ timer.it_value.tv_usec = 200;
+ timer.it_interval = timer.it_value;
+ TEST_ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
+}
+
+static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set)
+{
+ static struct kvm_sregs sregs;
+
+ if (!sregs.cr0)
+ vcpu_sregs_get(vcpu, &sregs);
+ sregs.tr.unusable = !!set;
+ vcpu_sregs_set(vcpu, &sregs);
+}
+
+static void set_invalid_guest_state(struct kvm_vcpu *vcpu)
+{
+ set_or_clear_invalid_guest_state(vcpu, true);
+}
+
+static void clear_invalid_guest_state(struct kvm_vcpu *vcpu)
+{
+ set_or_clear_invalid_guest_state(vcpu, false);
+}
+
+static struct kvm_vcpu *get_set_sigalrm_vcpu(struct kvm_vcpu *__vcpu)
+{
+ static struct kvm_vcpu *vcpu = NULL;
+
+ if (__vcpu)
+ vcpu = __vcpu;
+ return vcpu;
+}
+
+static void sigalrm_handler(int sig)
+{
+ struct kvm_vcpu *vcpu = get_set_sigalrm_vcpu(NULL);
+ struct kvm_vcpu_events events;
+
+ TEST_ASSERT(sig == SIGALRM, "Unexpected signal = %d", sig);
+
+ vcpu_events_get(vcpu, &events);
+
+ /*
+ * If an exception is pending, attempt KVM_RUN with invalid guest,
+ * otherwise rearm the timer and keep doing so until the timer fires
+ * between KVM queueing an exception and re-entering the guest.
+ */
+ if (events.exception.pending) {
+ set_invalid_guest_state(vcpu);
+ run_vcpu_with_invalid_state(vcpu);
+ } else {
+ set_timer();
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(host_cpu_is_intel);
+ TEST_REQUIRE(!vm_is_unrestricted_guest(NULL));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ get_set_sigalrm_vcpu(vcpu);
+
+ vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+
+ /*
+	 * Stuff invalid guest state by making TR unusable.  The next KVM_RUN
+	 * should exit to userspace with KVM_EXIT_INTERNAL_ERROR (emulation
+	 * failure), as KVM doesn't support emulating invalid guest state.
+ */
+ set_invalid_guest_state(vcpu);
+ run_vcpu_with_invalid_state(vcpu);
+
+ /*
+ * Verify KVM also handles the case where userspace gains control while
+ * an exception is pending and stuffs invalid state. Run with valid
+ * guest state and a timer firing every 200us, and attempt to enter the
+ * guest with invalid state when the handler interrupts KVM with an
+ * exception pending.
+ */
+ clear_invalid_guest_state(vcpu);
+ TEST_ASSERT(signal(SIGALRM, sigalrm_handler) != SIG_ERR,
+ "Failed to register SIGALRM handler, errno = %d (%s)",
+ errno, strerror(errno));
+
+ set_timer();
+ run_vcpu_with_invalid_state(vcpu);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define ARBITRARY_IO_PORT 0x2000
+
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+ /*
+ * Generate an exit to L0 userspace, i.e. main(), via I/O to an
+ * arbitrary port.
+ */
+ asm volatile("inb %%dx, %%al"
+ : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /*
+ * L2 must be run without unrestricted guest, verify that the selftests
+ * library hasn't enabled it. Because KVM selftests jump directly to
+ * 64-bit mode, unrestricted guest support isn't required.
+ */
+ GUEST_ASSERT(!(vmreadz(CPU_BASED_VM_EXEC_CONTROL) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) ||
+ !(vmreadz(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_UNRESTRICTED_GUEST));
+
+ GUEST_ASSERT(!vmlaunch());
+
+ /* L2 should triple fault after main() stuffs invalid guest state. */
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva;
+ struct kvm_sregs sregs;
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ /* Allocate VMX pages and shared descriptors (vmx_pages). */
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ vcpu_run(vcpu);
+
+ run = vcpu->run;
+
+ /*
+ * The first exit to L0 userspace should be an I/O access from L2.
+ * Running L1 should launch L2 without triggering an exit to userspace.
+ */
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
+ "Expected IN from port %d from L2, got port %d",
+ ARBITRARY_IO_PORT, run->io.port);
+
+ /*
+	 * Stuff invalid guest state for L2 by making TR unusable. The next
+ * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support
+ * emulating invalid guest state for L2.
+ */
+ memset(&sregs, 0, sizeof(sregs));
+ vcpu_sregs_get(vcpu, &sregs);
+ sregs.tr.unusable = 1;
+ vcpu_sregs_set(vcpu, &sregs);
+
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VMX control MSR test
+ *
+ * Copyright (C) 2022 Google LLC.
+ *
+ * Tests for KVM ownership of bits in the VMX entry/exit control MSRs. Checks
+ * that KVM will set owned bits where appropriate, and will not if
+ * KVM_X86_QUIRK_TWEAK_VMX_CTRL_MSRS is disabled.
+ */
+#include <linux/bitmap.h>
+#include "kvm_util.h"
+#include "vmx.h"
+
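+/*
+ * For each bit of @mask that is set in the MSR's current value, verify that
+ * userspace can clear the bit and then restore the original value;
+ * vcpu_set_msr() asserts that every write succeeds.  vmx_fixed0_msr_test()
+ * below does the converse for bits that are currently clear.
+ */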
+static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
+ uint64_t mask)
+{
+ uint64_t val = vcpu_get_msr(vcpu, msr_index);
+ uint64_t bit;
+
+ mask &= val;
+
+ for_each_set_bit(bit, &mask, 64) {
+ vcpu_set_msr(vcpu, msr_index, val & ~BIT_ULL(bit));
+ vcpu_set_msr(vcpu, msr_index, val);
+ }
+}
+
+static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
+ uint64_t mask)
+{
+ uint64_t val = vcpu_get_msr(vcpu, msr_index);
+ uint64_t bit;
+
+ mask = ~mask | val;
+
+ for_each_clear_bit(bit, &mask, 64) {
+ vcpu_set_msr(vcpu, msr_index, val | BIT_ULL(bit));
+ vcpu_set_msr(vcpu, msr_index, val);
+ }
+}
+
+static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index)
+{
+ vmx_fixed0_msr_test(vcpu, msr_index, GENMASK_ULL(31, 0));
+ vmx_fixed1_msr_test(vcpu, msr_index, GENMASK_ULL(63, 32));
+}
+
+static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu)
+{
+ vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, 0);
+ vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, -1ull);
+
+ vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_BASIC,
+ BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55));
+
+ vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_MISC,
+ BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) |
+ BIT_ULL(15) | BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30));
+
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2);
+ vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_EPT_VPID_CAP, -1ull);
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS);
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS);
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_EXIT_CTLS);
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS);
+ vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_VMFUNC, -1ull);
+}
+
+static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu,
+ uint64_t msr_bit,
+ struct kvm_x86_cpu_feature feature)
+{
+ uint64_t val;
+
+ vcpu_clear_cpuid_feature(vcpu, feature);
+
+ val = vcpu_get_msr(vcpu, MSR_IA32_FEAT_CTL);
+ vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
+ vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
+ vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
+ vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
+ vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val);
+
+ if (!kvm_cpu_has(feature))
+ return;
+
+ vcpu_set_cpuid_feature(vcpu, feature);
+}
+
+static void ia32_feature_control_msr_test(struct kvm_vcpu *vcpu)
+{
+ uint64_t supported_bits = FEAT_CTL_LOCKED |
+ FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
+ FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX |
+ FEAT_CTL_SGX_LC_ENABLED |
+ FEAT_CTL_SGX_ENABLED |
+ FEAT_CTL_LMCE_ENABLED;
+ int bit, r;
+
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_SMX);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_VMX);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX, X86_FEATURE_VMX);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX_LC);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_ENABLED, X86_FEATURE_SGX);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_LMCE_ENABLED, X86_FEATURE_MCE);
+
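+	/*
+	 * _vcpu_set_msr() returns the number of MSRs written, i.e. '0' here
+	 * means KVM rejected the write to the reserved bit, as expected.
+	 */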
+ for_each_clear_bit(bit, &supported_bits, 64) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, BIT(bit));
+ TEST_ASSERT(r == 0,
+ "Setting reserved bit %d in IA32_FEATURE_CONTROL should fail", bit);
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ /* No need to actually do KVM_RUN, thus no guest code. */
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ vmx_save_restore_msrs_test(vcpu);
+ ia32_feature_control_msr_test(vcpu);
+
+ kvm_vm_free(vm);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_nested_tsc_scaling_test
+ *
+ * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This test case verifies that nested TSC scaling behaves as expected when
+ * both L1 and L2 are scaled using different ratios. For this test we scale
+ * L1 down and scale L2 up.
+ */
+
+#include <time.h>
+
+#include "kvm_util.h"
+#include "vmx.h"
+#include "kselftest.h"
+
+/* L2 is scaled up (from L1's perspective) by this factor */
+#define L2_SCALE_FACTOR 4ULL
+
+#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)
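+/*
+ * The VMX TSC multiplier is a fixed-point value with 48 fractional bits, so
+ * an integer scale factor of N is encoded as N << 48.
+ */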
+#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)
+
+#define L2_GUEST_STACK_SIZE 64
+
+enum { USLEEP, UCHECK_L1, UCHECK_L2 };
+#define GUEST_SLEEP(sec) ucall(UCALL_SYNC, 2, USLEEP, sec)
+#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq)
+
+
+/*
+ * This function checks whether the "actual" TSC frequency of a guest matches
+ * its expected frequency. In order to account for delays in taking the TSC
+ * measurements, a difference of 1% between the actual and the expected value
+ * is tolerated.
+ */
+static void compare_tsc_freq(uint64_t actual, uint64_t expected)
+{
+ uint64_t tolerance, thresh_low, thresh_high;
+
+ tolerance = expected / 100;
+ thresh_low = expected - tolerance;
+ thresh_high = expected + tolerance;
+
+ TEST_ASSERT(thresh_low < actual,
+ "TSC freq is expected to be between %"PRIu64" and %"PRIu64
+ " but it actually is %"PRIu64,
+ thresh_low, thresh_high, actual);
+ TEST_ASSERT(thresh_high > actual,
+ "TSC freq is expected to be between %"PRIu64" and %"PRIu64
+ " but it actually is %"PRIu64,
+ thresh_low, thresh_high, actual);
+}
+
+static void check_tsc_freq(int level)
+{
+ uint64_t tsc_start, tsc_end, tsc_freq;
+
+ /*
+ * Reading the TSC twice with about a second's difference should give
+ * us an approximation of the TSC frequency from the guest's
+ * perspective. Now, this won't be completely accurate, but it should
+ * be good enough for the purposes of this test.
+ */
+ tsc_start = rdmsr(MSR_IA32_TSC);
+ GUEST_SLEEP(1);
+ tsc_end = rdmsr(MSR_IA32_TSC);
+
+ tsc_freq = tsc_end - tsc_start;
+
+ GUEST_CHECK(level, tsc_freq);
+}
+
+static void l2_guest_code(void)
+{
+ check_tsc_freq(UCHECK_L2);
+
+ /* exit to L1 */
+ __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint32_t control;
+
+ /* check that L1's frequency looks alright before launching L2 */
+ check_tsc_freq(UCHECK_L1);
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* prepare the VMCS for L2 execution */
+ prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* enable TSC offsetting and TSC scaling for L2 */
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+
+ control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
+ control |= SECONDARY_EXEC_TSC_SCALING;
+ vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
+
+ vmwrite(TSC_OFFSET, TSC_OFFSET_L2);
+ vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2);
+ vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32);
+
+ /* launch L2 */
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ /* check that L1's frequency still looks good */
+ check_tsc_freq(UCHECK_L1);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_vaddr_t vmx_pages_gva;
+
+ uint64_t tsc_start, tsc_end;
+ uint64_t tsc_khz;
+ uint64_t l1_scale_factor;
+ uint64_t l0_tsc_freq = 0;
+ uint64_t l1_tsc_freq = 0;
+ uint64_t l2_tsc_freq = 0;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
+ TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
+
+ /*
+ * We set L1's scale factor to be a random number from 2 to 10.
+ * Ideally we would do the same for L2's factor but that one is
+ * referenced by both main() and l1_guest_code() and using a global
+ * variable does not work.
+ */
+ srand(time(NULL));
+ l1_scale_factor = (rand() % 9) + 2;
+ printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor);
+ printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR);
+
+ tsc_start = rdtsc();
+ sleep(1);
+ tsc_end = rdtsc();
+
+ l0_tsc_freq = tsc_end - tsc_start;
+ printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
+ TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
+
+ /* scale down L1's TSC frequency */
+ vcpu_ioctl(vcpu, KVM_SET_TSC_KHZ, (void *) (tsc_khz / l1_scale_factor));
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC:
+ switch (uc.args[0]) {
+ case USLEEP:
+ sleep(uc.args[1]);
+ break;
+ case UCHECK_L1:
+ l1_tsc_freq = uc.args[1];
+ printf("L1's TSC frequency is around: %"PRIu64
+ "\n", l1_tsc_freq);
+
+ compare_tsc_freq(l1_tsc_freq,
+ l0_tsc_freq / l1_scale_factor);
+ break;
+ case UCHECK_L2:
+ l2_tsc_freq = uc.args[1];
+ printf("L2's TSC frequency is around: %"PRIu64
+ "\n", l2_tsc_freq);
+
+ compare_tsc_freq(l2_tsc_freq,
+ l1_tsc_freq * L2_SCALE_FACTOR);
+ break;
+ }
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for VMX-pmu perf capability msr
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * Test the effect of various CPUID settings on the MSR_IA32_PERF_CAPABILITIES
+ * MSR: verify that what userspace writes with KVM_SET_MSR is _not_ modified by
+ * the guest and can be retrieved with KVM_GET_MSR, and that invalid LBR
+ * formats are rejected.
+ */
+#include <sys/ioctl.h>
+
+#include <linux/bitmap.h>
+
+#include "kvm_test_harness.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+static union perf_capabilities {
+ struct {
+ u64 lbr_format:6;
+ u64 pebs_trap:1;
+ u64 pebs_arch_reg:1;
+ u64 pebs_format:4;
+ u64 smm_freeze:1;
+ u64 full_width_write:1;
+ u64 pebs_baseline:1;
+ u64 perf_metrics:1;
+ u64 pebs_output_pt_available:1;
+ u64 anythread_deprecated:1;
+ };
+ u64 capabilities;
+} host_cap;
+
+/*
+ * The LBR format and most PEBS features are immutable, all other features are
+ * fungible (if supported by the host and KVM).
+ */
+static const union perf_capabilities immutable_caps = {
+ .lbr_format = -1,
+ .pebs_trap = 1,
+ .pebs_arch_reg = 1,
+ .pebs_format = -1,
+ .pebs_baseline = 1,
+};
+
+static const union perf_capabilities format_caps = {
+ .lbr_format = -1,
+ .pebs_format = -1,
+};
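+
+/*
+ * Note, assigning -1 to a bitfield sets all bits of that field, e.g. the "-1"
+ * above flags every bit of the multi-bit LBR/PEBS format fields.
+ */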
+
+static void guest_test_perf_capabilities_gp(uint64_t val)
+{
+ uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val);
+
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP for value '0x%lx', got vector '0x%x'",
+ val, vector);
+}
+
+static void guest_code(uint64_t current_val)
+{
+ int i;
+
+ guest_test_perf_capabilities_gp(current_val);
+ guest_test_perf_capabilities_gp(0);
+
+ for (i = 0; i < 64; i++)
+ guest_test_perf_capabilities_gp(current_val ^ BIT_ULL(i));
+
+ GUEST_DONE();
+}
+
+KVM_ONE_VCPU_TEST_SUITE(vmx_pmu_caps);
+
+/*
+ * Verify that guest WRMSRs to PERF_CAPABILITIES #GP regardless of the value
+ * written, that the guest always sees the userspace controlled value, and that
+ * PERF_CAPABILITIES is immutable after KVM_RUN.
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, guest_wrmsr_perf_capabilities, guest_code)
+{
+ struct ucall uc;
+ int r, i;
+
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+
+ vcpu_args_set(vcpu, 1, host_cap.capabilities);
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+
+ TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES),
+ host_cap.capabilities);
+
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+ TEST_ASSERT(!r, "Post-KVM_RUN write '0' didn't fail");
+
+ for (i = 0; i < 64; i++) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ host_cap.capabilities ^ BIT_ULL(i));
+		TEST_ASSERT(!r, "Post-KVM_RUN write '0x%llx' didn't fail",
+ host_cap.capabilities ^ BIT_ULL(i));
+ }
+}
+
+/*
+ * Verify KVM allows writing PERF_CAPABILITIES with all KVM-supported features
+ * enabled, as well as '0' (to disable all features).
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, basic_perf_capabilities, guest_code)
+{
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+}
+
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code)
+{
+ const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities;
+ int bit;
+
+ for_each_set_bit(bit, &fungible_caps, 64) {
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(bit));
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ host_cap.capabilities & ~BIT_ULL(bit));
+ }
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+}
+
+/*
+ * Verify KVM rejects attempts to set unsupported and/or immutable features in
+ * PERF_CAPABILITIES. Note, LBR format and PEBS format need to be validated
+ * separately as they are multi-bit values, e.g. toggling or setting a single
+ * bit can generate a false positive without dedicated safeguards.
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, immutable_perf_capabilities, guest_code)
+{
+ const uint64_t reserved_caps = (~host_cap.capabilities |
+ immutable_caps.capabilities) &
+ ~format_caps.capabilities;
+ union perf_capabilities val = host_cap;
+ int r, bit;
+
+ for_each_set_bit(bit, &reserved_caps, 64) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ host_cap.capabilities ^ BIT_ULL(bit));
+ TEST_ASSERT(!r, "%s immutable feature 0x%llx (bit %d) didn't fail",
+ host_cap.capabilities & BIT_ULL(bit) ? "Setting" : "Clearing",
+ BIT_ULL(bit), bit);
+ }
+
+ /*
+ * KVM only supports the host's native LBR format, as well as '0' (to
+ * disable LBR support). Verify KVM rejects all other LBR formats.
+ */
+ for (val.lbr_format = 1; val.lbr_format; val.lbr_format++) {
+ if (val.lbr_format == host_cap.lbr_format)
+ continue;
+
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
+ TEST_ASSERT(!r, "Bad LBR FMT = 0x%x didn't fail, host = 0x%x",
+ val.lbr_format, host_cap.lbr_format);
+ }
+
+ /* Ditto for the PEBS format. */
+ for (val.pebs_format = 1; val.pebs_format; val.pebs_format++) {
+ if (val.pebs_format == host_cap.pebs_format)
+ continue;
+
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
+ TEST_ASSERT(!r, "Bad PEBS FMT = 0x%x didn't fail, host = 0x%x",
+ val.pebs_format, host_cap.pebs_format);
+ }
+}
+
+/*
+ * Test that LBR MSRs are writable when LBRs are enabled, and then verify that
+ * disabling the vPMU via CPUID also disables LBR support. Set bits 2:0 of
+ * LBR_TOS as those bits are writable across all uarch implementations (arch
+ * LBRs will need to poke a different MSR).
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code)
+{
+ int r;
+
+ if (!host_cap.lbr_format)
+ return;
+
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+ vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
+
+ vcpu_clear_cpuid_entry(vcpu, X86_PROPERTY_PMU_VERSION.function);
+
+ r = _vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
+ TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU");
+}
+
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, perf_capabilities_unsupported, guest_code)
+{
+ uint64_t val;
+ int i, r;
+
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+ val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES);
+ TEST_ASSERT_EQ(val, host_cap.capabilities);
+
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_PDCM);
+
+ val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES);
+ TEST_ASSERT_EQ(val, 0);
+
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+
+ for (i = 0; i < 64; i++) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(i));
+ TEST_ASSERT(!r, "Setting PERF_CAPABILITIES bit %d (= 0x%llx) should fail without PDCM",
+ i, BIT_ULL(i));
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_is_pmu_enabled());
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
+
+ TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
+ TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
+
+ host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
+
+ TEST_ASSERT(host_cap.full_width_write,
+ "Full-width writes should always be supported");
+
+ return test_harness_run(argc, argv);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VMX-preemption timer test
+ *
+ * Copyright (C) 2020, Google, LLC.
+ *
+ * Test to ensure that VM-Enter after migration doesn't incorrectly restart
+ * the timer with the full timer value instead of the partially decayed
+ * timer value.
+ *
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define PREEMPTION_TIMER_VALUE 100000000ull
+#define PREEMPTION_TIMER_VALUE_THRESHOLD1 80000000ull
+
+u32 vmx_pt_rate;
+bool l2_save_restore_done;
+static u64 l2_vmx_pt_start;
+volatile u64 l2_vmx_pt_finish;
+
+union vmx_basic basic;
+union vmx_ctrl_msr ctrl_pin_rev;
+union vmx_ctrl_msr ctrl_exit_rev;
+
+void l2_guest_code(void)
+{
+ u64 vmx_pt_delta;
+
+ vmcall();
+ l2_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
+
+ /*
+ * Wait until the 1st threshold has passed
+ */
+ do {
+ l2_vmx_pt_finish = rdtsc();
+ vmx_pt_delta = (l2_vmx_pt_finish - l2_vmx_pt_start) >>
+ vmx_pt_rate;
+ } while (vmx_pt_delta < PREEMPTION_TIMER_VALUE_THRESHOLD1);
+
+ /*
+ * Force L2 through Save and Restore cycle
+ */
+ GUEST_SYNC(1);
+
+ l2_save_restore_done = 1;
+
+ /*
+ * Now wait for the preemption timer to fire and
+ * exit to L1
+ */
+ while ((l2_vmx_pt_finish = rdtsc()))
+ ;
+}
+
+void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ u64 l1_vmx_pt_start;
+ u64 l1_vmx_pt_finish;
+ u64 l1_tsc_deadline, l2_tsc_deadline;
+
+ GUEST_ASSERT(vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+ GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /*
+ * Check for Preemption timer support
+ */
+ basic.val = rdmsr(MSR_IA32_VMX_BASIC);
+ ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS
+ : MSR_IA32_VMX_PINBASED_CTLS);
+ ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT_CTLS
+ : MSR_IA32_VMX_EXIT_CTLS);
+
+ if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) ||
+ !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
+ return;
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));
+
+ /*
+ * Turn on PIN control and resume the guest
+ */
+ GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL,
+ vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
+ PIN_BASED_VMX_PREEMPTION_TIMER));
+
+ GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE,
+ PREEMPTION_TIMER_VALUE));
+
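+	/*
+	 * Bits 4:0 of IA32_VMX_MISC report the rate of the preemption timer
+	 * relative to the TSC, i.e. the timer ticks once every 2^rate TSC
+	 * cycles, hence the '>> rate << rate' rounding of the TSC snapshots.
+	 */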
+ vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;
+
+ l2_save_restore_done = 0;
+
+ l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
+
+ GUEST_ASSERT(!vmresume());
+
+ l1_vmx_pt_finish = rdtsc();
+
+ /*
+ * Ensure exit from L2 happens after L2 goes through
+ * save and restore
+ */
+ GUEST_ASSERT(l2_save_restore_done);
+
+ /*
+ * Ensure the exit from L2 is due to preemption timer expiry
+ */
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER);
+
+ l1_tsc_deadline = l1_vmx_pt_start +
+ (PREEMPTION_TIMER_VALUE << vmx_pt_rate);
+
+ l2_tsc_deadline = l2_vmx_pt_start +
+ (PREEMPTION_TIMER_VALUE << vmx_pt_rate);
+
+ /*
+ * Sync with the host and pass the l1|l2 pt_expiry_finish times and
+ * tsc deadlines so that host can verify they are as expected
+ */
+ GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline,
+ l2_vmx_pt_finish, l2_tsc_deadline);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+ if (vmx_pages)
+ l1_guest_code(vmx_pages);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva = 0;
+
+ struct kvm_regs regs1, regs2;
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ struct kvm_x86_state *state;
+ struct ucall uc;
+ int stage;
+
+ /*
+ * AMD currently does not implement any VMX features, so for now we
+ * just early out.
+ */
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+	vcpu_regs_get(vcpu, &regs1);
+
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ /* UCALL_SYNC is handled here. */
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+ stage, (ulong)uc.args[1]);
+ /*
+		 * If this is stage 2, verify that the VMX preemption timer
+		 * expiry is as expected:
+		 * From L1's perspective, the preemption timer must not have
+		 * expired too early.
+		 * From L2's perspective, the preemption timer must not have
+		 * expired too late.
+ */
+ if (stage == 2) {
+
+ pr_info("Stage %d: L1 PT expiry TSC (%lu) , L1 TSC deadline (%lu)\n",
+ stage, uc.args[2], uc.args[3]);
+
+ pr_info("Stage %d: L2 PT expiry TSC (%lu) , L2 TSC deadline (%lu)\n",
+ stage, uc.args[4], uc.args[5]);
+
+ TEST_ASSERT(uc.args[2] >= uc.args[3],
+ "Stage %d: L1 PT expiry TSC (%lu) < L1 TSC deadline (%lu)",
+ stage, uc.args[2], uc.args[3]);
+
+ TEST_ASSERT(uc.args[4] < uc.args[5],
+ "Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)",
+ stage, uc.args[4], uc.args[5]);
+ }
+
+ state = vcpu_save_state(vcpu);
+		memset(&regs1, 0, sizeof(regs1));
+		vcpu_regs_get(vcpu, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
+
+		memset(&regs2, 0, sizeof(regs2));
+		vcpu_regs_get(vcpu, &regs2);
+		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+ }
+
+done:
+ kvm_vm_free(vm);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_set_nested_state_test
+ *
+ * Copyright (C) 2019, Google LLC.
+ *
+ * This test verifies the integrity of calling the ioctl KVM_SET_NESTED_STATE.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <errno.h>
+#include <linux/kvm.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+/*
+ * Mirror of VMCS12_REVISION in arch/x86/kvm/vmx/vmcs12.h. If that value
+ * changes this should be updated.
+ */
+#define VMCS12_REVISION 0x11e57ed0
+
+bool have_evmcs;
+
+void test_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state *state)
+{
+ vcpu_nested_state_set(vcpu, state);
+}
+
+void test_nested_state_expect_errno(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state *state,
+ int expected_errno)
+{
+ int rv;
+
+ rv = __vcpu_nested_state_set(vcpu, state);
+ TEST_ASSERT(rv == -1 && errno == expected_errno,
+ "Expected %s (%d) from vcpu_nested_state_set but got rv: %i errno: %s (%d)",
+ strerror(expected_errno), expected_errno, rv, strerror(errno),
+ errno);
+}
+
+void test_nested_state_expect_einval(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state *state)
+{
+ test_nested_state_expect_errno(vcpu, state, EINVAL);
+}
+
+void test_nested_state_expect_efault(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state *state)
+{
+ test_nested_state_expect_errno(vcpu, state, EFAULT);
+}
+
+void set_revision_id_for_vmcs12(struct kvm_nested_state *state,
+ u32 vmcs12_revision)
+{
+ /* Set revision_id in vmcs12 to vmcs12_revision. */
+ memcpy(&state->data, &vmcs12_revision, sizeof(u32));
+}
+
+void set_default_state(struct kvm_nested_state *state)
+{
+ memset(state, 0, sizeof(*state));
+ state->flags = KVM_STATE_NESTED_RUN_PENDING |
+ KVM_STATE_NESTED_GUEST_MODE;
+ state->format = 0;
+ state->size = sizeof(*state);
+}
+
+void set_default_vmx_state(struct kvm_nested_state *state, int size)
+{
+ memset(state, 0, size);
+ if (have_evmcs)
+ state->flags = KVM_STATE_NESTED_EVMCS;
+ state->format = 0;
+ state->size = size;
+ state->hdr.vmx.vmxon_pa = 0x1000;
+ state->hdr.vmx.vmcs12_pa = 0x2000;
+ state->hdr.vmx.smm.flags = 0;
+ set_revision_id_for_vmcs12(state, VMCS12_REVISION);
+}
+
+void test_vmx_nested_state(struct kvm_vcpu *vcpu)
+{
+ /* Add a page for VMCS12. */
+ const int state_sz = sizeof(struct kvm_nested_state) + getpagesize();
+ struct kvm_nested_state *state =
+ (struct kvm_nested_state *)malloc(state_sz);
+
+ /* The format must be set to 0. 0 for VMX, 1 for SVM. */
+ set_default_vmx_state(state, state_sz);
+ state->format = 1;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /*
+ * We cannot virtualize anything if the guest does not have VMX
+ * enabled.
+ */
+ set_default_vmx_state(state, state_sz);
+ test_nested_state_expect_einval(vcpu, state);
+
+ /*
+ * We cannot virtualize anything if the guest does not have VMX
+ * enabled. We expect KVM_SET_NESTED_STATE to return 0 if vmxon_pa
+ * is set to -1ull, but the flags must be zero.
+ */
+ set_default_vmx_state(state, state_sz);
+ state->hdr.vmx.vmxon_pa = -1ull;
+ test_nested_state_expect_einval(vcpu, state);
+
+ state->hdr.vmx.vmcs12_pa = -1ull;
+ state->flags = KVM_STATE_NESTED_EVMCS;
+ test_nested_state_expect_einval(vcpu, state);
+
+ state->flags = 0;
+ test_nested_state(vcpu, state);
+
+ /* Enable VMX in the guest CPUID. */
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_VMX);
+
+ /*
+ * Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without
+ * setting the nested state. When the eVMCS flag is not set, the
+ * expected return value is '0'.
+ */
+ set_default_vmx_state(state, state_sz);
+ state->flags = 0;
+ state->hdr.vmx.vmxon_pa = -1ull;
+ state->hdr.vmx.vmcs12_pa = -1ull;
+ test_nested_state(vcpu, state);
+
+ /*
+ * When eVMCS is supported, the eVMCS flag can only be set if the
+ * enlightened VMCS capability has been enabled.
+ */
+ if (have_evmcs) {
+ state->flags = KVM_STATE_NESTED_EVMCS;
+ test_nested_state_expect_einval(vcpu, state);
+ vcpu_enable_evmcs(vcpu);
+ test_nested_state(vcpu, state);
+ }
+
+ /* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */
+ state->hdr.vmx.smm.flags = 1;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /* Invalid flags are rejected. */
+ set_default_vmx_state(state, state_sz);
+ state->hdr.vmx.flags = ~0;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /* It is invalid to have vmxon_pa == -1ull and vmcs_pa != -1ull. */
+ set_default_vmx_state(state, state_sz);
+ state->hdr.vmx.vmxon_pa = -1ull;
+ state->flags = 0;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /* It is invalid to have vmxon_pa set to a non-page aligned address. */
+ set_default_vmx_state(state, state_sz);
+ state->hdr.vmx.vmxon_pa = 1;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /*
+ * It is invalid to have KVM_STATE_NESTED_SMM_GUEST_MODE and
+ * KVM_STATE_NESTED_GUEST_MODE set together.
+ */
+ set_default_vmx_state(state, state_sz);
+ state->flags = KVM_STATE_NESTED_GUEST_MODE |
+ KVM_STATE_NESTED_RUN_PENDING;
+ state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /*
+ * It is invalid to have any of the SMM flags set besides:
+ * KVM_STATE_NESTED_SMM_GUEST_MODE
+ * KVM_STATE_NESTED_SMM_VMXON
+ */
+ set_default_vmx_state(state, state_sz);
+ state->hdr.vmx.smm.flags = ~(KVM_STATE_NESTED_SMM_GUEST_MODE |
+ KVM_STATE_NESTED_SMM_VMXON);
+ test_nested_state_expect_einval(vcpu, state);
+
+ /* Outside SMM, SMM flags must be zero. */
+ set_default_vmx_state(state, state_sz);
+ state->flags = 0;
+ state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /*
+ * Size must be large enough to fit kvm_nested_state and vmcs12
+ * if VMCS12 physical address is set
+ */
+ set_default_vmx_state(state, state_sz);
+ state->size = sizeof(*state);
+ state->flags = 0;
+ test_nested_state_expect_einval(vcpu, state);
+
+ set_default_vmx_state(state, state_sz);
+ state->size = sizeof(*state);
+ state->flags = 0;
+ state->hdr.vmx.vmcs12_pa = -1;
+ test_nested_state(vcpu, state);
+
+ /*
+ * KVM_SET_NESTED_STATE succeeds with invalid VMCS
+ * contents but L2 not running.
+ */
+ set_default_vmx_state(state, state_sz);
+ state->flags = 0;
+ test_nested_state(vcpu, state);
+
+ /* Invalid flags are rejected, even if no VMCS loaded. */
+ set_default_vmx_state(state, state_sz);
+ state->size = sizeof(*state);
+ state->flags = 0;
+ state->hdr.vmx.vmcs12_pa = -1;
+ state->hdr.vmx.flags = ~0;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /* vmxon_pa cannot be the same address as vmcs_pa. */
+ set_default_vmx_state(state, state_sz);
+ state->hdr.vmx.vmxon_pa = 0;
+ state->hdr.vmx.vmcs12_pa = 0;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /*
+ * Test that if we leave nesting the state reflects that when we get
+ * it again.
+ */
+ set_default_vmx_state(state, state_sz);
+ state->hdr.vmx.vmxon_pa = -1ull;
+ state->hdr.vmx.vmcs12_pa = -1ull;
+ state->flags = 0;
+ test_nested_state(vcpu, state);
+ vcpu_nested_state_get(vcpu, state);
+ TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
+ "Size must be between %ld and %d. The size returned was %d.",
+ sizeof(*state), state_sz, state->size);
+ TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull.");
+ TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull.");
+
+ free(state);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_nested_state state;
+ struct kvm_vcpu *vcpu;
+
+ have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+
+ /*
+ * AMD currently does not implement set_nested_state, so for now we
+ * just early out.
+ */
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ /*
+ * First run tests with VMX disabled to check error handling.
+ */
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
+
+ /* Passing a NULL kvm_nested_state causes a EFAULT. */
+ test_nested_state_expect_efault(vcpu, NULL);
+
+ /* 'size' cannot be smaller than sizeof(kvm_nested_state). */
+ set_default_state(&state);
+ state.size = 0;
+ test_nested_state_expect_einval(vcpu, &state);
+
+ /*
+ * Setting the flags 0xf fails the flags check. The only flags that
+ * can be used are:
+ * KVM_STATE_NESTED_GUEST_MODE
+ * KVM_STATE_NESTED_RUN_PENDING
+ * KVM_STATE_NESTED_EVMCS
+ */
+ set_default_state(&state);
+ state.flags = 0xf;
+ test_nested_state_expect_einval(vcpu, &state);
+
+ /*
+ * If KVM_STATE_NESTED_RUN_PENDING is set then
+ * KVM_STATE_NESTED_GUEST_MODE has to be set as well.
+ */
+ set_default_state(&state);
+ state.flags = KVM_STATE_NESTED_RUN_PENDING;
+ test_nested_state_expect_einval(vcpu, &state);
+
+ test_vmx_nested_state(vcpu);
+
+ kvm_vm_free(vm);
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_tsc_adjust_test
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * IA32_TSC_ADJUST test
+ *
+ * According to the SDM, "if an execution of WRMSR to the
+ * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC,
+ * the logical processor also adds (or subtracts) value X from the
+ * IA32_TSC_ADJUST MSR."
+ *
+ * Note that when L1 doesn't intercept writes to IA32_TSC, a
+ * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC
+ * value.
+ *
+ * This test verifies that this unusual case is handled correctly.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#ifndef MSR_IA32_TSC_ADJUST
+#define MSR_IA32_TSC_ADJUST 0x3b
+#endif
+
+#define TSC_ADJUST_VALUE (1ll << 32)
+#define TSC_OFFSET_VALUE -(1ll << 48)
+
+enum {
+ PORT_ABORT = 0x1000,
+ PORT_REPORT,
+ PORT_DONE,
+};
+
+enum {
+ VMXON_PAGE = 0,
+ VMCS_PAGE,
+ MSR_BITMAP_PAGE,
+
+ NUM_VMX_PAGES,
+};
+
+/* The virtual machine object. */
+static struct kvm_vm *vm;
+
+static void check_ia32_tsc_adjust(int64_t max)
+{
+ int64_t adjust;
+
+ adjust = rdmsr(MSR_IA32_TSC_ADJUST);
+ GUEST_SYNC(adjust);
+ GUEST_ASSERT(adjust <= max);
+}
+
+static void l2_guest_code(void)
+{
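+	/*
+	 * L2 runs with TSC offsetting enabled, so RDTSC in L2 returns L1's
+	 * TSC plus TSC_OFFSET_VALUE; subtract the offset to recover L1's view
+	 * of the TSC.
+	 */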
+ uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE;
+
+ wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE);
+ check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
+
+ /* Exit to L1 */
+ __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint32_t control;
+ uintptr_t save_cr3;
+
+ GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE);
+ wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
+ check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+ vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
+
+ /* Jump into L2. First, test failure to load guest CR3. */
+ save_cr3 = vmreadz(GUEST_CR3);
+ vmwrite(GUEST_CR3, -1ull);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
+ (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
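+	/* IA32_TSC_ADJUST should be unaffected by the failed VM-Enter. */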
+ check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
+ vmwrite(GUEST_CR3, save_cr3);
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
+
+ GUEST_DONE();
+}
+
+static void report(int64_t val)
+{
+ pr_info("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
+ val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva;
+ struct kvm_vcpu *vcpu;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code);
+
+ /* Allocate VMX pages and shared descriptors (vmx_pages). */
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ report(uc.args[1]);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * xapic_ipi_test
+ *
+ * Copyright (C) 2020, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
+ * another vCPU that is halted when KVM's backing page for the APIC access
+ * address has been moved by mm.
+ *
+ * The test starts two vCPUs: one that sends IPIs and one that continually
+ * executes HLT. The sender checks that the halter has woken from the HLT and
+ * has reentered HLT before sending the next IPI. While the vCPUs are running,
+ * the host continually calls migrate_pages to move all of the process' pages
+ * amongst the available numa nodes on the machine.
+ *
+ * Migration is a command line option. When used on non-NUMA machines, the
+ * test will exit with an error. The test is still useful on non-NUMA machines
+ * for testing IPIs.
+ */
+#include <getopt.h>
+#include <pthread.h>
+#include <inttypes.h>
+#include <string.h>
+#include <time.h>
+
+#include "kvm_util.h"
+#include "numaif.h"
+#include "processor.h"
+#include "test_util.h"
+#include "vmx.h"
+
+/* Default running time for the test */
+#define DEFAULT_RUN_SECS 3
+
+/* Default delay between migrate_pages calls (microseconds) */
+#define DEFAULT_DELAY_USECS 500000
+
+/*
+ * Vector for IPI from sender vCPU to halting vCPU.
+ * Value is arbitrary and was chosen for the alternating bit pattern. Any
+ * value should work.
+ */
+#define IPI_VECTOR 0xa5
+
+/*
+ * Incremented in the IPI handler. Provides evidence to the sender that the IPI
+ * arrived at the destination
+ */
+static volatile uint64_t ipis_rcvd;
+
+/* Data struct shared between host main thread and vCPUs */
+struct test_data_page {
+ uint32_t halter_apic_id;
+ volatile uint64_t hlt_count;
+ volatile uint64_t wake_count;
+ uint64_t ipis_sent;
+ uint64_t migrations_attempted;
+ uint64_t migrations_completed;
+ uint32_t icr;
+ uint32_t icr2;
+ uint32_t halter_tpr;
+ uint32_t halter_ppr;
+
+ /*
+ * Record local version register as a cross-check that APIC access
+ * worked. Value should match what KVM reports (APIC_VERSION in
+ * arch/x86/kvm/lapic.c). If test is failing, check that values match
+ * to determine whether APIC access exits are working.
+ */
+ uint32_t halter_lvr;
+};
+
+struct thread_params {
+ struct test_data_page *data;
+ struct kvm_vcpu *vcpu;
+ uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
+};
+
+void verify_apic_base_addr(void)
+{
+ uint64_t msr = rdmsr(MSR_IA32_APICBASE);
+ uint64_t base = GET_APIC_BASE(msr);
+
+ GUEST_ASSERT(base == APIC_DEFAULT_GPA);
+}
+
+static void halter_guest_code(struct test_data_page *data)
+{
+ verify_apic_base_addr();
+ xapic_enable();
+
+ data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
+ data->halter_lvr = xapic_read_reg(APIC_LVR);
+
+ /*
+ * Loop forever HLTing and recording halts & wakes. Disable interrupts
+ * each time around to minimize window between signaling the pending
+ * halt to the sender vCPU and executing the halt. No need to disable on
+ * first run as this vCPU executes first and the host waits for it to
+ * signal going into first halt before starting the sender vCPU. Record
+ * TPR and PPR for diagnostic purposes in case the test fails.
+ */
+ for (;;) {
+ data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
+ data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
+ data->hlt_count++;
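+		/*
+		 * Note, STI enables interrupts only after the *next*
+		 * instruction completes, so the IPI can't arrive in the gap
+		 * between enabling interrupts and executing HLT.
+		 */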
+ asm volatile("sti; hlt; cli");
+ data->wake_count++;
+ }
+}
+
+/*
+ * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
+ * enable diagnosing errant writes to the APIC access address backing page in
+ * case of test failure.
+ */
+static void guest_ipi_handler(struct ex_regs *regs)
+{
+ ipis_rcvd++;
+ xapic_write_reg(APIC_EOI, 77);
+}
+
+static void sender_guest_code(struct test_data_page *data)
+{
+ uint64_t last_wake_count;
+ uint64_t last_hlt_count;
+ uint64_t last_ipis_rcvd_count;
+ uint32_t icr_val;
+ uint32_t icr2_val;
+ uint64_t tsc_start;
+
+ verify_apic_base_addr();
+ xapic_enable();
+
+ /*
+ * Init interrupt command register for sending IPIs
+ *
+ * Delivery mode=fixed, per SDM:
+ * "Delivers the interrupt specified in the vector field to the target
+ * processor."
+ *
+ * Destination mode=physical i.e. specify target by its local APIC
+ * ID. This vCPU assumes that the halter vCPU has already started and
+ * set data->halter_apic_id.
+ */
+ icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
+ icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
+ data->icr = icr_val;
+ data->icr2 = icr2_val;
+
+ last_wake_count = data->wake_count;
+ last_hlt_count = data->hlt_count;
+ last_ipis_rcvd_count = ipis_rcvd;
+ for (;;) {
+ /*
+ * Send IPI to halter vCPU.
+ * First IPI can be sent unconditionally because halter vCPU
+ * starts earlier.
+ */
+ xapic_write_reg(APIC_ICR2, icr2_val);
+ xapic_write_reg(APIC_ICR, icr_val);
+ data->ipis_sent++;
+
+ /*
+ * Wait up to ~1 sec for halter to indicate that it has:
+ * 1. Received the IPI
+ * 2. Woken up from the halt
+ * 3. Gone back into halt
+ * Current CPUs typically run at 2.x Ghz which is ~2
+ * billion ticks per second.
+ */
+ tsc_start = rdtsc();
+ while (rdtsc() - tsc_start < 2000000000) {
+ if ((ipis_rcvd != last_ipis_rcvd_count) &&
+ (data->wake_count != last_wake_count) &&
+ (data->hlt_count != last_hlt_count))
+ break;
+ }
+
+ GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
+ (data->wake_count != last_wake_count) &&
+ (data->hlt_count != last_hlt_count));
+
+ last_wake_count = data->wake_count;
+ last_hlt_count = data->hlt_count;
+ last_ipis_rcvd_count = ipis_rcvd;
+ }
+}
+
+static void *vcpu_thread(void *arg)
+{
+ struct thread_params *params = (struct thread_params *)arg;
+ struct kvm_vcpu *vcpu = params->vcpu;
+ struct ucall uc;
+ int old;
+ int r;
+
+ r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+ TEST_ASSERT(r == 0,
+ "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+ vcpu->id, r);
+
+ fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ if (get_ucall(vcpu, &uc) == UCALL_ABORT) {
+ TEST_ASSERT(false,
+ "vCPU %u exited with error: %s.\n"
+ "Sending vCPU sent %lu IPIs to halting vCPU\n"
+ "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
+ "Halter TPR=%#x PPR=%#x LVR=%#x\n"
+ "Migrations attempted: %lu\n"
+ "Migrations completed: %lu",
+ vcpu->id, (const char *)uc.args[0],
+ params->data->ipis_sent, params->data->hlt_count,
+ params->data->wake_count,
+ *params->pipis_rcvd, params->data->halter_tpr,
+ params->data->halter_ppr, params->data->halter_lvr,
+ params->data->migrations_attempted,
+ params->data->migrations_completed);
+ }
+
+ return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
+{
+ void *retval;
+ int r;
+
+ r = pthread_cancel(thread);
+ TEST_ASSERT(r == 0,
+ "pthread_cancel on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+
+ r = pthread_join(thread, &retval);
+ TEST_ASSERT(r == 0,
+ "pthread_join on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+ TEST_ASSERT(retval == PTHREAD_CANCELED,
+ "expected retval=%p, got %p", PTHREAD_CANCELED,
+ retval);
+}
+
+void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
+ uint64_t *pipis_rcvd)
+{
+ long pages_not_moved;
+ unsigned long nodemask = 0;
+ unsigned long nodemasks[sizeof(nodemask) * 8];
+ int nodes = 0;
+ time_t start_time, last_update, now;
+ time_t interval_secs = 1;
+ int i, r;
+ int from, to;
+ unsigned long bit;
+ uint64_t hlt_count;
+ uint64_t wake_count;
+ uint64_t ipis_sent;
+
+ fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
+ delay_usecs);
+
+ /* Get set of first 64 numa nodes available */
+ r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
+ 0, MPOL_F_MEMS_ALLOWED);
+ TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
+
+ fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
+ "(each 1-bit indicates node is present): %#lx\n",
+ sizeof(nodemask) * 8, nodemask);
+
+ /* Init array of masks containing a single-bit in each, one for each
+ * available node. migrate_pages called below requires specifying nodes
+ * as bit masks.
+ */
+ for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
+ if (nodemask & bit) {
+ nodemasks[nodes] = nodemask & bit;
+ nodes++;
+ }
+ }
+
+ TEST_ASSERT(nodes > 1,
+ "Did not find at least 2 numa nodes. Can't do migration");
+
+ fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);
+
+ from = 0;
+ to = 1;
+ start_time = time(NULL);
+ last_update = start_time;
+
+ ipis_sent = data->ipis_sent;
+ hlt_count = data->hlt_count;
+ wake_count = data->wake_count;
+
+ while ((int)(time(NULL) - start_time) < run_secs) {
+ data->migrations_attempted++;
+
+ /*
+ * migrate_pages with PID=0 will migrate all pages of this
+ * process between the nodes specified as bitmasks. The page
+ * backing the APIC access address belongs to this process
+ * because it is allocated by KVM in the context of the
+ * KVM_CREATE_VCPU ioctl. If that assumption ever changes this
+ * test may break or give a false positive signal.
+ */
+ pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]),
+ &nodemasks[from],
+ &nodemasks[to]);
+ if (pages_not_moved < 0)
+ fprintf(stderr,
+ "migrate_pages failed, errno=%d\n", errno);
+ else if (pages_not_moved > 0)
+ fprintf(stderr,
+ "migrate_pages could not move %ld pages\n",
+ pages_not_moved);
+ else
+ data->migrations_completed++;
+
+ from = to;
+ to++;
+ if (to == nodes)
+ to = 0;
+
+ now = time(NULL);
+ if (((now - start_time) % interval_secs == 0) &&
+ (now != last_update)) {
+ last_update = now;
+ fprintf(stderr,
+ "%lu seconds: Migrations attempted=%lu completed=%lu, "
+ "IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
+ now - start_time, data->migrations_attempted,
+ data->migrations_completed,
+ data->ipis_sent, *pipis_rcvd,
+ data->hlt_count, data->wake_count);
+
+ TEST_ASSERT(ipis_sent != data->ipis_sent &&
+ hlt_count != data->hlt_count &&
+ wake_count != data->wake_count,
+ "IPI, HLT and wake count have not increased "
+ "in the last %lu seconds. "
+ "HLTer is likely hung.", interval_secs);
+
+ ipis_sent = data->ipis_sent;
+ hlt_count = data->hlt_count;
+ wake_count = data->wake_count;
+ }
+ usleep(delay_usecs);
+ }
+}
+
+void get_cmdline_args(int argc, char *argv[], int *run_secs,
+ bool *migrate, int *delay_usecs)
+{
+ for (;;) {
+ int opt = getopt(argc, argv, "s:d:m");
+
+ if (opt == -1)
+ break;
+ switch (opt) {
+ case 's':
+ *run_secs = parse_size(optarg);
+ break;
+ case 'm':
+ *migrate = true;
+ break;
+ case 'd':
+ *delay_usecs = parse_size(optarg);
+ break;
+ default:
+ TEST_ASSERT(false,
+ "Usage: -s <runtime seconds>. Default is %d seconds.\n"
+ "-m adds calls to migrate_pages while vCPUs are running."
+ " Default is no migrations.\n"
+ "-d <delay microseconds> - delay between migrate_pages() calls."
+ " Default is %d microseconds.",
+ DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ int r;
+ int wait_secs;
+ const int max_halter_wait = 10;
+ int run_secs = 0;
+ int delay_usecs = 0;
+ struct test_data_page *data;
+ vm_vaddr_t test_data_page_vaddr;
+ bool migrate = false;
+ pthread_t threads[2];
+ struct thread_params params[2];
+ struct kvm_vm *vm;
+ uint64_t *pipis_rcvd;
+
+ get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
+ if (run_secs <= 0)
+ run_secs = DEFAULT_RUN_SECS;
+ if (delay_usecs <= 0)
+ delay_usecs = DEFAULT_DELAY_USECS;
+
+	vm = vm_create_with_one_vcpu(&params[0].vcpu, halter_guest_code);
+
+ vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+ params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
+
+ test_data_page_vaddr = vm_vaddr_alloc_page(vm);
+ data = addr_gva2hva(vm, test_data_page_vaddr);
+ memset(data, 0, sizeof(*data));
+ params[0].data = data;
+ params[1].data = data;
+
+ vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr);
+ vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr);
+
+ pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
+ params[0].pipis_rcvd = pipis_rcvd;
+ params[1].pipis_rcvd = pipis_rcvd;
+
+ /* Start halter vCPU thread and wait for it to execute first HLT. */
+	r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
+ TEST_ASSERT(r == 0,
+ "pthread_create halter failed errno=%d", errno);
+ fprintf(stderr, "Halter vCPU thread started\n");
+
+ wait_secs = 0;
+ while ((wait_secs < max_halter_wait) && !data->hlt_count) {
+ sleep(1);
+ wait_secs++;
+ }
+
+ TEST_ASSERT(data->hlt_count,
+ "Halter vCPU did not execute first HLT within %d seconds",
+ max_halter_wait);
+
+ fprintf(stderr,
+ "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
+ data->halter_apic_id, wait_secs);
+
+	r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
+ TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno);
+
+ fprintf(stderr,
+ "IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n",
+ run_secs);
+
+ if (!migrate)
+ sleep(run_secs);
+ else
+ do_migrations(data, run_secs, delay_usecs, pipis_rcvd);
+
+ /*
+ * Cancel threads and wait for them to stop.
+ */
+ cancel_join_vcpu_thread(threads[0], params[0].vcpu);
+ cancel_join_vcpu_thread(threads[1], params[1].vcpu);
+
+ fprintf(stderr,
+ "Test successful after running for %d seconds.\n"
+ "Sending vCPU sent %lu IPIs to halting vCPU\n"
+ "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
+ "Halter APIC ID=%#x\n"
+ "Sender ICR value=%#x ICR2 value=%#x\n"
+ "Halter TPR=%#x PPR=%#x LVR=%#x\n"
+ "Migrations attempted: %lu\n"
+ "Migrations completed: %lu\n",
+ run_secs, data->ipis_sent,
+ data->hlt_count, data->wake_count, *pipis_rcvd,
+ data->halter_apic_id,
+ data->icr, data->icr2,
+ data->halter_tpr, data->halter_ppr, data->halter_lvr,
+ data->migrations_attempted, data->migrations_completed);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+struct xapic_vcpu {
+ struct kvm_vcpu *vcpu;
+ bool is_x2apic;
+ bool has_xavic_errata;
+};
+
+static void xapic_guest_code(void)
+{
+ asm volatile("cli");
+
+ xapic_enable();
+
+ while (1) {
+ uint64_t val = (u64)xapic_read_reg(APIC_IRR) |
+ (u64)xapic_read_reg(APIC_IRR + 0x10) << 32;
+
+ xapic_write_reg(APIC_ICR2, val >> 32);
+ xapic_write_reg(APIC_ICR, val);
+ GUEST_SYNC(val);
+ }
+}
+
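+/*
+ * ICR bits that are reserved in x2APIC mode; writes that set any of them are
+ * expected to #GP.
+ */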
+#define X2APIC_RSVD_BITS_MASK (GENMASK_ULL(31, 20) | \
+ GENMASK_ULL(17, 16) | \
+ GENMASK_ULL(13, 13))
+
+static void x2apic_guest_code(void)
+{
+ asm volatile("cli");
+
+ x2apic_enable();
+
+ do {
+ uint64_t val = x2apic_read_reg(APIC_IRR) |
+ x2apic_read_reg(APIC_IRR + 0x10) << 32;
+
+ if (val & X2APIC_RSVD_BITS_MASK) {
+ x2apic_write_reg_fault(APIC_ICR, val);
+ } else {
+ x2apic_write_reg(APIC_ICR, val);
+ GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ICR), val);
+ }
+ GUEST_SYNC(val);
+ } while (1);
+}
+
+static void ____test_icr(struct xapic_vcpu *x, uint64_t val)
+{
+ struct kvm_vcpu *vcpu = x->vcpu;
+ struct kvm_lapic_state xapic;
+ struct ucall uc;
+ uint64_t icr;
+
+ /*
+ * Tell the guest what ICR value to write. Use the IRR to pass info,
+ * all bits are valid and should not be modified by KVM (ignoring the
+ * fact that vectors 0-15 are technically illegal).
+ */
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+ *((u32 *)&xapic.regs[APIC_IRR]) = val;
+ *((u32 *)&xapic.regs[APIC_IRR + 0x10]) = val >> 32;
+ vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+ TEST_ASSERT_EQ(uc.args[1], val);
+
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+ icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) |
+ (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32;
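+
+	/*
+	 * In xAPIC mode, only ICR bits 31:0 and the destination field in
+	 * bits 63:56 (ICR2[31:24]) exist; absent the xAVIC errata, drop bits
+	 * 55:32 from the expected value before comparing.
+	 */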
+ if (!x->is_x2apic) {
+ if (!x->has_xavic_errata)
+ val &= (-1u | (0xffull << (32 + 24)));
+ } else if (val & X2APIC_RSVD_BITS_MASK) {
+ return;
+ }
+
+ if (x->has_xavic_errata)
+ TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
+ else
+ TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
+}
+
+static void __test_icr(struct xapic_vcpu *x, uint64_t val)
+{
+ /*
+ * The BUSY bit is reserved on both AMD and Intel, but only AMD treats
+ * it is as _must_ be zero. Intel simply ignores the bit. Don't test
+ * the BUSY bit for x2APIC, as there is no single correct behavior.
+ */
+ if (!x->is_x2apic)
+ ____test_icr(x, val | APIC_ICR_BUSY);
+
+ ____test_icr(x, val & ~(u64)APIC_ICR_BUSY);
+}
+
+static void test_icr(struct xapic_vcpu *x)
+{
+ struct kvm_vcpu *vcpu = x->vcpu;
+ uint64_t icr, i, j;
+
+ icr = APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_FIXED;
+ for (i = 0; i <= 0xff; i++)
+ __test_icr(x, icr | i);
+
+ icr = APIC_INT_ASSERT | APIC_DM_FIXED;
+ for (i = 0; i <= 0xff; i++)
+ __test_icr(x, icr | i);
+
+ /*
+ * Send all flavors of IPIs to non-existent vCPUs. TODO: use number of
+ * vCPUs, not vcpu.id + 1. Arbitrarily use vector 0xff.
+ */
+ icr = APIC_INT_ASSERT | 0xff;
+ for (i = 0; i < 0xff; i++) {
+ if (i == vcpu->id)
+ continue;
+ for (j = 0; j < 8; j++)
+ __test_icr(x, i << (32 + 24) | icr | (j << 8));
+ }
+
+ /* And again with a shorthand destination for all types of IPIs. */
+ icr = APIC_DEST_ALLBUT | APIC_INT_ASSERT;
+ for (i = 0; i < 8; i++)
+ __test_icr(x, icr | (i << 8));
+
+ /* And a few garbage values, just to make sure it's an IRQ (blocked). */
+ __test_icr(x, 0xa5a5a5a5a5a5a5a5 & ~APIC_DM_FIXED_MASK);
+ __test_icr(x, 0x5a5a5a5a5a5a5a5a & ~APIC_DM_FIXED_MASK);
+ __test_icr(x, -1ull & ~APIC_DM_FIXED_MASK);
+}
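
A note on the destination encoding exercised by the loops above: in xAPIC mode the destination ID lives in ICR2[31:24], i.e. bits 63:56 of the combined 64-bit value, which is why the target is shifted by 32 + 24. A minimal sketch of that packing, assuming the same layout:

/* Illustrative: pack an xAPIC destination and ICR low dword into one 64-bit value. */
static inline uint64_t compose_xapic_icr(uint8_t dest, uint32_t icr_lo)
{
	/* ICR2[31:24] holds the destination, i.e. bits 63:56 of the combined value. */
	return ((uint64_t)dest << (32 + 24)) | icr_lo;
}
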
+
+static void __test_apic_id(struct kvm_vcpu *vcpu, uint64_t apic_base)
+{
+ uint32_t apic_id, expected;
+ struct kvm_lapic_state xapic;
+
+ vcpu_set_msr(vcpu, MSR_IA32_APICBASE, apic_base);
+
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+
+ expected = apic_base & X2APIC_ENABLE ? vcpu->id : vcpu->id << 24;
+ apic_id = *((u32 *)&xapic.regs[APIC_ID]);
+
+ TEST_ASSERT(apic_id == expected,
+ "APIC_ID not set back to %s format; wanted = %x, got = %x",
+ (apic_base & X2APIC_ENABLE) ? "x2APIC" : "xAPIC",
+ expected, apic_id);
+}
+
+/*
+ * Verify that KVM switches the APIC_ID between xAPIC and x2APIC when userspace
+ * stuffs MSR_IA32_APICBASE. Setting the APIC_ID when x2APIC is enabled and
+ * when the APIC transitions for DISABLED to ENABLED is architectural behavior
+ * (on Intel), whereas the x2APIC => xAPIC transition behavior is KVM ABI since
+ * attempted to transition from x2APIC to xAPIC without disabling the APIC is
+ * architecturally disallowed.
+ */
+static void test_apic_id(void)
+{
+ const uint32_t NR_VCPUS = 3;
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ uint64_t apic_base;
+ struct kvm_vm *vm;
+ int i;
+
+ vm = vm_create_with_vcpus(NR_VCPUS, NULL, vcpus);
+ vm_enable_cap(vm, KVM_CAP_X2APIC_API, KVM_X2APIC_API_USE_32BIT_IDS);
+
+ for (i = 0; i < NR_VCPUS; i++) {
+ apic_base = vcpu_get_msr(vcpus[i], MSR_IA32_APICBASE);
+
+ TEST_ASSERT(apic_base & MSR_IA32_APICBASE_ENABLE,
+ "APIC not in ENABLED state at vCPU RESET");
+ TEST_ASSERT(!(apic_base & X2APIC_ENABLE),
+ "APIC not in xAPIC mode at vCPU RESET");
+
+ __test_apic_id(vcpus[i], apic_base);
+ __test_apic_id(vcpus[i], apic_base | X2APIC_ENABLE);
+ __test_apic_id(vcpus[i], apic_base);
+ }
+
+ kvm_vm_free(vm);
+}
+
+static void test_x2apic_id(void)
+{
+ struct kvm_lapic_state lapic = {};
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int i;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+ vcpu_set_msr(vcpu, MSR_IA32_APICBASE, MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+
+ /*
+ * Try stuffing a modified x2APIC ID; KVM should ignore the value and
+ * always return the vCPU's default/readonly x2APIC ID.
+ */
+ for (i = 0; i <= 0xff; i++) {
+ *(u32 *)(lapic.regs + APIC_ID) = i << 24;
+ *(u32 *)(lapic.regs + APIC_SPIV) = APIC_SPIV_APIC_ENABLED;
+ vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
+
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic);
+ TEST_ASSERT(*((u32 *)&lapic.regs[APIC_ID]) == vcpu->id << 24,
+ "x2APIC ID should be fully readonly");
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ struct xapic_vcpu x = {
+ .vcpu = NULL,
+ .is_x2apic = true,
+ };
+ struct kvm_vm *vm;
+
+ vm = vm_create_with_one_vcpu(&x.vcpu, x2apic_guest_code);
+ test_icr(&x);
+ kvm_vm_free(vm);
+
+ /*
+ * Use a second VM for the xAPIC test so that x2APIC can be hidden from
+ * the guest in order to test AVIC. KVM disallows changing CPUID after
+ * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC.
+ */
+ vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code);
+ x.is_x2apic = false;
+
+ /*
+ * AMD's AVIC implementation is buggy (fails to clear the ICR BUSY bit),
+ * and also diverges from KVM with respect to ICR2[23:0] (KVM and Intel
+ * drop writes, AMD does not). Account for the errata when checking
+ * that KVM reads back what was written.
+ */
+ x.has_xavic_errata = host_cpu_is_amd &&
+ get_kvm_amd_param_bool("avic");
+
+ vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+ test_icr(&x);
+ kvm_vm_free(vm);
+
+ test_apic_id();
+ test_x2apic_id();
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * XCR0 cpuid test
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * Assert that architectural dependency rules are satisfied, e.g. that AVX is
+ * supported if and only if SSE is supported.
+ */
+#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies) \
+do { \
+ uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \
+ \
+ __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) || \
+ __supported == ((xfeatures) | (dependencies)), \
+ "supported = 0x%lx, xfeatures = 0x%llx, dependencies = 0x%llx", \
+ __supported, (xfeatures), (dependencies)); \
+} while (0)
+
+/*
+ * Assert that KVM reports a sane, usable as-is XCR0. Architecturally, a CPU
+ * isn't strictly required to _support_ all XFeatures related to a feature, but
+ * at the same time XSETBV will #GP if bundled XFeatures aren't enabled and
+ * disabled coherently. E.g. a CPU can technically enumerate support for
+ * XTILE_CFG but not XTILE_DATA, but attempting to enable XTILE_CFG without
+ * XTILE_DATA will #GP.
+ */
+#define ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, xfeatures) \
+do { \
+ uint64_t __supported = (supported_xcr0) & (xfeatures); \
+ \
+ __GUEST_ASSERT(!__supported || __supported == (xfeatures), \
+ "supported = 0x%lx, xfeatures = 0x%llx", \
+ __supported, (xfeatures)); \
+} while (0)
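
As a concrete reading of the first macro: for AVX the rule is "if YMM state is enumerated, SSE state must be too". A standalone sketch of that single case, using the same XFEATURE_MASK_* names:

/* Illustrative expansion of ASSERT_XFEATURE_DEPENDENCIES for the AVX case. */
static inline bool ymm_dependency_ok(uint64_t supported_xcr0)
{
	uint64_t masked = supported_xcr0 & (XFEATURE_MASK_YMM | XFEATURE_MASK_SSE);

	/* Either YMM isn't supported at all, or both YMM and SSE are. */
	return (masked & XFEATURE_MASK_YMM) != XFEATURE_MASK_YMM ||
	       masked == (XFEATURE_MASK_YMM | XFEATURE_MASK_SSE);
}
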
+
+static void guest_code(void)
+{
+ uint64_t initial_xcr0;
+ uint64_t supported_xcr0;
+ int i, vector;
+
+ set_cr4(get_cr4() | X86_CR4_OSXSAVE);
+
+ initial_xcr0 = xgetbv(0);
+ supported_xcr0 = this_cpu_supported_xcr0();
+
+ GUEST_ASSERT(initial_xcr0 == supported_xcr0);
+
+ /* Check AVX */
+ ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0,
+ XFEATURE_MASK_YMM,
+ XFEATURE_MASK_SSE);
+
+ /* Check MPX */
+ ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+ XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
+
+ /* Check AVX-512 */
+ ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0,
+ XFEATURE_MASK_AVX512,
+ XFEATURE_MASK_SSE | XFEATURE_MASK_YMM);
+ ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+ XFEATURE_MASK_AVX512);
+
+ /* Check AMX */
+ ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+ XFEATURE_MASK_XTILE);
+
+ vector = xsetbv_safe(0, XFEATURE_MASK_FP);
+ __GUEST_ASSERT(!vector,
+ "Expected success on XSETBV(FP), got vector '0x%x'",
+ vector);
+
+ vector = xsetbv_safe(0, supported_xcr0);
+ __GUEST_ASSERT(!vector,
+ "Expected success on XSETBV(0x%lx), got vector '0x%x'",
+ supported_xcr0, vector);
+
+ for (i = 0; i < 64; i++) {
+ if (supported_xcr0 & BIT_ULL(i))
+ continue;
+
+ vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i));
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'",
+ BIT_ULL(i), supported_xcr0, vector);
+ }
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2021 Amazon.com, Inc. or its affiliates.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <stdint.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+
+#include <sys/eventfd.h>
+
+#define SHINFO_REGION_GVA 0xc0000000ULL
+#define SHINFO_REGION_GPA 0xc0000000ULL
+#define SHINFO_REGION_SLOT 10
+
+#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (3 * PAGE_SIZE))
+#define DUMMY_REGION_SLOT 11
+
+#define DUMMY_REGION_GPA_2 (SHINFO_REGION_GPA + (4 * PAGE_SIZE))
+#define DUMMY_REGION_SLOT_2 12
+
+#define SHINFO_ADDR (SHINFO_REGION_GPA)
+#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40)
+#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
+#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15)
+
+#define SHINFO_VADDR (SHINFO_REGION_GVA)
+#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40)
+#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15)
+
+#define EVTCHN_VECTOR 0x10
+
+#define EVTCHN_TEST1 15
+#define EVTCHN_TEST2 66
+#define EVTCHN_TIMER 13
+
+enum {
+ TEST_INJECT_VECTOR = 0,
+ TEST_RUNSTATE_runnable,
+ TEST_RUNSTATE_blocked,
+ TEST_RUNSTATE_offline,
+ TEST_RUNSTATE_ADJUST,
+ TEST_RUNSTATE_DATA,
+ TEST_STEAL_TIME,
+ TEST_EVTCHN_MASKED,
+ TEST_EVTCHN_UNMASKED,
+ TEST_EVTCHN_SLOWPATH,
+ TEST_EVTCHN_SEND_IOCTL,
+ TEST_EVTCHN_HCALL,
+ TEST_EVTCHN_HCALL_SLOWPATH,
+ TEST_EVTCHN_HCALL_EVENTFD,
+ TEST_TIMER_SETUP,
+ TEST_TIMER_WAIT,
+ TEST_TIMER_RESTORE,
+ TEST_POLL_READY,
+ TEST_POLL_TIMEOUT,
+ TEST_POLL_MASKED,
+ TEST_POLL_WAKE,
+ SET_VCPU_INFO,
+ TEST_TIMER_PAST,
+ TEST_LOCKING_SEND_RACE,
+ TEST_LOCKING_POLL_RACE,
+ TEST_LOCKING_POLL_TIMEOUT,
+ TEST_DONE,
+
+ TEST_GUEST_SAW_IRQ,
+};
+
+#define XEN_HYPERCALL_MSR 0x40000000
+
+#define MIN_STEAL_TIME 50000
+
+#define SHINFO_RACE_TIMEOUT 2 /* seconds */
+
+#define __HYPERVISOR_set_timer_op 15
+#define __HYPERVISOR_sched_op 29
+#define __HYPERVISOR_event_channel_op 32
+
+#define SCHEDOP_poll 3
+
+#define EVTCHNOP_send 4
+
+#define EVTCHNSTAT_interdomain 2
+
+struct evtchn_send {
+ u32 port;
+};
+
+struct sched_poll {
+ u32 *ports;
+ unsigned int nr_ports;
+ u64 timeout;
+};
+
+struct pvclock_vcpu_time_info {
+ u32 version;
+ u32 pad0;
+ u64 tsc_timestamp;
+ u64 system_time;
+ u32 tsc_to_system_mul;
+ s8 tsc_shift;
+ u8 flags;
+ u8 pad[2];
+} __attribute__((__packed__)); /* 32 bytes */
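
For context, these fields feed the standard pvclock conversion from a raw TSC reading to guest system time; the test itself only checks the version fields later, but a sketch of the conversion (using a 128-bit intermediate to avoid overflow) is:

/* Illustrative pvclock conversion: raw TSC -> guest system time in nanoseconds. */
static inline uint64_t pvclock_tsc_to_ns(const struct pvclock_vcpu_time_info *ti,
					 uint64_t tsc)
{
	uint64_t delta = tsc - ti->tsc_timestamp;

	if (ti->tsc_shift < 0)
		delta >>= -ti->tsc_shift;
	else
		delta <<= ti->tsc_shift;

	return ti->system_time +
	       (uint64_t)(((unsigned __int128)delta * ti->tsc_to_system_mul) >> 32);
}
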
+
+struct pvclock_wall_clock {
+ u32 version;
+ u32 sec;
+ u32 nsec;
+} __attribute__((__packed__));
+
+struct vcpu_runstate_info {
+ uint32_t state;
+ uint64_t state_entry_time;
+ uint64_t time[5]; /* Extra field for overrun check */
+};
+
+struct compat_vcpu_runstate_info {
+ uint32_t state;
+ uint64_t state_entry_time;
+ uint64_t time[5];
+} __attribute__((__packed__));
+
+struct arch_vcpu_info {
+ unsigned long cr2;
+ unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
+};
+
+struct vcpu_info {
+ uint8_t evtchn_upcall_pending;
+ uint8_t evtchn_upcall_mask;
+ unsigned long evtchn_pending_sel;
+ struct arch_vcpu_info arch;
+ struct pvclock_vcpu_time_info time;
+}; /* 64 bytes (x86) */
+
+struct shared_info {
+ struct vcpu_info vcpu_info[32];
+ unsigned long evtchn_pending[64];
+ unsigned long evtchn_mask[64];
+ struct pvclock_wall_clock wc;
+ uint32_t wc_sec_hi;
+ /* arch_shared_info here */
+};
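
The evtchn_pending/evtchn_mask arrays above are indexed by event channel port as word = port / BITS_PER_LONG, bit = port % BITS_PER_LONG, which is why the host side later touches evtchn_pending[0] for port 15 (EVTCHN_TEST1) and evtchn_pending[1] for port 66 (EVTCHN_TEST2). A sketch of that mapping, assuming a 64-bit unsigned long:

/* Illustrative: locate an event channel port in the 2-level pending/mask bitmaps. */
static inline void evtchn_port_to_word_bit(unsigned int port,
					   unsigned int *word, unsigned int *bit)
{
	const unsigned int bits = sizeof(unsigned long) * 8;

	*word = port / bits;	/* e.g. port 66 -> evtchn_pending[1] */
	*bit  = port % bits;	/* e.g. port 66 -> bit 2 */
}
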
+
+#define RUNSTATE_running 0
+#define RUNSTATE_runnable 1
+#define RUNSTATE_blocked 2
+#define RUNSTATE_offline 3
+
+static const char *runstate_names[] = {
+ "running",
+ "runnable",
+ "blocked",
+ "offline"
+};
+
+struct {
+ struct kvm_irq_routing info;
+ struct kvm_irq_routing_entry entries[2];
+} irq_routes;
+
+static volatile bool guest_saw_irq;
+
+static void evtchn_handler(struct ex_regs *regs)
+{
+ struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
+
+ vcpu_arch_put_guest(vi->evtchn_upcall_pending, 0);
+ vcpu_arch_put_guest(vi->evtchn_pending_sel, 0);
+ guest_saw_irq = true;
+
+ GUEST_SYNC(TEST_GUEST_SAW_IRQ);
+}
+
+static void guest_wait_for_irq(void)
+{
+ while (!guest_saw_irq)
+ __asm__ __volatile__ ("rep nop" : : : "memory");
+ guest_saw_irq = false;
+}
+
+static void guest_code(void)
+{
+ struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
+ int i;
+
+ __asm__ __volatile__(
+ "sti\n"
+ "nop\n"
+ );
+
+ /* Trigger an interrupt injection */
+ GUEST_SYNC(TEST_INJECT_VECTOR);
+
+ guest_wait_for_irq();
+
+ /* Test having the host set runstates manually */
+ GUEST_SYNC(TEST_RUNSTATE_runnable);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ GUEST_SYNC(TEST_RUNSTATE_blocked);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ GUEST_SYNC(TEST_RUNSTATE_offline);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ /* Test runstate time adjust */
+ GUEST_SYNC(TEST_RUNSTATE_ADJUST);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);
+
+ /* Test runstate time set */
+ GUEST_SYNC(TEST_RUNSTATE_DATA);
+ GUEST_ASSERT(rs->state_entry_time >= 0x8000);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);
+
+ /* sched_yield() should result in some 'runnable' time */
+ GUEST_SYNC(TEST_STEAL_TIME);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
+
+ /* Attempt to deliver a *masked* interrupt */
+ GUEST_SYNC(TEST_EVTCHN_MASKED);
+
+ /* Wait until we see the bit set */
+ struct shared_info *si = (void *)SHINFO_VADDR;
+ while (!si->evtchn_pending[0])
+ __asm__ __volatile__ ("rep nop" : : : "memory");
+
+ /* Now deliver an *unmasked* interrupt */
+ GUEST_SYNC(TEST_EVTCHN_UNMASKED);
+
+ guest_wait_for_irq();
+
+ /* Change memslots and deliver an interrupt */
+ GUEST_SYNC(TEST_EVTCHN_SLOWPATH);
+
+ guest_wait_for_irq();
+
+ /* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */
+ GUEST_SYNC(TEST_EVTCHN_SEND_IOCTL);
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_EVTCHN_HCALL);
+
+ /* Our turn. Deliver an event channel (to ourselves) with the
+ * EVTCHNOP_send hypercall. */
+ struct evtchn_send s = { .port = 127 };
+ xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_EVTCHN_HCALL_SLOWPATH);
+
+ /*
+ * Same again, but this time the host has messed with memslots so it
+ * should take the slow path in kvm_xen_set_evtchn().
+ */
+ xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_EVTCHN_HCALL_EVENTFD);
+
+ /* Deliver "outbound" event channel to an eventfd which
+ * happens to be one of our own irqfds. */
+ s.port = 197;
+ xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_TIMER_SETUP);
+
+ /* Set a timer 100ms in the future. */
+ xen_hypercall(__HYPERVISOR_set_timer_op,
+ rs->state_entry_time + 100000000, NULL);
+
+ GUEST_SYNC(TEST_TIMER_WAIT);
+
+ /* Now wait for the timer */
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_TIMER_RESTORE);
+
+ /* The host has 'restored' the timer. Just wait for it. */
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_POLL_READY);
+
+ /* Poll for an event channel port which is already set */
+ u32 ports[1] = { EVTCHN_TIMER };
+ struct sched_poll p = {
+ .ports = ports,
+ .nr_ports = 1,
+ .timeout = 0,
+ };
+
+ xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+ GUEST_SYNC(TEST_POLL_TIMEOUT);
+
+ /* Poll for an unset port and wait for the timeout. */
+ p.timeout = 100000000;
+ xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+ GUEST_SYNC(TEST_POLL_MASKED);
+
+ /* A timer will wake the masked port we're waiting on, while we poll */
+ p.timeout = 0;
+ xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+ GUEST_SYNC(TEST_POLL_WAKE);
+
+ /* Set the vcpu_info to point at exactly the place it already is to
+ * make sure the attribute is functional. */
+ GUEST_SYNC(SET_VCPU_INFO);
+
+ /* A timer will wake an *unmasked* port, which should wake us with an
+ * actual interrupt, while we're polling on a different port. */
+ ports[0]++;
+ p.timeout = 0;
+ xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_TIMER_PAST);
+
+ /* Timer should have fired already */
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_LOCKING_SEND_RACE);
+ /* Racing host ioctls */
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_LOCKING_POLL_RACE);
+ /* Racing vmcall against host ioctl */
+
+ ports[0] = 0;
+
+ p = (struct sched_poll) {
+ .ports = ports,
+ .nr_ports = 1,
+ .timeout = 0
+ };
+
+wait_for_timer:
+ /*
+ * Poll for a timer wake event while the worker thread is mucking with
+ * the shared info. KVM XEN drops timer IRQs if the shared info is
+ * invalid when the timer expires. Arbitrarily poll 100 times before
+ * giving up and asking the VMM to re-arm the timer. 100 polls should
+ * consume enough time to beat on KVM without taking too long if the
+ * timer IRQ is dropped due to an invalid event channel.
+ */
+ for (i = 0; i < 100 && !guest_saw_irq; i++)
+ __xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+ /*
+ * Re-send the timer IRQ if it was (likely) dropped due to the timer
+ * expiring while the event channel was invalid.
+ */
+ if (!guest_saw_irq) {
+ GUEST_SYNC(TEST_LOCKING_POLL_TIMEOUT);
+ goto wait_for_timer;
+ }
+ guest_saw_irq = false;
+
+ GUEST_SYNC(TEST_DONE);
+}
+
+static struct shared_info *shinfo;
+static struct vcpu_info *vinfo;
+static struct kvm_vcpu *vcpu;
+
+static void handle_alrm(int sig)
+{
+ if (vinfo)
+ printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending);
+ vcpu_dump(stdout, vcpu, 0);
+ TEST_FAIL("IRQ delivery timed out");
+}
+
+static void *juggle_shinfo_state(void *arg)
+{
+ struct kvm_vm *vm = (struct kvm_vm *)arg;
+
+ struct kvm_xen_hvm_attr cache_activate_gfn = {
+ .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+ .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE
+ };
+
+ struct kvm_xen_hvm_attr cache_deactivate_gfn = {
+ .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+ .u.shared_info.gfn = KVM_XEN_INVALID_GFN
+ };
+
+ struct kvm_xen_hvm_attr cache_activate_hva = {
+ .type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA,
+ .u.shared_info.hva = (unsigned long)shinfo
+ };
+
+ struct kvm_xen_hvm_attr cache_deactivate_hva = {
+ .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+ .u.shared_info.hva = 0
+ };
+
+ int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+
+ for (;;) {
+ __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_gfn);
+ pthread_testcancel();
+ __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_gfn);
+
+ if (xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA) {
+ __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_hva);
+ pthread_testcancel();
+ __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_hva);
+ }
+ }
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_xen_hvm_attr evt_reset;
+ struct kvm_vm *vm;
+ pthread_t thread;
+ bool verbose;
+ int ret;
+
+ verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
+ !strncmp(argv[1], "--verbose", 10));
+
+ int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+ TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO);
+
+ bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
+ bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG);
+ bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
+ bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
+ bool has_shinfo_hva = !!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ /* Map a region for the shared_info page */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0);
+ virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3);
+
+ shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
+
+ int zero_fd = open("/dev/zero", O_RDONLY);
+ TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
+
+ struct kvm_xen_hvm_config hvmc = {
+ .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
+ .msr = XEN_HYPERCALL_MSR,
+ };
+
+ /* Let the kernel know that we *will* use it for sending all
+ * event channels, which lets it intercept SCHEDOP_poll */
+ if (do_evtchn_tests)
+ hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+
+ vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
+
+ struct kvm_xen_hvm_attr lm = {
+ .type = KVM_XEN_ATTR_TYPE_LONG_MODE,
+ .u.long_mode = 1,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+ if (do_runstate_flag) {
+ struct kvm_xen_hvm_attr ruf = {
+ .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG,
+ .u.runstate_update_flag = 1,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf);
+
+ ruf.u.runstate_update_flag = 0;
+ vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf);
+ TEST_ASSERT(ruf.u.runstate_update_flag == 1,
+ "Failed to read back RUNSTATE_UPDATE_FLAG attr");
+ }
+
+ struct kvm_xen_hvm_attr ha = {};
+
+ if (has_shinfo_hva) {
+ ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA;
+ ha.u.shared_info.hva = (unsigned long)shinfo;
+ } else {
+ ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO;
+ ha.u.shared_info.gfn = SHINFO_ADDR / PAGE_SIZE;
+ }
+
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
+
+ /*
+ * Test what happens when the HVA of the shinfo page is remapped after
+ * the kernel has a reference to it. But make sure we copy the clock
+ * info over since that's only set at setup time, and we test it later.
+ */
+ struct pvclock_wall_clock wc_copy = shinfo->wc;
+ void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
+ TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
+ shinfo->wc = wc_copy;
+
+ struct kvm_xen_vcpu_attr vi = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
+ .u.gpa = VCPU_INFO_ADDR,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi);
+
+ struct kvm_xen_vcpu_attr pvclock = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
+ .u.gpa = PVTIME_ADDR,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock);
+
+ struct kvm_xen_hvm_attr vec = {
+ .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
+ .u.vector = EVTCHN_VECTOR,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);
+
+ vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);
+
+ if (do_runstate_tests) {
+ struct kvm_xen_vcpu_attr st = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
+ .u.gpa = RUNSTATE_ADDR,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
+ }
+
+ int irq_fd[2] = { -1, -1 };
+
+ if (do_eventfd_tests) {
+ irq_fd[0] = eventfd(0, 0);
+ irq_fd[1] = eventfd(0, 0);
+
+ /* Unexpected, but not a KVM failure */
+ if (irq_fd[0] == -1 || irq_fd[1] == -1)
+ do_evtchn_tests = do_eventfd_tests = false;
+ }
+
+ if (do_eventfd_tests) {
+ irq_routes.info.nr = 2;
+
+ irq_routes.entries[0].gsi = 32;
+ irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+ irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1;
+ irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id;
+ irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+ irq_routes.entries[1].gsi = 33;
+ irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+ irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2;
+ irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id;
+ irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+ vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);
+
+ struct kvm_irqfd ifd = { };
+
+ ifd.fd = irq_fd[0];
+ ifd.gsi = 32;
+ vm_ioctl(vm, KVM_IRQFD, &ifd);
+
+ ifd.fd = irq_fd[1];
+ ifd.gsi = 33;
+ vm_ioctl(vm, KVM_IRQFD, &ifd);
+
+ struct sigaction sa = { };
+ sa.sa_handler = handle_alrm;
+ sigaction(SIGALRM, &sa, NULL);
+ }
+
+ struct kvm_xen_vcpu_attr tmr = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
+ .u.timer.port = EVTCHN_TIMER,
+ .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+ .u.timer.expires_ns = 0
+ };
+
+ if (do_evtchn_tests) {
+ struct kvm_xen_hvm_attr inj = {
+ .type = KVM_XEN_ATTR_TYPE_EVTCHN,
+ .u.evtchn.send_port = 127,
+ .u.evtchn.type = EVTCHNSTAT_interdomain,
+ .u.evtchn.flags = 0,
+ .u.evtchn.deliver.port.port = EVTCHN_TEST1,
+ .u.evtchn.deliver.port.vcpu = vcpu->id + 1,
+ .u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+ /* Test migration to a different vCPU */
+ inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE;
+ inj.u.evtchn.deliver.port.vcpu = vcpu->id;
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+ inj.u.evtchn.send_port = 197;
+ inj.u.evtchn.deliver.eventfd.port = 0;
+ inj.u.evtchn.deliver.eventfd.fd = irq_fd[1];
+ inj.u.evtchn.flags = 0;
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ }
+ vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
+ vinfo->evtchn_upcall_pending = 0;
+
+ struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
+ rs->state = 0x5a;
+
+ bool evtchn_irq_expected = false;
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC: {
+ struct kvm_xen_vcpu_attr rst;
+ long rundelay;
+
+ if (do_runstate_tests)
+ TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+ rs->time[1] + rs->time[2] + rs->time[3],
+ "runstate times don't add up");
+
+ switch (uc.args[1]) {
+ case TEST_INJECT_VECTOR:
+ if (verbose)
+ printf("Delivering evtchn upcall\n");
+ evtchn_irq_expected = true;
+ vinfo->evtchn_upcall_pending = 1;
+ break;
+
+ case TEST_RUNSTATE_runnable...TEST_RUNSTATE_offline:
+ TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
+ if (!do_runstate_tests)
+ goto done;
+ if (verbose)
+ printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
+ rst.u.runstate.state = uc.args[1] + RUNSTATE_runnable -
+ TEST_RUNSTATE_runnable;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+
+ case TEST_RUNSTATE_ADJUST:
+ if (verbose)
+ printf("Testing RUNSTATE_ADJUST\n");
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
+ memset(&rst.u, 0, sizeof(rst.u));
+ rst.u.runstate.state = (uint64_t)-1;
+ rst.u.runstate.time_blocked =
+ 0x5a - rs->time[RUNSTATE_blocked];
+ rst.u.runstate.time_offline =
+ 0x6b6b - rs->time[RUNSTATE_offline];
+ rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
+ rst.u.runstate.time_offline;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+
+ case TEST_RUNSTATE_DATA:
+ if (verbose)
+ printf("Testing RUNSTATE_DATA\n");
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
+ memset(&rst.u, 0, sizeof(rst.u));
+ rst.u.runstate.state = RUNSTATE_running;
+ rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
+ rst.u.runstate.time_blocked = 0x6b6b;
+ rst.u.runstate.time_offline = 0x5a;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+
+ case TEST_STEAL_TIME:
+ if (verbose)
+ printf("Testing steal time\n");
+ /* Yield until scheduler delay exceeds target */
+ rundelay = get_run_delay() + MIN_STEAL_TIME;
+ do {
+ sched_yield();
+ } while (get_run_delay() < rundelay);
+ break;
+
+ case TEST_EVTCHN_MASKED:
+ if (!do_eventfd_tests)
+ goto done;
+ if (verbose)
+ printf("Testing masked event channel\n");
+ shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1;
+ eventfd_write(irq_fd[0], 1UL);
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_UNMASKED:
+ if (verbose)
+ printf("Testing unmasked event channel\n");
+ /* Unmask that, but deliver the other one */
+ shinfo->evtchn_pending[0] = 0;
+ shinfo->evtchn_mask[0] = 0;
+ eventfd_write(irq_fd[1], 1UL);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_SLOWPATH:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[1] = 0;
+ if (verbose)
+ printf("Testing event channel after memslot change\n");
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
+ eventfd_write(irq_fd[0], 1UL);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_SEND_IOCTL:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ if (!do_evtchn_tests)
+ goto done;
+
+ shinfo->evtchn_pending[0] = 0;
+ if (verbose)
+ printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n");
+
+ struct kvm_irq_routing_xen_evtchn e;
+ e.port = EVTCHN_TEST2;
+ e.vcpu = vcpu->id;
+ e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+ vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_HCALL:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[1] = 0;
+
+ if (verbose)
+ printf("Testing guest EVTCHNOP_send direct to evtchn\n");
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_HCALL_SLOWPATH:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[0] = 0;
+
+ if (verbose)
+ printf("Testing guest EVTCHNOP_send direct to evtchn after memslot change\n");
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ DUMMY_REGION_GPA_2, DUMMY_REGION_SLOT_2, 1, 0);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_HCALL_EVENTFD:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[0] = 0;
+
+ if (verbose)
+ printf("Testing guest EVTCHNOP_send to eventfd\n");
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_TIMER_SETUP:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[1] = 0;
+
+ if (verbose)
+ printf("Testing guest oneshot timer\n");
+ break;
+
+ case TEST_TIMER_WAIT:
+ memset(&tmr, 0, sizeof(tmr));
+ tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+ TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER,
+ "Timer port not returned");
+ TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+ "Timer priority not returned");
+ TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time,
+ "Timer expiry not returned");
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_TIMER_RESTORE:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[0] = 0;
+
+ if (verbose)
+ printf("Testing restored oneshot timer\n");
+
+ tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_POLL_READY:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+
+ if (verbose)
+ printf("Testing SCHEDOP_poll with already pending event\n");
+ shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER;
+ alarm(1);
+ break;
+
+ case TEST_POLL_TIMEOUT:
+ if (verbose)
+ printf("Testing SCHEDOP_poll timeout\n");
+ shinfo->evtchn_pending[0] = 0;
+ alarm(1);
+ break;
+
+ case TEST_POLL_MASKED:
+ if (verbose)
+ printf("Testing SCHEDOP_poll wake on masked event\n");
+
+ tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ alarm(1);
+ break;
+
+ case TEST_POLL_WAKE:
+ shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0;
+ if (verbose)
+ printf("Testing SCHEDOP_poll wake on unmasked event\n");
+
+ evtchn_irq_expected = true;
+ tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+
+ /* Read it back and check the pending time is reported correctly */
+ tmr.u.timer.expires_ns = 0;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+ TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000,
+ "Timer not reported pending");
+ alarm(1);
+ break;
+
+ case SET_VCPU_INFO:
+ if (has_shinfo_hva) {
+ struct kvm_xen_vcpu_attr vih = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA,
+ .u.hva = (unsigned long)vinfo
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vih);
+ }
+ break;
+
+ case TEST_TIMER_PAST:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ /* Read timer and check it is no longer pending */
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+ TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending");
+
+ shinfo->evtchn_pending[0] = 0;
+ if (verbose)
+ printf("Testing timer in the past\n");
+
+ evtchn_irq_expected = true;
+ tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ alarm(1);
+ break;
+
+ case TEST_LOCKING_SEND_RACE:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ alarm(0);
+
+ if (verbose)
+ printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n");
+
+ ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm);
+ TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret));
+
+ struct kvm_irq_routing_xen_evtchn uxe = {
+ .port = 1,
+ .vcpu = vcpu->id,
+ .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL
+ };
+
+ evtchn_irq_expected = true;
+ for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;)
+ __vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe);
+ break;
+
+ case TEST_LOCKING_POLL_RACE:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+
+ if (verbose)
+ printf("Testing shinfo lock corruption (SCHEDOP_poll)\n");
+
+ shinfo->evtchn_pending[0] = 1;
+
+ evtchn_irq_expected = true;
+ tmr.u.timer.expires_ns = rs->state_entry_time +
+ SHINFO_RACE_TIMEOUT * 1000000000ULL;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ break;
+
+ case TEST_LOCKING_POLL_TIMEOUT:
+ /*
+ * Optional and possibly repeated sync point.
+ * Injecting the timer IRQ may fail if the
+ * shinfo is invalid when the timer expires.
+ * If the timer has expired but the IRQ hasn't
+ * been delivered, rearm the timer and retry.
+ */
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+
+ /* Resume the guest if the timer is still pending. */
+ if (tmr.u.timer.expires_ns)
+ break;
+
+ /* All done if the IRQ was delivered. */
+ if (!evtchn_irq_expected)
+ break;
+
+ tmr.u.timer.expires_ns = rs->state_entry_time +
+ SHINFO_RACE_TIMEOUT * 1000000000ULL;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ break;
+ case TEST_DONE:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+
+ ret = pthread_cancel(thread);
+ TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret));
+
+ ret = pthread_join(thread, 0);
+ TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret));
+ goto done;
+
+ case TEST_GUEST_SAW_IRQ:
+ TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
+ evtchn_irq_expected = false;
+ break;
+ }
+ break;
+ }
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+ }
+
+ done:
+ evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN;
+ evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET;
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset);
+
+ alarm(0);
+
+ /*
+ * Just a *really* basic check that things are being put in the
+ * right place. The actual calculations are much the same for
+ * Xen as they are for the KVM variants, so no need to check.
+ */
+ struct pvclock_wall_clock *wc;
+ struct pvclock_vcpu_time_info *ti, *ti2;
+ struct kvm_clock_data kcdata;
+ long long delta;
+
+ wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
+ ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
+ ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
+
+ if (verbose) {
+ printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
+ printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
+ ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
+ ti->tsc_shift, ti->flags);
+ printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
+ ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
+ ti2->tsc_shift, ti2->flags);
+ }
+
+ TEST_ASSERT(wc->version && !(wc->version & 1),
+ "Bad wallclock version %x", wc->version);
+
+ vm_ioctl(vm, KVM_GET_CLOCK, &kcdata);
+
+ if (kcdata.flags & KVM_CLOCK_REALTIME) {
+ if (verbose) {
+ printf("KVM_GET_CLOCK clock: %lld.%09lld\n",
+ kcdata.clock / NSEC_PER_SEC, kcdata.clock % NSEC_PER_SEC);
+ printf("KVM_GET_CLOCK realtime: %lld.%09lld\n",
+ kcdata.realtime / NSEC_PER_SEC, kcdata.realtime % NSEC_PER_SEC);
+ }
+
+ delta = (wc->sec * NSEC_PER_SEC + wc->nsec) - (kcdata.realtime - kcdata.clock);
+
+ /*
+ * KVM_GET_CLOCK gives CLOCK_REALTIME, which jumps on leap second updates, but
+ * unfortunately KVM doesn't currently offer a CLOCK_TAI alternative. Accept a 1s
+ * delta, as testing clock accuracy is not the goal here. The test just needs to
+ * check that the value in shinfo is somewhat sane.
+ */
+ TEST_ASSERT(llabs(delta) < NSEC_PER_SEC,
+ "Guest's epoch from shinfo %d.%09d differs from KVM_GET_CLOCK %lld.%lld",
+ wc->sec, wc->nsec, (kcdata.realtime - kcdata.clock) / NSEC_PER_SEC,
+ (kcdata.realtime - kcdata.clock) % NSEC_PER_SEC);
+ } else {
+ pr_info("Missing KVM_CLOCK_REALTIME, skipping shinfo epoch sanity check\n");
+ }
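
The 1s tolerance above rests on the identity that the shinfo wallclock records the wall-clock time at guest system-time zero, so it should match KVM's realtime minus kvmclock. A worked form of that comparison (sketch, using the same structures as the code above):

/* Illustrative: the guest's wall-clock epoch computed two ways. */
static long long shinfo_epoch_delta_ns(const struct pvclock_wall_clock *wc,
				       const struct kvm_clock_data *kc)
{
	long long shinfo_epoch = (long long)wc->sec * NSEC_PER_SEC + wc->nsec;
	long long kvm_epoch = kc->realtime - kc->clock;

	return shinfo_epoch - kvm_epoch;	/* expected magnitude well under 1s */
}
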
+
+ TEST_ASSERT(ti->version && !(ti->version & 1),
+ "Bad time_info version %x", ti->version);
+ TEST_ASSERT(ti2->version && !(ti2->version & 1),
+ "Bad time_info version %x", ti->version);
+
+ if (do_runstate_tests) {
+ /*
+ * Fetch runstate and check sanity. Strictly speaking, in the
+ * general case we might not expect the numbers to be identical,
+ * but in this case we know we aren't running the vCPU any more.
+ */
+ struct kvm_xen_vcpu_attr rst = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst);
+
+ if (verbose) {
+ printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
+ rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
+ rs->state, rs->state_entry_time);
+ for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
+ printf("State %s: %" PRIu64 " ns\n",
+ runstate_names[i], rs->time[i]);
+ }
+ }
+
+ /*
+ * Exercise runstate info at all points across the page boundary, in
+ * 32-bit and 64-bit mode. In particular, test the case where it is
+ * configured in 32-bit mode and then switched to 64-bit mode while
+ * active, which takes it onto the second page.
+ */
+ unsigned long runstate_addr;
+ struct compat_vcpu_runstate_info *crs;
+ for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4;
+ runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) {
+
+ rs = addr_gpa2hva(vm, runstate_addr);
+ crs = (void *)rs;
+
+ memset(rs, 0xa5, sizeof(*rs));
+
+ /* Set to compatibility mode */
+ lm.u.long_mode = 0;
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+ /* Set runstate to new address (kernel will write it) */
+ struct kvm_xen_vcpu_attr st = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
+ .u.gpa = runstate_addr,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
+
+ if (verbose)
+ printf("Compatibility runstate at %08lx\n", runstate_addr);
+
+ TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch");
+ TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time,
+ "State entry time mismatch");
+ TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+ "Running time mismatch");
+ TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+ "Runnable time mismatch");
+ TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+ "Blocked time mismatch");
+ TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+ "Offline time mismatch");
+ TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
+ "Structure overrun");
+ TEST_ASSERT(crs->state_entry_time == crs->time[0] +
+ crs->time[1] + crs->time[2] + crs->time[3],
+ "runstate times don't add up");
+
+
+ /* Now switch to 64-bit mode */
+ lm.u.long_mode = 1;
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+ memset(rs, 0xa5, sizeof(*rs));
+
+ /* Don't change the address, just trigger a write */
+ struct kvm_xen_vcpu_attr adj = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST,
+ .u.runstate.state = (uint64_t)-1
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj);
+
+ if (verbose)
+ printf("64-bit runstate at %08lx\n", runstate_addr);
+
+ TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
+ TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
+ "State entry time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+ "Running time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+ "Runnable time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+ "Blocked time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+ "Offline time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
+ "Structure overrun");
+
+ TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+ rs->time[1] + rs->time[2] + rs->time[3],
+ "runstate times don't add up");
+ }
+ }
+
+ kvm_vm_free(vm);
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xen_vmcall_test
+ *
+ * Copyright © 2020 Amazon.com, Inc. or its affiliates.
+ *
+ * Userspace hypercall testing
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+#define HCALL_REGION_GPA 0xc0000000ULL
+#define HCALL_REGION_SLOT 10
+
+#define INPUTVALUE 17
+#define ARGVALUE(x) (0xdeadbeef5a5a0000UL + x)
+#define RETVALUE 0xcafef00dfbfbffffUL
+
+#define XEN_HYPERCALL_MSR 0x40000200
+#define HV_GUEST_OS_ID_MSR 0x40000000
+#define HV_HYPERCALL_MSR 0x40000001
+
+#define HVCALL_SIGNAL_EVENT 0x005d
+#define HV_STATUS_INVALID_ALIGNMENT 4
+
+static void guest_code(void)
+{
+ unsigned long rax = INPUTVALUE;
+ unsigned long rdi = ARGVALUE(1);
+ unsigned long rsi = ARGVALUE(2);
+ unsigned long rdx = ARGVALUE(3);
+ unsigned long rcx;
+ register unsigned long r10 __asm__("r10") = ARGVALUE(4);
+ register unsigned long r8 __asm__("r8") = ARGVALUE(5);
+ register unsigned long r9 __asm__("r9") = ARGVALUE(6);
+
+ /* First a direct invocation of 'vmcall' */
+ __asm__ __volatile__("vmcall" :
+ "=a"(rax) :
+ "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
+ "r"(r10), "r"(r8), "r"(r9));
+ GUEST_ASSERT(rax == RETVALUE);
+
+ /* Fill in the Xen hypercall page */
+ __asm__ __volatile__("wrmsr" : : "c" (XEN_HYPERCALL_MSR),
+ "a" (HCALL_REGION_GPA & 0xffffffff),
+ "d" (HCALL_REGION_GPA >> 32));
+
+ /* Set Hyper-V Guest OS ID */
+ __asm__ __volatile__("wrmsr" : : "c" (HV_GUEST_OS_ID_MSR),
+ "a" (0x5a), "d" (0));
+
+ /* Hyper-V hypercall page */
+ u64 msrval = HCALL_REGION_GPA + PAGE_SIZE + 1;
+ __asm__ __volatile__("wrmsr" : : "c" (HV_HYPERCALL_MSR),
+ "a" (msrval & 0xffffffff),
+ "d" (msrval >> 32));
+
+ /* Invoke a Xen hypercall */
+ __asm__ __volatile__("call *%1" : "=a"(rax) :
+ "r"(HCALL_REGION_GPA + INPUTVALUE * 32),
+ "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
+ "r"(r10), "r"(r8), "r"(r9));
+ GUEST_ASSERT(rax == RETVALUE);
+
+ /* Invoke a Hyper-V hypercall */
+ rax = 0;
+ rcx = HVCALL_SIGNAL_EVENT; /* code */
+ rdx = 0x5a5a5a5a; /* ingpa (badly aligned) */
+ __asm__ __volatile__("call *%1" : "=a"(rax) :
+ "r"(HCALL_REGION_GPA + PAGE_SIZE),
+ "a"(rax), "c"(rcx), "d"(rdx),
+ "r"(r8));
+ GUEST_ASSERT(rax == HV_STATUS_INVALID_ALIGNMENT);
+
+ GUEST_DONE();
+}
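
The indirect calls above land in the hypercall pages at fixed offsets: the Xen page reserves a 32-byte stub per hypercall number (hence HCALL_REGION_GPA + INPUTVALUE * 32), while the Hyper-V page is a single entry point at its base. A small helper for the Xen side (sketch, same layout assumed):

/* Illustrative: entry address of Xen hypercall 'nr' within the hypercall page. */
static inline unsigned long xen_hypercall_entry(unsigned long page_gpa, unsigned int nr)
{
	return page_gpa + nr * 32;	/* each hypercall owns a 32-byte stub */
}
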
+
+int main(int argc, char *argv[])
+{
+ unsigned int xen_caps;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+ TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_set_hv_cpuid(vcpu);
+
+ struct kvm_xen_hvm_config hvmc = {
+ .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
+ .msr = XEN_HYPERCALL_MSR,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
+
+ /* Map a region for the hypercall pages */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ HCALL_REGION_GPA, HCALL_REGION_SLOT, 2, 0);
+ virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2);
+
+ for (;;) {
+ volatile struct kvm_run *run = vcpu->run;
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ if (run->exit_reason == KVM_EXIT_XEN) {
+ TEST_ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
+ TEST_ASSERT_EQ(run->xen.u.hcall.cpl, 0);
+ TEST_ASSERT_EQ(run->xen.u.hcall.longmode, 1);
+ TEST_ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
+ run->xen.u.hcall.result = RETVALUE;
+ continue;
+ }
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+ }
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019, Google LLC.
+ *
+ * Tests for the IA32_XSS MSR.
+ */
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define MSR_BITS 64
+
+int main(int argc, char *argv[])
+{
+ bool xss_in_msr_list;
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ uint64_t xss_val;
+ int i, r;
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVES));
+
+ xss_val = vcpu_get_msr(vcpu, MSR_IA32_XSS);
+ TEST_ASSERT(xss_val == 0,
+ "MSR_IA32_XSS should be initialized to zero");
+
+ vcpu_set_msr(vcpu, MSR_IA32_XSS, xss_val);
+
+ /*
+ * At present, KVM only supports a guest IA32_XSS value of 0. Verify
+ * that trying to set the guest IA32_XSS to an unsupported value fails.
+ * Also, in the future, when a non-zero value succeeds, check that
+ * IA32_XSS is in the list of MSRs to save/restore.
+ */
+ xss_in_msr_list = kvm_msr_is_in_save_restore_list(MSR_IA32_XSS);
+ for (i = 0; i < MSR_BITS; ++i) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_XSS, 1ull << i);
+
+ /*
+ * Setting a list of MSRs returns the entry that "faulted", or
+ * the last entry +1 if all MSRs were successfully written.
+ */
+ TEST_ASSERT(!r || r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r));
+ TEST_ASSERT(r != 1 || xss_in_msr_list,
+ "IA32_XSS was able to be set, but was not in save/restore list");
+ }
+
+ kvm_vm_free(vm);
+}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * amx tests
- *
- * Copyright (C) 2021, Intel, Inc.
- *
- * Tests for amx #NM exception and save/restore.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/syscall.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#ifndef __x86_64__
-# error This test is 64-bit only
-#endif
-
-#define NUM_TILES 8
-#define TILE_SIZE 1024
-#define XSAVE_SIZE ((NUM_TILES * TILE_SIZE) + PAGE_SIZE)
-
-/* Tile configuration associated: */
-#define PALETTE_TABLE_INDEX 1
-#define MAX_TILES 16
-#define RESERVED_BYTES 14
-
-#define XSAVE_HDR_OFFSET 512
-
-struct tile_config {
- u8 palette_id;
- u8 start_row;
- u8 reserved[RESERVED_BYTES];
- u16 colsb[MAX_TILES];
- u8 rows[MAX_TILES];
-};
-
-struct tile_data {
- u8 data[NUM_TILES * TILE_SIZE];
-};
-
-struct xtile_info {
- u16 bytes_per_tile;
- u16 bytes_per_row;
- u16 max_names;
- u16 max_rows;
- u32 xsave_offset;
- u32 xsave_size;
-};
-
-static struct xtile_info xtile;
-
-static inline void __ldtilecfg(void *cfg)
-{
- asm volatile(".byte 0xc4,0xe2,0x78,0x49,0x00"
- : : "a"(cfg));
-}
-
-static inline void __tileloadd(void *tile)
-{
- asm volatile(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10"
- : : "a"(tile), "d"(0));
-}
-
-static inline void __tilerelease(void)
-{
- asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::);
-}
-
-static inline void __xsavec(struct xstate *xstate, uint64_t rfbm)
-{
- uint32_t rfbm_lo = rfbm;
- uint32_t rfbm_hi = rfbm >> 32;
-
- asm volatile("xsavec (%%rdi)"
- : : "D" (xstate), "a" (rfbm_lo), "d" (rfbm_hi)
- : "memory");
-}
-
-static void check_xtile_info(void)
-{
- GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE);
-
- GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0));
- GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE);
-
- xtile.xsave_offset = this_cpu_property(X86_PROPERTY_XSTATE_TILE_OFFSET);
- GUEST_ASSERT(xtile.xsave_offset == 2816);
- xtile.xsave_size = this_cpu_property(X86_PROPERTY_XSTATE_TILE_SIZE);
- GUEST_ASSERT(xtile.xsave_size == 8192);
- GUEST_ASSERT(sizeof(struct tile_data) >= xtile.xsave_size);
-
- GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_MAX_PALETTE_TABLES));
- GUEST_ASSERT(this_cpu_property(X86_PROPERTY_AMX_MAX_PALETTE_TABLES) >=
- PALETTE_TABLE_INDEX);
-
- GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_NR_TILE_REGS));
- xtile.max_names = this_cpu_property(X86_PROPERTY_AMX_NR_TILE_REGS);
- GUEST_ASSERT(xtile.max_names == 8);
- xtile.bytes_per_tile = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_TILE);
- GUEST_ASSERT(xtile.bytes_per_tile == 1024);
- xtile.bytes_per_row = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_ROW);
- GUEST_ASSERT(xtile.bytes_per_row == 64);
- xtile.max_rows = this_cpu_property(X86_PROPERTY_AMX_MAX_ROWS);
- GUEST_ASSERT(xtile.max_rows == 16);
-}
-
-static void set_tilecfg(struct tile_config *cfg)
-{
- int i;
-
- /* Only palette id 1 */
- cfg->palette_id = 1;
- for (i = 0; i < xtile.max_names; i++) {
- cfg->colsb[i] = xtile.bytes_per_row;
- cfg->rows[i] = xtile.max_rows;
- }
-}
-
-static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
- struct tile_data *tiledata,
- struct xstate *xstate)
-{
- GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE) &&
- this_cpu_has(X86_FEATURE_OSXSAVE));
- check_xtile_info();
- GUEST_SYNC(1);
-
- /* xfd=0, enable amx */
- wrmsr(MSR_IA32_XFD, 0);
- GUEST_SYNC(2);
- GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0);
- set_tilecfg(amx_cfg);
- __ldtilecfg(amx_cfg);
- GUEST_SYNC(3);
- /* Check save/restore when trap to userspace */
- __tileloadd(tiledata);
- GUEST_SYNC(4);
- __tilerelease();
- GUEST_SYNC(5);
- /*
- * After XSAVEC, XTILEDATA is cleared in the xstate_bv but is set in
- * the xcomp_bv.
- */
- xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA;
- __xsavec(xstate, XFEATURE_MASK_XTILE_DATA);
- GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
- GUEST_ASSERT(xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA);
-
- /* xfd=0x40000, disable amx tiledata */
- wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA);
-
- /*
- * XTILEDATA is cleared in xstate_bv but set in xcomp_bv, this property
- * remains the same even when amx tiledata is disabled by IA32_XFD.
- */
- xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA;
- __xsavec(xstate, XFEATURE_MASK_XTILE_DATA);
- GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
- GUEST_ASSERT((xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA));
-
- GUEST_SYNC(6);
- GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
- set_tilecfg(amx_cfg);
- __ldtilecfg(amx_cfg);
- /* Trigger #NM exception */
- __tileloadd(tiledata);
- GUEST_SYNC(10);
-
- GUEST_DONE();
-}
-
-void guest_nm_handler(struct ex_regs *regs)
-{
- /* Check if #NM is triggered by XFEATURE_MASK_XTILE_DATA */
- GUEST_SYNC(7);
- GUEST_ASSERT(!(get_cr0() & X86_CR0_TS));
- GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
- GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
- GUEST_SYNC(8);
- GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
- GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
- /* Clear xfd_err */
- wrmsr(MSR_IA32_XFD_ERR, 0);
- /* xfd=0, enable amx */
- wrmsr(MSR_IA32_XFD, 0);
- GUEST_SYNC(9);
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_regs regs1, regs2;
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct kvm_x86_state *state;
- int xsave_restore_size;
- vm_vaddr_t amx_cfg, tiledata, xstate;
- struct ucall uc;
- u32 amx_offset;
- int ret;
-
- /*
- * Note, all off-by-default features must be enabled before anything
- * caches KVM_GET_SUPPORTED_CPUID, e.g. before using kvm_cpu_has().
- */
- vm_xsave_require_permission(XFEATURE_MASK_XTILE_DATA);
-
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD));
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE));
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG));
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA));
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA_XFD));
-
- /* Create VM */
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
- TEST_ASSERT(kvm_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE),
- "KVM should enumerate max XSAVE size when XSAVE is supported");
- xsave_restore_size = kvm_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE);
-
- vcpu_regs_get(vcpu, ®s1);
-
- /* Register #NM handler */
- vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler);
-
- /* amx cfg for guest_code */
- amx_cfg = vm_vaddr_alloc_page(vm);
- memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize());
-
- /* amx tiledata for guest_code */
- tiledata = vm_vaddr_alloc_pages(vm, 2);
- memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize());
-
- /* XSAVE state for guest_code */
- xstate = vm_vaddr_alloc_pages(vm, DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
- memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
- vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate);
-
- for (;;) {
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- /* NOT REACHED */
- case UCALL_SYNC:
- switch (uc.args[1]) {
- case 1:
- case 2:
- case 3:
- case 5:
- case 6:
- case 7:
- case 8:
- fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]);
- break;
- case 4:
- case 10:
- fprintf(stderr,
- "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]);
-
- /* Compacted mode, get amx offset by xsave area
- * size subtract 8K amx size.
- */
- amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
- state = vcpu_save_state(vcpu);
- void *amx_start = (void *)state->xsave + amx_offset;
- void *tiles_data = (void *)addr_gva2hva(vm, tiledata);
- /* Only check TMM0 register, 1 tile */
- ret = memcmp(amx_start, tiles_data, TILE_SIZE);
- TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret);
- kvm_x86_state_cleanup(state);
- break;
- case 9:
- fprintf(stderr,
- "GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]);
- break;
- }
- break;
- case UCALL_DONE:
- fprintf(stderr, "UCALL_DONE\n");
- goto done;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
-
- state = vcpu_save_state(vcpu);
- memset(®s1, 0, sizeof(regs1));
- vcpu_regs_get(vcpu, ®s1);
-
- kvm_vm_release(vm);
-
- /* Restore state in a new VM. */
- vcpu = vm_recreate_with_one_vcpu(vm);
- vcpu_load_state(vcpu, state);
- kvm_x86_state_cleanup(state);
-
- memset(®s2, 0, sizeof(regs2));
- vcpu_regs_get(vcpu, ®s2);
- TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)),
- "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
- (ulong) regs2.rdi, (ulong) regs2.rsi);
- }
-done:
- kvm_vm_free(vm);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (c) 2024 Intel Corporation
- *
- * Verify KVM correctly emulates the APIC bus frequency when the VMM configures
- * the frequency via KVM_CAP_X86_APIC_BUS_CYCLES_NS. Start the APIC timer by
- * programming TMICT (timer initial count) to the largest value possible (so
- * that the timer will not expire during the test). Then, after an arbitrary
- * amount of time has elapsed, verify TMCCT (timer current count) is within 1%
- * of the expected value based on the time elapsed, the APIC bus frequency, and
- * the programmed TDCR (timer divide configuration register).
- */
-
-#include "apic.h"
-#include "test_util.h"
-
-/*
- * Possible TDCR values with matching divide count. Used to modify APIC
- * timer frequency.
- */
-static const struct {
- const uint32_t tdcr;
- const uint32_t divide_count;
-} tdcrs[] = {
- {0x0, 2},
- {0x1, 4},
- {0x2, 8},
- {0x3, 16},
- {0x8, 32},
- {0x9, 64},
- {0xa, 128},
- {0xb, 1},
-};
-
-static bool is_x2apic;
-
-static void apic_enable(void)
-{
- if (is_x2apic)
- x2apic_enable();
- else
- xapic_enable();
-}
-
-static uint32_t apic_read_reg(unsigned int reg)
-{
- return is_x2apic ? x2apic_read_reg(reg) : xapic_read_reg(reg);
-}
-
-static void apic_write_reg(unsigned int reg, uint32_t val)
-{
- if (is_x2apic)
- x2apic_write_reg(reg, val);
- else
- xapic_write_reg(reg, val);
-}
-
-static void apic_guest_code(uint64_t apic_hz, uint64_t delay_ms)
-{
- uint64_t tsc_hz = guest_tsc_khz * 1000;
- const uint32_t tmict = ~0u;
- uint64_t tsc0, tsc1, freq;
- uint32_t tmcct;
- int i;
-
- apic_enable();
-
- /*
- * Setup one-shot timer. The vector does not matter because the
- * interrupt should not fire.
- */
- apic_write_reg(APIC_LVTT, APIC_LVT_TIMER_ONESHOT | APIC_LVT_MASKED);
-
- for (i = 0; i < ARRAY_SIZE(tdcrs); i++) {
- apic_write_reg(APIC_TDCR, tdcrs[i].tdcr);
- apic_write_reg(APIC_TMICT, tmict);
-
- tsc0 = rdtsc();
- udelay(delay_ms * 1000);
- tmcct = apic_read_reg(APIC_TMCCT);
- tsc1 = rdtsc();
-
- /*
- * Stop the timer _after_ reading the current, final count, as
- * writing the initial counter also modifies the current count.
- */
- apic_write_reg(APIC_TMICT, 0);
-
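-		/*
-		 * Back-compute the bus frequency: timer ticks consumed
-		 * (TMICT - TMCCT), scaled by the divide count, over the
-		 * elapsed time measured via the TSC.
-		 */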
- freq = (tmict - tmcct) * tdcrs[i].divide_count * tsc_hz / (tsc1 - tsc0);
- /* Check if measured frequency is within 5% of configured frequency. */
- __GUEST_ASSERT(freq < apic_hz * 105 / 100 && freq > apic_hz * 95 / 100,
- "Frequency = %lu (wanted %lu - %lu), bus = %lu, div = %u, tsc = %lu",
- freq, apic_hz * 95 / 100, apic_hz * 105 / 100,
- apic_hz, tdcrs[i].divide_count, tsc_hz);
- }
-
- GUEST_DONE();
-}
-
-static void test_apic_bus_clock(struct kvm_vcpu *vcpu)
-{
- bool done = false;
- struct ucall uc;
-
- while (!done) {
- vcpu_run(vcpu);
-
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_DONE:
- done = true;
- break;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- break;
- }
- }
-}
-
-static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms,
- bool x2apic)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- int ret;
-
- is_x2apic = x2apic;
-
- vm = vm_create(1);
-
- sync_global_to_guest(vm, is_x2apic);
-
- vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
- NSEC_PER_SEC / apic_hz);
-
- vcpu = vm_vcpu_add(vm, 0, apic_guest_code);
- vcpu_args_set(vcpu, 2, apic_hz, delay_ms);
-
- ret = __vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
- NSEC_PER_SEC / apic_hz);
- TEST_ASSERT(ret < 0 && errno == EINVAL,
- "Setting of APIC bus frequency after vCPU is created should fail.");
-
- if (!is_x2apic)
- virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
-
- test_apic_bus_clock(vcpu);
- kvm_vm_free(vm);
-}
-
-static void help(char *name)
-{
- puts("");
- printf("usage: %s [-h] [-d delay] [-f APIC bus freq]\n", name);
- puts("");
- printf("-d: Delay (in msec) guest uses to measure APIC bus frequency.\n");
- printf("-f: The APIC bus frequency (in MHz) to be configured for the guest.\n");
- puts("");
-}
-
-int main(int argc, char *argv[])
-{
- /*
-	 * Arbitrarily default to 25MHz for the APIC bus frequency, which is
- * different enough from the default 1GHz to be interesting.
- */
- uint64_t apic_hz = 25 * 1000 * 1000;
- uint64_t delay_ms = 100;
- int opt;
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS));
-
- while ((opt = getopt(argc, argv, "d:f:h")) != -1) {
- switch (opt) {
- case 'f':
- apic_hz = atoi_positive("APIC bus frequency", optarg) * 1000 * 1000;
- break;
- case 'd':
- delay_ms = atoi_positive("Delay in milliseconds", optarg);
- break;
- case 'h':
- default:
- help(argv[0]);
- exit(KSFT_SKIP);
- }
- }
-
- run_apic_bus_clock_test(apic_hz, delay_ms, false);
- run_apic_bus_clock_test(apic_hz, delay_ms, true);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2021, Red Hat Inc.
- *
- * Generic tests for KVM CPUID set/get ioctls
- */
-#include <asm/kvm_para.h>
-#include <linux/kvm_para.h>
-#include <stdint.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-struct cpuid_mask {
- union {
- struct {
- u32 eax;
- u32 ebx;
- u32 ecx;
- u32 edx;
- };
- u32 regs[4];
- };
-};
-
-static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
-{
- int i;
- u32 eax, ebx, ecx, edx;
-
- for (i = 0; i < guest_cpuid->nent; i++) {
- __cpuid(guest_cpuid->entries[i].function,
- guest_cpuid->entries[i].index,
- &eax, &ebx, &ecx, &edx);
-
- GUEST_ASSERT_EQ(eax, guest_cpuid->entries[i].eax);
- GUEST_ASSERT_EQ(ebx, guest_cpuid->entries[i].ebx);
- GUEST_ASSERT_EQ(ecx, guest_cpuid->entries[i].ecx);
- GUEST_ASSERT_EQ(edx, guest_cpuid->entries[i].edx);
- }
-
-}
-
-static void guest_main(struct kvm_cpuid2 *guest_cpuid)
-{
- GUEST_SYNC(1);
-
- test_guest_cpuids(guest_cpuid);
-
- GUEST_SYNC(2);
-
- GUEST_ASSERT_EQ(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF), 0x40000001);
-
- GUEST_DONE();
-}
-
-static struct cpuid_mask get_const_cpuid_mask(const struct kvm_cpuid_entry2 *entry)
-{
- struct cpuid_mask mask;
-
- memset(&mask, 0xff, sizeof(mask));
-
- switch (entry->function) {
- case 0x1:
- mask.regs[X86_FEATURE_OSXSAVE.reg] &= ~BIT(X86_FEATURE_OSXSAVE.bit);
- break;
- case 0x7:
- mask.regs[X86_FEATURE_OSPKE.reg] &= ~BIT(X86_FEATURE_OSPKE.bit);
- break;
- case 0xd:
- /*
- * CPUID.0xD.{0,1}.EBX enumerate XSAVE size based on the current
- * XCR0 and IA32_XSS MSR values.
- */
- if (entry->index < 2)
- mask.ebx = 0;
- break;
- }
- return mask;
-}
-
-static void compare_cpuids(const struct kvm_cpuid2 *cpuid1,
- const struct kvm_cpuid2 *cpuid2)
-{
- const struct kvm_cpuid_entry2 *e1, *e2;
- int i;
-
- TEST_ASSERT(cpuid1->nent == cpuid2->nent,
- "CPUID nent mismatch: %d vs. %d", cpuid1->nent, cpuid2->nent);
-
- for (i = 0; i < cpuid1->nent; i++) {
- struct cpuid_mask mask;
-
- e1 = &cpuid1->entries[i];
- e2 = &cpuid2->entries[i];
-
- TEST_ASSERT(e1->function == e2->function &&
- e1->index == e2->index && e1->flags == e2->flags,
-			    "CPUID entries[%d] mismatch: 0x%x.%d.%x vs. 0x%x.%d.%x",
- i, e1->function, e1->index, e1->flags,
- e2->function, e2->index, e2->flags);
-
- /* Mask off dynamic bits, e.g. OSXSAVE, when comparing entries. */
- mask = get_const_cpuid_mask(e1);
-
- TEST_ASSERT((e1->eax & mask.eax) == (e2->eax & mask.eax) &&
- (e1->ebx & mask.ebx) == (e2->ebx & mask.ebx) &&
- (e1->ecx & mask.ecx) == (e2->ecx & mask.ecx) &&
- (e1->edx & mask.edx) == (e2->edx & mask.edx),
- "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x",
- e1->function, e1->index,
- e1->eax & mask.eax, e1->ebx & mask.ebx,
- e1->ecx & mask.ecx, e1->edx & mask.edx,
- e2->eax & mask.eax, e2->ebx & mask.ebx,
- e2->ecx & mask.ecx, e2->edx & mask.edx);
- }
-}
-
-static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
-{
- struct ucall uc;
-
- vcpu_run(vcpu);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
- uc.args[1] == stage + 1,
- "Stage %d: Unexpected register values vmexit, got %lx",
- stage + 1, (ulong)uc.args[1]);
- return;
- case UCALL_DONE:
- return;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- default:
- TEST_ASSERT(false, "Unexpected exit: %s",
- exit_reason_str(vcpu->run->exit_reason));
- }
-}
-
-struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid)
-{
- int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]);
- vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR);
- struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva);
-
- memcpy(guest_cpuids, cpuid, size);
-
- *p_gva = gva;
- return guest_cpuids;
-}
-
-static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *ent;
- int rc;
- u32 eax, ebx, x;
-
- /* Setting unmodified CPUID is allowed */
- rc = __vcpu_set_cpuid(vcpu);
- TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
-
- /* Changing CPU features is forbidden */
- ent = vcpu_get_cpuid_entry(vcpu, 0x7);
- ebx = ent->ebx;
- ent->ebx--;
- rc = __vcpu_set_cpuid(vcpu);
- TEST_ASSERT(rc, "Changing CPU features should fail");
- ent->ebx = ebx;
-
- /* Changing MAXPHYADDR is forbidden */
- ent = vcpu_get_cpuid_entry(vcpu, 0x80000008);
- eax = ent->eax;
- x = eax & 0xff;
- ent->eax = (eax & ~0xffu) | (x - 1);
- rc = __vcpu_set_cpuid(vcpu);
- TEST_ASSERT(rc, "Changing MAXPHYADDR should fail");
- ent->eax = eax;
-}
-
-static void test_get_cpuid2(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent + 1);
- int i, r;
-
- vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
- TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent,
- "KVM didn't update nent on success, wanted %u, got %u",
- vcpu->cpuid->nent, cpuid->nent);
-
- for (i = 0; i < vcpu->cpuid->nent; i++) {
- cpuid->nent = i;
- r = __vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
- TEST_ASSERT(r && errno == E2BIG, KVM_IOCTL_ERROR(KVM_GET_CPUID2, r));
- TEST_ASSERT(cpuid->nent == i, "KVM modified nent on failure");
- }
- free(cpuid);
-}
-
-int main(void)
-{
- struct kvm_vcpu *vcpu;
- vm_vaddr_t cpuid_gva;
- struct kvm_vm *vm;
- int stage;
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_main);
-
- compare_cpuids(kvm_get_supported_cpuid(), vcpu->cpuid);
-
- vcpu_alloc_cpuid(vm, &cpuid_gva, vcpu->cpuid);
-
- vcpu_args_set(vcpu, 1, cpuid_gva);
-
- for (stage = 0; stage < 3; stage++)
- run_vcpu(vcpu, stage);
-
- set_cpuid_after_run(vcpu);
-
- test_get_cpuid2(vcpu);
-
- kvm_vm_free(vm);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * CR4 and CPUID sync test
- *
- * Copyright 2018, Red Hat, Inc. and/or its affiliates.
- *
- * Author:
- * Wei Huang <wei@redhat.com>
- */
-
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-
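-/*
- * Writing to this I/O port exits to userspace, which clears CR4.OSXSAVE on
- * the guest's behalf (see the exit handling in main()).
- */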
-#define MAGIC_HYPERCALL_PORT 0x80
-
-static void guest_code(void)
-{
- u32 regs[4] = {
- [KVM_CPUID_EAX] = X86_FEATURE_OSXSAVE.function,
- [KVM_CPUID_ECX] = X86_FEATURE_OSXSAVE.index,
- };
-
- /* CR4.OSXSAVE should be enabled by default (for selftests vCPUs). */
- GUEST_ASSERT(get_cr4() & X86_CR4_OSXSAVE);
-
- /* verify CR4.OSXSAVE == CPUID.OSXSAVE */
- GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
-
- /*
-	 * Notify hypervisor to clear CR4.OSXSAVE, do CPUID and save output,
- * and then restore CR4. Do this all in assembly to ensure no AVX
- * instructions are executed while OSXSAVE=0.
- */
- asm volatile (
- "out %%al, $" __stringify(MAGIC_HYPERCALL_PORT) "\n\t"
- "cpuid\n\t"
- "mov %%rdi, %%cr4\n\t"
- : "+a" (regs[KVM_CPUID_EAX]),
- "=b" (regs[KVM_CPUID_EBX]),
- "+c" (regs[KVM_CPUID_ECX]),
- "=d" (regs[KVM_CPUID_EDX])
- : "D" (get_cr4())
- );
-
- /* Verify KVM cleared OSXSAVE in CPUID when it was cleared in CR4. */
- GUEST_ASSERT(!(regs[X86_FEATURE_OSXSAVE.reg] & BIT(X86_FEATURE_OSXSAVE.bit)));
-
- /* Verify restoring CR4 also restored OSXSAVE in CPUID. */
- GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
-
- GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct kvm_sregs sregs;
- struct ucall uc;
-
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
- while (1) {
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- if (vcpu->run->io.port == MAGIC_HYPERCALL_PORT &&
- vcpu->run->io.direction == KVM_EXIT_IO_OUT) {
- /* emulate hypervisor clearing CR4.OSXSAVE */
- vcpu_sregs_get(vcpu, &sregs);
- sregs.cr4 &= ~X86_CR4_OSXSAVE;
- vcpu_sregs_set(vcpu, &sregs);
- continue;
- }
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_DONE:
- goto done;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
- }
-
-done:
- kvm_vm_free(vm);
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * KVM guest debug register tests
- *
- * Copyright (C) 2020, Red Hat, Inc.
- */
-#include <stdio.h>
-#include <string.h>
-#include "kvm_util.h"
-#include "processor.h"
-#include "apic.h"
-
-#define DR6_BD (1 << 13)
-#define DR7_GD (1 << 13)
-
-#define IRQ_VECTOR 0xAA
-
-/* For testing data access debug BP */
-uint32_t guest_value;
-
-extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
-
-static void guest_code(void)
-{
- /* Create a pending interrupt on current vCPU */
- x2apic_enable();
- x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT |
- APIC_DM_FIXED | IRQ_VECTOR);
-
- /*
- * Software BP tests.
- *
- * NOTE: sw_bp need to be before the cmd here, because int3 is an
- * exception rather than a normal trap for KVM_SET_GUEST_DEBUG (we
- * capture it using the vcpu exception bitmap).
- */
- asm volatile("sw_bp: int3");
-
- /* Hardware instruction BP test */
- asm volatile("hw_bp: nop");
-
- /* Hardware data BP test */
- asm volatile("mov $1234,%%rax;\n\t"
- "mov %%rax,%0;\n\t write_data:"
- : "=m" (guest_value) : : "rax");
-
- /*
- * Single step test, covers 2 basic instructions and 2 emulated
- *
- * Enable interrupts during the single stepping to see that pending
- * interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ.
- *
- * Write MSR_IA32_TSC_DEADLINE to verify that KVM's fastpath handler
- * exits to userspace due to single-step being enabled.
- */
- asm volatile("ss_start: "
- "sti\n\t"
- "xor %%eax,%%eax\n\t"
- "cpuid\n\t"
- "movl $" __stringify(MSR_IA32_TSC_DEADLINE) ", %%ecx\n\t"
- "wrmsr\n\t"
- "cli\n\t"
- : : : "eax", "ebx", "ecx", "edx");
-
- /* DR6.BD test */
- asm volatile("bd_start: mov %%dr0, %%rax" : : : "rax");
- GUEST_DONE();
-}
-
-#define CAST_TO_RIP(v) ((unsigned long long)&(v))
-
-static void vcpu_skip_insn(struct kvm_vcpu *vcpu, int insn_len)
-{
- struct kvm_regs regs;
-
-	vcpu_regs_get(vcpu, &regs);
-	regs.rip += insn_len;
-	vcpu_regs_set(vcpu, &regs);
-}
-
-int main(void)
-{
- struct kvm_guest_debug debug;
- unsigned long long target_dr6, target_rip;
- struct kvm_vcpu *vcpu;
- struct kvm_run *run;
- struct kvm_vm *vm;
- struct ucall uc;
- uint64_t cmd;
- int i;
- /* Instruction lengths starting at ss_start */
- int ss_size[6] = {
-		1,		/* sti */
- 2, /* xor */
- 2, /* cpuid */
- 5, /* mov */
-		2,		/* wrmsr */
- 1, /* cli */
- };
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG));
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- run = vcpu->run;
-
- /* Test software BPs - int3 */
- memset(&debug, 0, sizeof(debug));
- debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
- vcpu_guest_debug_set(vcpu, &debug);
- vcpu_run(vcpu);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
- run->debug.arch.exception == BP_VECTOR &&
- run->debug.arch.pc == CAST_TO_RIP(sw_bp),
- "INT3: exit %d exception %d rip 0x%llx (should be 0x%llx)",
- run->exit_reason, run->debug.arch.exception,
- run->debug.arch.pc, CAST_TO_RIP(sw_bp));
- vcpu_skip_insn(vcpu, 1);
-
- /* Test instruction HW BP over DR[0-3] */
- for (i = 0; i < 4; i++) {
- memset(&debug, 0, sizeof(debug));
- debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
- debug.arch.debugreg[i] = CAST_TO_RIP(hw_bp);
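-		/*
-		 * DR7: bit 10 (0x400) is reserved-to-1, and bit (2 * i + 1)
-		 * is the global enable for DR{i}.
-		 */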
- debug.arch.debugreg[7] = 0x400 | (1UL << (2*i+1));
- vcpu_guest_debug_set(vcpu, &debug);
- vcpu_run(vcpu);
- target_dr6 = 0xffff0ff0 | (1UL << i);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
- run->debug.arch.exception == DB_VECTOR &&
- run->debug.arch.pc == CAST_TO_RIP(hw_bp) &&
- run->debug.arch.dr6 == target_dr6,
- "INS_HW_BP (DR%d): exit %d exception %d rip 0x%llx "
- "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
- i, run->exit_reason, run->debug.arch.exception,
- run->debug.arch.pc, CAST_TO_RIP(hw_bp),
- run->debug.arch.dr6, target_dr6);
- }
- /* Skip "nop" */
- vcpu_skip_insn(vcpu, 1);
-
- /* Test data access HW BP over DR[0-3] */
- for (i = 0; i < 4; i++) {
- memset(&debug, 0, sizeof(debug));
- debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
- debug.arch.debugreg[i] = CAST_TO_RIP(guest_value);
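-		/*
-		 * DR7: global-enable DR{i} via bit (2 * i + 1); the 0xd
-		 * nibble sets R/W{i}=01 (break on data writes) and
-		 * LEN{i}=11 (4-byte wide).
-		 */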
- debug.arch.debugreg[7] = 0x00000400 | (1UL << (2*i+1)) |
- (0x000d0000UL << (4*i));
- vcpu_guest_debug_set(vcpu, &debug);
- vcpu_run(vcpu);
- target_dr6 = 0xffff0ff0 | (1UL << i);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
- run->debug.arch.exception == DB_VECTOR &&
- run->debug.arch.pc == CAST_TO_RIP(write_data) &&
- run->debug.arch.dr6 == target_dr6,
- "DATA_HW_BP (DR%d): exit %d exception %d rip 0x%llx "
- "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
- i, run->exit_reason, run->debug.arch.exception,
- run->debug.arch.pc, CAST_TO_RIP(write_data),
- run->debug.arch.dr6, target_dr6);
-		/* Rollback the 7-byte "mov" */
- vcpu_skip_insn(vcpu, -7);
- }
-	/* Skip the 7-byte "mov" */
- vcpu_skip_insn(vcpu, 7);
-
- /* Test single step */
- target_rip = CAST_TO_RIP(ss_start);
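-	/* DR6.BS (bit 14) is set for single-step traps on top of 0xffff0ff0. */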
- target_dr6 = 0xffff4ff0ULL;
- for (i = 0; i < ARRAY_SIZE(ss_size); i++) {
- target_rip += ss_size[i];
- memset(&debug, 0, sizeof(debug));
- debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP |
- KVM_GUESTDBG_BLOCKIRQ;
- debug.arch.debugreg[7] = 0x00000400;
- vcpu_guest_debug_set(vcpu, &debug);
- vcpu_run(vcpu);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
- run->debug.arch.exception == DB_VECTOR &&
- run->debug.arch.pc == target_rip &&
- run->debug.arch.dr6 == target_dr6,
- "SINGLE_STEP[%d]: exit %d exception %d rip 0x%llx "
- "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
- i, run->exit_reason, run->debug.arch.exception,
- run->debug.arch.pc, target_rip, run->debug.arch.dr6,
- target_dr6);
- }
-
- /* Finally test global disable */
- memset(&debug, 0, sizeof(debug));
- debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
- debug.arch.debugreg[7] = 0x400 | DR7_GD;
- vcpu_guest_debug_set(vcpu, &debug);
- vcpu_run(vcpu);
- target_dr6 = 0xffff0ff0 | DR6_BD;
- TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
- run->debug.arch.exception == DB_VECTOR &&
- run->debug.arch.pc == CAST_TO_RIP(bd_start) &&
- run->debug.arch.dr6 == target_dr6,
- "DR7.GD: exit %d exception %d rip 0x%llx "
- "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
- run->exit_reason, run->debug.arch.exception,
-		    run->debug.arch.pc, CAST_TO_RIP(bd_start), run->debug.arch.dr6,
- target_dr6);
-
- /* Disable all debug controls, run to the end */
- memset(&debug, 0, sizeof(debug));
- vcpu_guest_debug_set(vcpu, &debug);
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- cmd = get_ucall(vcpu, &uc);
-	TEST_ASSERT(cmd == UCALL_DONE, "Expected UCALL_DONE, got ucall %lu", cmd);
-
- kvm_vm_free(vm);
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * KVM dirty logging page splitting test
- *
- * Based on dirty_log_perf.c
- *
- * Copyright (C) 2018, Red Hat, Inc.
- * Copyright (C) 2023, Google, Inc.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <pthread.h>
-#include <linux/bitmap.h>
-
-#include "kvm_util.h"
-#include "test_util.h"
-#include "memstress.h"
-#include "guest_modes.h"
-#include "ucall_common.h"
-
-#define VCPUS 2
-#define SLOTS 2
-#define ITERATIONS 2
-
-static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
-
-static enum vm_mem_backing_src_type backing_src = VM_MEM_SRC_ANONYMOUS_HUGETLB;
-
-static u64 dirty_log_manual_caps;
-static bool host_quit;
-static int iteration;
-static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
-
-struct kvm_page_stats {
- uint64_t pages_4k;
- uint64_t pages_2m;
- uint64_t pages_1g;
- uint64_t hugepages;
-};
-
-static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage)
-{
- stats->pages_4k = vm_get_stat(vm, "pages_4k");
- stats->pages_2m = vm_get_stat(vm, "pages_2m");
- stats->pages_1g = vm_get_stat(vm, "pages_1g");
- stats->hugepages = stats->pages_2m + stats->pages_1g;
-
- pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n",
- stage, stats->pages_4k, stats->pages_2m, stats->pages_1g,
- stats->hugepages);
-}
-
-static void run_vcpu_iteration(struct kvm_vm *vm)
-{
- int i;
-
- iteration++;
- for (i = 0; i < VCPUS; i++) {
- while (READ_ONCE(vcpu_last_completed_iteration[i]) !=
- iteration)
- ;
- }
-}
-
-static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
-{
- struct kvm_vcpu *vcpu = vcpu_args->vcpu;
- int vcpu_idx = vcpu_args->vcpu_idx;
-
- while (!READ_ONCE(host_quit)) {
- int current_iteration = READ_ONCE(iteration);
-
- vcpu_run(vcpu);
-
- TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
-
- vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
-
- /* Wait for the start of the next iteration to be signaled. */
- while (current_iteration == READ_ONCE(iteration) &&
- READ_ONCE(iteration) >= 0 &&
- !READ_ONCE(host_quit))
- ;
- }
-}
-
-static void run_test(enum vm_guest_mode mode, void *unused)
-{
- struct kvm_vm *vm;
- unsigned long **bitmaps;
- uint64_t guest_num_pages;
- uint64_t host_num_pages;
- uint64_t pages_per_slot;
- int i;
- struct kvm_page_stats stats_populated;
- struct kvm_page_stats stats_dirty_logging_enabled;
- struct kvm_page_stats stats_dirty_pass[ITERATIONS];
- struct kvm_page_stats stats_clear_pass[ITERATIONS];
- struct kvm_page_stats stats_dirty_logging_disabled;
- struct kvm_page_stats stats_repopulated;
-
- vm = memstress_create_vm(mode, VCPUS, guest_percpu_mem_size,
- SLOTS, backing_src, false);
-
- guest_num_pages = (VCPUS * guest_percpu_mem_size) >> vm->page_shift;
- guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
- host_num_pages = vm_num_host_pages(mode, guest_num_pages);
- pages_per_slot = host_num_pages / SLOTS;
- TEST_ASSERT_EQ(host_num_pages, pages_per_slot * SLOTS);
- TEST_ASSERT(!(host_num_pages % 512),
-		    "Number of pages '%lu' is not a multiple of 2MiB", host_num_pages);
-
- bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot);
-
- if (dirty_log_manual_caps)
- vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
- dirty_log_manual_caps);
-
- /* Start the iterations */
- iteration = -1;
- host_quit = false;
-
- for (i = 0; i < VCPUS; i++)
- vcpu_last_completed_iteration[i] = -1;
-
- memstress_start_vcpu_threads(VCPUS, vcpu_worker);
-
- run_vcpu_iteration(vm);
- get_page_stats(vm, &stats_populated, "populating memory");
-
- /* Enable dirty logging */
- memstress_enable_dirty_logging(vm, SLOTS);
-
- get_page_stats(vm, &stats_dirty_logging_enabled, "enabling dirty logging");
-
- while (iteration < ITERATIONS) {
- run_vcpu_iteration(vm);
- get_page_stats(vm, &stats_dirty_pass[iteration - 1],
- "dirtying memory");
-
- memstress_get_dirty_log(vm, bitmaps, SLOTS);
-
- if (dirty_log_manual_caps) {
- memstress_clear_dirty_log(vm, bitmaps, SLOTS, pages_per_slot);
-
- get_page_stats(vm, &stats_clear_pass[iteration - 1], "clearing dirty log");
- }
- }
-
- /* Disable dirty logging */
- memstress_disable_dirty_logging(vm, SLOTS);
-
- get_page_stats(vm, &stats_dirty_logging_disabled, "disabling dirty logging");
-
- /* Run vCPUs again to fault pages back in. */
- run_vcpu_iteration(vm);
- get_page_stats(vm, &stats_repopulated, "repopulating memory");
-
- /*
- * Tell the vCPU threads to quit. No need to manually check that vCPUs
- * have stopped running after disabling dirty logging, the join will
- * wait for them to exit.
- */
- host_quit = true;
- memstress_join_vcpu_threads(VCPUS);
-
- memstress_free_bitmaps(bitmaps, SLOTS);
- memstress_destroy_vm(vm);
-
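-	/*
-	 * The initial population should have mapped the entire region with
-	 * huge pages: each 2M page accounts for 512 4K pages and each 1G
-	 * page for 512 * 512 of them.
-	 */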
- TEST_ASSERT_EQ((stats_populated.pages_2m * 512 +
- stats_populated.pages_1g * 512 * 512), host_num_pages);
-
- /*
- * Check that all huge pages were split. Since large pages can only
- * exist in the data slot, and the vCPUs should have dirtied all pages
- * in the data slot, there should be no huge pages left after splitting.
- * Splitting happens at dirty log enable time without
- * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and after the first clear pass
- * with that capability.
- */
- if (dirty_log_manual_caps) {
- TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
- TEST_ASSERT(stats_clear_pass[0].pages_4k >= host_num_pages,
- "Expected at least '%lu' 4KiB pages, found only '%lu'",
- host_num_pages, stats_clear_pass[0].pages_4k);
- TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
- } else {
- TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
- TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= host_num_pages,
- "Expected at least '%lu' 4KiB pages, found only '%lu'",
- host_num_pages, stats_dirty_logging_enabled.pages_4k);
- }
-
- /*
- * Once dirty logging is disabled and the vCPUs have touched all their
- * memory again, the hugepage counts should be the same as they were
- * right after initial population of memory.
- */
- TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
- TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
-}
-
-static void help(char *name)
-{
- puts("");
- printf("usage: %s [-h] [-b vcpu bytes] [-s mem type]\n",
- name);
- puts("");
- printf(" -b: specify the size of the memory region which should be\n"
- " dirtied by each vCPU. e.g. 10M or 3G.\n"
- " (default: 1G)\n");
- backing_src_help("-s");
- puts("");
-}
-
-int main(int argc, char *argv[])
-{
- int opt;
-
- TEST_REQUIRE(get_kvm_param_bool("eager_page_split"));
- TEST_REQUIRE(get_kvm_param_bool("tdp_mmu"));
-
- while ((opt = getopt(argc, argv, "b:hs:")) != -1) {
- switch (opt) {
- case 'b':
- guest_percpu_mem_size = parse_size(optarg);
- break;
- case 'h':
- help(argv[0]);
- exit(0);
- case 's':
- backing_src = parse_backing_src_type(optarg);
- break;
- default:
- help(argv[0]);
- exit(1);
- }
- }
-
- if (!is_backing_src_hugetlb(backing_src)) {
- pr_info("This test will only work reliably with HugeTLB memory. "
- "It can work with THP, but that is best effort.\n");
- }
-
- guest_modes_append_default();
-
- dirty_log_manual_caps = 0;
- for_each_guest_mode(run_test, NULL);
-
- dirty_log_manual_caps =
- kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
-
- if (dirty_log_manual_caps) {
- dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
- KVM_DIRTY_LOG_INITIALLY_SET);
- for_each_guest_mode(run_test, NULL);
- } else {
-		pr_info("Skipping testing with MANUAL_PROTECT as it is not supported\n");
- }
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2022, Google LLC.
- *
- * Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE.
- */
-#include "flds_emulation.h"
-#include "test_util.h"
-#include "ucall_common.h"
-
-#define MMIO_GPA 0x700000000
-#define MMIO_GVA MMIO_GPA
-
-static void guest_code(void)
-{
- /* Execute flds with an MMIO address to force KVM to emulate it. */
- flds(MMIO_GVA);
- GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE));
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
- virt_map(vm, MMIO_GVA, MMIO_GPA, 1);
-
- vcpu_run(vcpu);
- handle_flds_emulation_failure_exit(vcpu);
- vcpu_run(vcpu);
- TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
-
- kvm_vm_free(vm);
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2020, Red Hat, Inc.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-static bool is_kvm_controlled_msr(uint32_t msr)
-{
- return msr == MSR_IA32_VMX_CR0_FIXED1 || msr == MSR_IA32_VMX_CR4_FIXED1;
-}
-
-/*
- * For VMX MSRs with a "true" variant, KVM requires userspace to set the "true"
- * MSR, and doesn't allow setting the hidden version.
- */
-static bool is_hidden_vmx_msr(uint32_t msr)
-{
- switch (msr) {
- case MSR_IA32_VMX_PINBASED_CTLS:
- case MSR_IA32_VMX_PROCBASED_CTLS:
- case MSR_IA32_VMX_EXIT_CTLS:
- case MSR_IA32_VMX_ENTRY_CTLS:
- return true;
- default:
- return false;
- }
-}
-
-static bool is_quirked_msr(uint32_t msr)
-{
- return msr != MSR_AMD64_DE_CFG;
-}
-
-static void test_feature_msr(uint32_t msr)
-{
- const uint64_t supported_mask = kvm_get_feature_msr(msr);
- uint64_t reset_value = is_quirked_msr(msr) ? supported_mask : 0;
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- /*
- * Don't bother testing KVM-controlled MSRs beyond verifying that the
- * MSR can be read from userspace. Any value is effectively legal, as
- * KVM is bound by x86 architecture, not by ABI.
- */
- if (is_kvm_controlled_msr(msr))
- return;
-
- /*
- * More goofy behavior. KVM reports the host CPU's actual revision ID,
- * but initializes the vCPU's revision ID to an arbitrary value.
- */
- if (msr == MSR_IA32_UCODE_REV)
- reset_value = host_cpu_is_intel ? 0x100000000ULL : 0x01000065;
-
- /*
- * For quirked MSRs, KVM's ABI is to initialize the vCPU's value to the
- * full set of features supported by KVM. For non-quirked MSRs, and
- * when the quirk is disabled, KVM must zero-initialize the MSR and let
- * userspace do the configuration.
- */
- vm = vm_create_with_one_vcpu(&vcpu, NULL);
- TEST_ASSERT(vcpu_get_msr(vcpu, msr) == reset_value,
- "Wanted 0x%lx for %squirked MSR 0x%x, got 0x%lx",
- reset_value, is_quirked_msr(msr) ? "" : "non-", msr,
- vcpu_get_msr(vcpu, msr));
- if (!is_hidden_vmx_msr(msr))
- vcpu_set_msr(vcpu, msr, supported_mask);
- kvm_vm_free(vm);
-
- if (is_hidden_vmx_msr(msr))
- return;
-
- if (!kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2) ||
- !(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
- return;
-
- vm = vm_create(1);
- vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_STUFF_FEATURE_MSRS);
-
- vcpu = vm_vcpu_add(vm, 0, NULL);
- TEST_ASSERT(!vcpu_get_msr(vcpu, msr),
- "Quirk disabled, wanted '0' for MSR 0x%x, got 0x%lx",
- msr, vcpu_get_msr(vcpu, msr));
- kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
- const struct kvm_msr_list *feature_list;
- int i;
-
- /*
- * Skip the entire test if MSR_FEATURES isn't supported, other tests
- * will cover the "regular" list of MSRs, the coverage here is purely
- * opportunistic and not interesting on its own.
- */
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES));
-
- (void)kvm_get_msr_index_list();
-
- feature_list = kvm_get_feature_msr_index_list();
- for (i = 0; i < feature_list->nmsrs; i++)
- test_feature_msr(feature_list->indices[i]);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2020, Google LLC.
- *
- * Tests for KVM paravirtual feature disablement
- */
-#include <asm/kvm_para.h>
-#include <linux/kvm_para.h>
-#include <linux/stringify.h>
-#include <stdint.h>
-
-#include "kvm_test_harness.h"
-#include "apic.h"
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-/* VMCALL and VMMCALL are both 3-byte opcodes. */
-#define HYPERCALL_INSN_SIZE 3
-
-static bool quirk_disabled;
-
-static void guest_ud_handler(struct ex_regs *regs)
-{
- regs->rax = -EFAULT;
- regs->rip += HYPERCALL_INSN_SIZE;
-}
-
-static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xc1 };
-static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 };
-
-extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE];
-static uint64_t do_sched_yield(uint8_t apic_id)
-{
- uint64_t ret;
-
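-	/*
-	 * The 0xcc (int3) placeholder bytes are overwritten by guest_main()
-	 * with the "other" vendor's hypercall instruction before this runs.
-	 */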
- asm volatile("hypercall_insn:\n\t"
- ".byte 0xcc,0xcc,0xcc\n\t"
- : "=a"(ret)
- : "a"((uint64_t)KVM_HC_SCHED_YIELD), "b"((uint64_t)apic_id)
- : "memory");
-
- return ret;
-}
-
-static void guest_main(void)
-{
- const uint8_t *native_hypercall_insn;
- const uint8_t *other_hypercall_insn;
- uint64_t ret;
-
- if (host_cpu_is_intel) {
- native_hypercall_insn = vmx_vmcall;
- other_hypercall_insn = svm_vmmcall;
- } else if (host_cpu_is_amd) {
- native_hypercall_insn = svm_vmmcall;
- other_hypercall_insn = vmx_vmcall;
- } else {
- GUEST_ASSERT(0);
- /* unreachable */
- return;
- }
-
- memcpy(hypercall_insn, other_hypercall_insn, HYPERCALL_INSN_SIZE);
-
- ret = do_sched_yield(GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)));
-
- /*
- * If the quirk is disabled, verify that guest_ud_handler() "returned"
- * -EFAULT and that KVM did NOT patch the hypercall. If the quirk is
- * enabled, verify that the hypercall succeeded and that KVM patched in
- * the "right" hypercall.
- */
- if (quirk_disabled) {
- GUEST_ASSERT(ret == (uint64_t)-EFAULT);
- GUEST_ASSERT(!memcmp(other_hypercall_insn, hypercall_insn,
- HYPERCALL_INSN_SIZE));
- } else {
- GUEST_ASSERT(!ret);
- GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn,
- HYPERCALL_INSN_SIZE));
- }
-
- GUEST_DONE();
-}
-
-KVM_ONE_VCPU_TEST_SUITE(fix_hypercall);
-
-static void enter_guest(struct kvm_vcpu *vcpu)
-{
- struct kvm_run *run = vcpu->run;
- struct ucall uc;
-
- vcpu_run(vcpu);
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- pr_info("%s: %016lx\n", (const char *)uc.args[2], uc.args[3]);
- break;
- case UCALL_DONE:
- return;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- default:
- TEST_FAIL("Unhandled ucall: %ld\nexit_reason: %u (%s)",
- uc.cmd, run->exit_reason, exit_reason_str(run->exit_reason));
- }
-}
-
-static void test_fix_hypercall(struct kvm_vcpu *vcpu, bool disable_quirk)
-{
- struct kvm_vm *vm = vcpu->vm;
-
- vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler);
-
- if (disable_quirk)
- vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2,
- KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
-
- quirk_disabled = disable_quirk;
- sync_global_to_guest(vm, quirk_disabled);
-
- virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
-
- enter_guest(vcpu);
-}
-
-KVM_ONE_VCPU_TEST(fix_hypercall, enable_quirk, guest_main)
-{
- test_fix_hypercall(vcpu, false);
-}
-
-KVM_ONE_VCPU_TEST(fix_hypercall, disable_quirk, guest_main)
-{
- test_fix_hypercall(vcpu, true);
-}
-
-int main(int argc, char *argv[])
-{
- TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
-
- return test_harness_run(argc, argv);
-}
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_FLDS_EMULATION_H
-#define SELFTEST_KVM_FLDS_EMULATION_H
-
-#include "kvm_util.h"
-
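-/* Opcode 0xd9 with ModRM 0x00: FLD m32fp from the address in RAX. */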
-#define FLDS_MEM_EAX ".byte 0xd9, 0x00"
-
-/*
- * flds is an instruction that the KVM instruction emulator is known not to
- * support. This can be used in guest code along with a mechanism to force
- * KVM to emulate the instruction (e.g. by providing an MMIO address) to
- * exercise emulation failures.
- */
-static inline void flds(uint64_t address)
-{
- __asm__ __volatile__(FLDS_MEM_EAX :: "a"(address));
-}
-
-static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu)
-{
- struct kvm_run *run = vcpu->run;
- struct kvm_regs regs;
- uint8_t *insn_bytes;
- uint64_t flags;
-
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
-
- TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
- "Unexpected suberror: %u",
- run->emulation_failure.suberror);
-
- flags = run->emulation_failure.flags;
- TEST_ASSERT(run->emulation_failure.ndata >= 3 &&
- flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES,
- "run->emulation_failure is missing instruction bytes");
-
- TEST_ASSERT(run->emulation_failure.insn_size >= 2,
- "Expected a 2-byte opcode for 'flds', got %d bytes",
- run->emulation_failure.insn_size);
-
- insn_bytes = run->emulation_failure.insn_bytes;
- TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0,
- "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x",
- insn_bytes[0], insn_bytes[1]);
-
-	vcpu_regs_get(vcpu, &regs);
-	regs.rip += 2;
-	vcpu_regs_set(vcpu, &regs);
-}
-
-#endif /* !SELFTEST_KVM_FLDS_EMULATION_H */
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2023, Google LLC.
- */
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "vmx.h"
-
-void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit)
-{
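-	/*
-	 * The "ignored" bits are HWCR controls KVM accepts but doesn't track
-	 * (likely TLB cache disable, flush filter disable and IGNNE emulation
-	 * on AMD); the "valid" bits (McStatusWrEn, TscFreqSel) are expected
-	 * to read back as written.
-	 */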
- const uint64_t ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8);
- const uint64_t valid = BIT_ULL(18) | BIT_ULL(24);
- const uint64_t legal = ignored | valid;
- uint64_t val = BIT_ULL(bit);
- uint64_t actual;
- int r;
-
- r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val);
- TEST_ASSERT(val & ~legal ? !r : r == 1,
- "Expected KVM_SET_MSRS(MSR_K7_HWCR) = 0x%lx to %s",
- val, val & ~legal ? "fail" : "succeed");
-
- actual = vcpu_get_msr(vcpu, MSR_K7_HWCR);
- TEST_ASSERT(actual == (val & valid),
- "Bit %u: unexpected HWCR 0x%lx; expected 0x%lx",
- bit, actual, (val & valid));
-
- vcpu_set_msr(vcpu, MSR_K7_HWCR, 0);
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vm *vm;
- struct kvm_vcpu *vcpu;
- unsigned int bit;
-
- vm = vm_create_with_one_vcpu(&vcpu, NULL);
-
- for (bit = 0; bit < BITS_PER_LONG; bit++)
- test_hwcr_bit(vcpu, bit);
-
- kvm_vm_free(vm);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2021, Red Hat, Inc.
- *
- * Tests for Hyper-V clocksources
- */
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "hyperv.h"
-
-struct ms_hyperv_tsc_page {
- volatile u32 tsc_sequence;
- u32 reserved1;
- volatile u64 tsc_scale;
- volatile s64 tsc_offset;
-} __packed;
-
-/* Simplified mul_u64_u64_shr() */
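-/*
- * Returns the upper 64 bits of the 128-bit product a * b. Low-order partial
- * products are dropped, which can only under-count the result by a carry
- * or two.
- */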
-static inline u64 mul_u64_u64_shr64(u64 a, u64 b)
-{
- union {
- u64 ll;
- struct {
- u32 low, high;
- } l;
- } rm, rn, rh, a0, b0;
- u64 c;
-
- a0.ll = a;
- b0.ll = b;
-
- rm.ll = (u64)a0.l.low * b0.l.high;
- rn.ll = (u64)a0.l.high * b0.l.low;
- rh.ll = (u64)a0.l.high * b0.l.high;
-
- rh.l.low = c = rm.l.high + rn.l.high + rh.l.low;
- rh.l.high = (c >> 32) + rh.l.high;
-
- return rh.ll;
-}
-
-static inline void nop_loop(void)
-{
- int i;
-
- for (i = 0; i < 100000000; i++)
- asm volatile("nop");
-}
-
-static inline void check_tsc_msr_rdtsc(void)
-{
- u64 tsc_freq, r1, r2, t1, t2;
- s64 delta_ns;
-
- tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
- GUEST_ASSERT(tsc_freq > 0);
-
-	/* For increased accuracy, take the mean of rdtsc() before and after rdmsr() */
- r1 = rdtsc();
- t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
- r1 = (r1 + rdtsc()) / 2;
- nop_loop();
- r2 = rdtsc();
- t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
- r2 = (r2 + rdtsc()) / 2;
-
- GUEST_ASSERT(r2 > r1 && t2 > t1);
-
- /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
- delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
- if (delta_ns < 0)
- delta_ns = -delta_ns;
-
- /* 1% tolerance */
- GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100);
-}
-
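-/*
- * Per the TLFS: ReferenceTime = ((VirtualTsc * TscScale) >> 64) + TscOffset,
- * expressed in 100ns units.
- */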
-static inline u64 get_tscpage_ts(struct ms_hyperv_tsc_page *tsc_page)
-{
- return mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
-}
-
-static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page)
-{
- u64 r1, r2, t1, t2;
-
- /* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */
- t1 = get_tscpage_ts(tsc_page);
- r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
-
- /* 10 ms tolerance */
- GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000);
- nop_loop();
-
- t2 = get_tscpage_ts(tsc_page);
- r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
- GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000);
-}
-
-static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa)
-{
- u64 tsc_scale, tsc_offset;
-
- /* Set Guest OS id to enable Hyper-V emulation */
- GUEST_SYNC(1);
- wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
- GUEST_SYNC(2);
-
- check_tsc_msr_rdtsc();
-
- GUEST_SYNC(3);
-
-	/* Set up the TSC page in disabled state and check that it's clean */
- wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa);
- GUEST_ASSERT(tsc_page->tsc_sequence == 0);
- GUEST_ASSERT(tsc_page->tsc_scale == 0);
- GUEST_ASSERT(tsc_page->tsc_offset == 0);
-
- GUEST_SYNC(4);
-
-	/* Set up the TSC page in enabled state */
- wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1);
- GUEST_ASSERT(tsc_page->tsc_sequence != 0);
-
- GUEST_SYNC(5);
-
- check_tsc_msr_tsc_page(tsc_page);
-
- GUEST_SYNC(6);
-
- tsc_offset = tsc_page->tsc_offset;
- /* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */
-
- GUEST_SYNC(7);
-	/* Sanity check the TSC page timestamp; it should be close to 0 */
- GUEST_ASSERT(get_tscpage_ts(tsc_page) < 100000);
-
- GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset);
-
- nop_loop();
-
- /*
- * Enable Re-enlightenment and check that TSC page stays constant across
- * KVM_SET_CLOCK.
- */
- wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff);
- wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1);
- tsc_offset = tsc_page->tsc_offset;
- tsc_scale = tsc_page->tsc_scale;
- GUEST_SYNC(8);
- GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset);
- GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale);
-
- GUEST_SYNC(9);
-
- check_tsc_msr_tsc_page(tsc_page);
-
- /*
- * Disable re-enlightenment and TSC page, check that KVM doesn't update
- * it anymore.
- */
- wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0);
- wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0);
- wrmsr(HV_X64_MSR_REFERENCE_TSC, 0);
- memset(tsc_page, 0, sizeof(*tsc_page));
-
- GUEST_SYNC(10);
- GUEST_ASSERT(tsc_page->tsc_sequence == 0);
- GUEST_ASSERT(tsc_page->tsc_offset == 0);
- GUEST_ASSERT(tsc_page->tsc_scale == 0);
-
- GUEST_DONE();
-}
-
-static void host_check_tsc_msr_rdtsc(struct kvm_vcpu *vcpu)
-{
- u64 tsc_freq, r1, r2, t1, t2;
- s64 delta_ns;
-
- tsc_freq = vcpu_get_msr(vcpu, HV_X64_MSR_TSC_FREQUENCY);
- TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
-
-	/* For increased accuracy, take the mean of rdtsc() before and after the ioctl */
- r1 = rdtsc();
- t1 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
- r1 = (r1 + rdtsc()) / 2;
- nop_loop();
- r2 = rdtsc();
- t2 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
- r2 = (r2 + rdtsc()) / 2;
-
- TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
-
- /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
- delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
- if (delta_ns < 0)
- delta_ns = -delta_ns;
-
- /* 1% tolerance */
- TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100,
- "Elapsed time does not match (MSR=%ld, TSC=%ld)",
- (t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq);
-}
-
-int main(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct ucall uc;
- vm_vaddr_t tsc_page_gva;
- int stage;
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME));
- TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_main);
-
- vcpu_set_hv_cpuid(vcpu);
-
- tsc_page_gva = vm_vaddr_alloc_page(vm);
- memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
- TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
- "TSC page has to be page aligned");
- vcpu_args_set(vcpu, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
-
- host_check_tsc_msr_rdtsc(vcpu);
-
- for (stage = 1;; stage++) {
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- /* NOT REACHED */
- case UCALL_SYNC:
- break;
- case UCALL_DONE:
- /* Keep in sync with guest_main() */
- TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d",
- stage);
- goto out;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
-
- TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
- uc.args[1] == stage,
- "Stage %d: Unexpected register values vmexit, got %lx",
- stage, (ulong)uc.args[1]);
-
- /* Reset kvmclock triggering TSC page update */
- if (stage == 7 || stage == 8 || stage == 10) {
- struct kvm_clock_data clock = {0};
-
- vm_ioctl(vm, KVM_SET_CLOCK, &clock);
- }
- }
-
-out:
- kvm_vm_free(vm);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Test for x86 KVM_CAP_HYPERV_CPUID
- *
- * Copyright (C) 2018, Red Hat, Inc.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-static void guest_code(void)
-{
-}
-
-static bool smt_possible(void)
-{
- char buf[16];
- FILE *f;
- bool res = true;
-
- f = fopen("/sys/devices/system/cpu/smt/control", "r");
- if (f) {
- if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
- if (!strncmp(buf, "forceoff", 8) ||
- !strncmp(buf, "notsupported", 12))
- res = false;
- }
- fclose(f);
- }
-
- return res;
-}
-
-static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries,
- bool evmcs_expected)
-{
- int i;
- int nent_expected = 10;
- u32 test_val;
-
- TEST_ASSERT(hv_cpuid_entries->nent == nent_expected,
- "KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
- " (returned %d)",
- nent_expected, hv_cpuid_entries->nent);
-
- for (i = 0; i < hv_cpuid_entries->nent; i++) {
- const struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
-
- TEST_ASSERT((entry->function >= 0x40000000) &&
- (entry->function <= 0x40000082),
-			    "function %x is out of the supported range",
- entry->function);
-
- TEST_ASSERT(entry->index == 0,
- ".index field should be zero");
-
- TEST_ASSERT(entry->flags == 0,
- ".flags field should be zero");
-
- TEST_ASSERT(!entry->padding[0] && !entry->padding[1] &&
- !entry->padding[2], "padding should be zero");
-
- switch (entry->function) {
- case 0x40000000:
- test_val = 0x40000082;
-
- TEST_ASSERT(entry->eax == test_val,
- "Wrong max leaf report in 0x40000000.EAX: %x"
- " (evmcs=%d)",
- entry->eax, evmcs_expected
- );
- break;
- case 0x40000004:
- test_val = entry->eax & (1UL << 18);
-
- TEST_ASSERT(!!test_val == !smt_possible(),
- "NoNonArchitecturalCoreSharing bit"
- " doesn't reflect SMT setting");
- break;
- case 0x4000000A:
- TEST_ASSERT(entry->eax & (1UL << 19),
- "Enlightened MSR-Bitmap should always be supported"
-				    " 0x4000000A.EAX: %x", entry->eax);
- if (evmcs_expected)
- TEST_ASSERT((entry->eax & 0xffff) == 0x101,
- "Supported Enlightened VMCS version range is supposed to be 1:1"
-					    " 0x4000000A.EAX: %x", entry->eax);
-
- break;
- default:
- break;
-
- }
- /*
- * If needed for debug:
- * fprintf(stdout,
- * "CPUID%lx EAX=0x%lx EBX=0x%lx ECX=0x%lx EDX=0x%lx\n",
- * entry->function, entry->eax, entry->ebx, entry->ecx,
- * entry->edx);
- */
- }
-}
-
-void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
-{
- static struct kvm_cpuid2 cpuid = {.nent = 0};
- int ret;
-
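-	/* A zero-length entries array must make the ioctl fail with E2BIG. */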
- if (vcpu)
- ret = __vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
- else
- ret = __kvm_ioctl(vm->kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
-
- TEST_ASSERT(ret == -1 && errno == E2BIG,
- "%s KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
- " it should have: %d %d", !vcpu ? "KVM" : "vCPU", ret, errno);
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vm *vm;
- const struct kvm_cpuid2 *hv_cpuid_entries;
- struct kvm_vcpu *vcpu;
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
- /* Test vCPU ioctl version */
- test_hv_cpuid_e2big(vm, vcpu);
-
- hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
- test_hv_cpuid(hv_cpuid_entries, false);
- free((void *)hv_cpuid_entries);
-
- if (!kvm_cpu_has(X86_FEATURE_VMX) ||
- !kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
- print_skip("Enlightened VMCS is unsupported");
- goto do_sys;
- }
- vcpu_enable_evmcs(vcpu);
- hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
- test_hv_cpuid(hv_cpuid_entries, true);
- free((void *)hv_cpuid_entries);
-
-do_sys:
- /* Test system ioctl version */
- if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) {
- print_skip("KVM_CAP_SYS_HYPERV_CPUID not supported");
- goto out;
- }
-
- test_hv_cpuid_e2big(vm, NULL);
-
- hv_cpuid_entries = kvm_get_supported_hv_cpuid();
- test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX));
-
-out:
- kvm_vm_free(vm);
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2018, Red Hat, Inc.
- *
- * Tests for Enlightened VMCS, including nested guest state.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <linux/bitmap.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-
-#include "hyperv.h"
-#include "vmx.h"
-
-static int ud_count;
-
-static void guest_ud_handler(struct ex_regs *regs)
-{
- ud_count++;
- regs->rip += 3; /* VMLAUNCH */
-}
-
-static void guest_nmi_handler(struct ex_regs *regs)
-{
-}
-
-static inline void rdmsr_from_l2(uint32_t msr)
-{
- /* Currently, L1 doesn't preserve GPRs during vmexits. */
- __asm__ __volatile__ ("rdmsr" : : "c"(msr) :
- "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
- "r10", "r11", "r12", "r13", "r14", "r15");
-}
-
-/* Exit to L1 from L2 with RDMSR instruction */
-void l2_guest_code(void)
-{
- u64 unused;
-
- GUEST_SYNC(7);
-
- GUEST_SYNC(8);
-
- /* Forced exit to L1 upon restore */
- GUEST_SYNC(9);
-
- vmcall();
-
- /* MSR-Bitmap tests */
- rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
- rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
- rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
- vmcall();
- rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
-
- /* L2 TLB flush tests */
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
- HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS);
- rdmsr_from_l2(MSR_FS_BASE);
- /*
- * Note: hypercall status (RAX) is not preserved correctly by L1 after
-	 * synthetic vmexit; use the unchecked version.
- */
- __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
- HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS,
- &unused);
-
- /* Done, exit to L1 and never come back. */
- vmcall();
-}
-
-void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages,
- vm_vaddr_t hv_hcall_page_gpa)
-{
-#define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
- wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
- wrmsr(HV_X64_MSR_HYPERCALL, hv_hcall_page_gpa);
-
- x2apic_enable();
-
- GUEST_SYNC(1);
- GUEST_SYNC(2);
-
- enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
- evmcs_enable();
-
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
- GUEST_SYNC(3);
- GUEST_ASSERT(load_evmcs(hv_pages));
- GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
-
- GUEST_SYNC(4);
- GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
-
- prepare_vmcs(vmx_pages, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- GUEST_SYNC(5);
- GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
- current_evmcs->revision_id = -1u;
- GUEST_ASSERT(vmlaunch());
- current_evmcs->revision_id = EVMCS_VERSION;
- GUEST_SYNC(6);
-
- vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
- PIN_BASED_NMI_EXITING);
-
- /* L2 TLB flush setup */
- current_evmcs->partition_assist_page = hv_pages->partition_assist_gpa;
- current_evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
- current_evmcs->hv_vm_id = 1;
- current_evmcs->hv_vp_id = 1;
- current_vp_assist->nested_control.features.directhypercall = 1;
- *(u32 *)(hv_pages->partition_assist) = 0;
-
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
- GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), NMI_VECTOR);
- GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
-
- /*
-	 * The NMI forces an L2->L1 exit. Resume L2 and hope that the eVMCS is
-	 * up-to-date (RIP points where it should, not at the beginning of
-	 * l2_guest_code()); GUEST_SYNC(9) checks that.
- */
- GUEST_ASSERT(!vmresume());
-
- GUEST_SYNC(10);
-
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
- current_evmcs->guest_rip += 3; /* vmcall */
-
- /* Intercept RDMSR 0xc0000100 */
- vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
- CPU_BASED_USE_MSR_BITMAPS);
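-	/*
-	 * The read bitmap for high MSRs (0xc0000000 - 0xc0001fff) starts at
-	 * byte offset 0x400 of the MSR bitmap page.
-	 */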
- __set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400);
- GUEST_ASSERT(!vmresume());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
- current_evmcs->guest_rip += 2; /* rdmsr */
-
- /* Enable enlightened MSR bitmap */
- current_evmcs->hv_enlightenments_control.msr_bitmap = 1;
- GUEST_ASSERT(!vmresume());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
- current_evmcs->guest_rip += 2; /* rdmsr */
-
- /* Intercept RDMSR 0xc0000101 without telling KVM about it */
- __set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400);
- /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
- current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
- GUEST_ASSERT(!vmresume());
- /* Make sure we don't see EXIT_REASON_MSR_READ here so eMSR bitmap works */
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
- current_evmcs->guest_rip += 3; /* vmcall */
-
- /* Now tell KVM we've changed MSR-Bitmap */
- current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
- GUEST_ASSERT(!vmresume());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
- current_evmcs->guest_rip += 2; /* rdmsr */
-
- /*
- * L2 TLB flush test. First VMCALL should be handled directly by L0,
- * no VMCALL exit expected.
- */
- GUEST_ASSERT(!vmresume());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
- current_evmcs->guest_rip += 2; /* rdmsr */
- /* Enable synthetic vmexit */
- *(u32 *)(hv_pages->partition_assist) = 1;
- GUEST_ASSERT(!vmresume());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH);
-
- GUEST_ASSERT(!vmresume());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
- GUEST_SYNC(11);
-
- /* Try enlightened vmptrld with an incorrect GPA */
- evmcs_vmptrld(0xdeadbeef, hv_pages->enlightened_vmcs);
- GUEST_ASSERT(vmlaunch());
- GUEST_ASSERT(ud_count == 1);
- GUEST_DONE();
-}
-
-void inject_nmi(struct kvm_vcpu *vcpu)
-{
- struct kvm_vcpu_events events;
-
- vcpu_events_get(vcpu, &events);
-
- events.nmi.pending = 1;
- events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
-
- vcpu_events_set(vcpu, &events);
-}
-
-static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm,
- struct kvm_vcpu *vcpu)
-{
- struct kvm_regs regs1, regs2;
- struct kvm_x86_state *state;
-
- state = vcpu_save_state(vcpu);
-	memset(&regs1, 0, sizeof(regs1));
-	vcpu_regs_get(vcpu, &regs1);
-
- kvm_vm_release(vm);
-
- /* Restore state in a new VM. */
- vcpu = vm_recreate_with_one_vcpu(vm);
- vcpu_set_hv_cpuid(vcpu);
- vcpu_enable_evmcs(vcpu);
- vcpu_load_state(vcpu, state);
- kvm_x86_state_cleanup(state);
-
-	memset(&regs2, 0, sizeof(regs2));
-	vcpu_regs_get(vcpu, &regs2);
-	TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
- "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
- (ulong) regs2.rdi, (ulong) regs2.rsi);
- return vcpu;
-}
-
-int main(int argc, char *argv[])
-{
- vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0;
- vm_vaddr_t hcall_page;
-
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct ucall uc;
- int stage;
-
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS));
- TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH));
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
- hcall_page = vm_vaddr_alloc_pages(vm, 1);
- memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize());
-
- vcpu_set_hv_cpuid(vcpu);
- vcpu_enable_evmcs(vcpu);
-
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
- vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
- vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
-
- vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
- vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
-
- pr_info("Running L1 which uses EVMCS to run L2\n");
-
- for (stage = 1;; stage++) {
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- /* NOT REACHED */
- case UCALL_SYNC:
- break;
- case UCALL_DONE:
- goto done;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
-
- /* UCALL_SYNC is handled here. */
- TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
- uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
- stage, (ulong)uc.args[1]);
-
- vcpu = save_restore_vm(vm, vcpu);
-
- /* Force immediate L2->L1 exit before resuming */
- if (stage == 8) {
- pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
- inject_nmi(vcpu);
- }
-
- /*
- * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly
- * restored VM (before the first KVM_RUN) to check that
- * KVM_STATE_NESTED_EVMCS is not lost.
- */
- if (stage == 9) {
- pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n");
- vcpu = save_restore_vm(vm, vcpu);
- }
- }
-
-done:
- kvm_vm_free(vm);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Test Hyper-V extended hypercall, HV_EXT_CALL_QUERY_CAPABILITIES (0x8001),
- * exit to userspace and receive result in guest.
- *
- * Negative tests are present in hyperv_features.c
- *
- * Copyright 2022 Google LLC
- * Author: Vipin Sharma <vipinsh@google.com>
- */
-#include "kvm_util.h"
-#include "processor.h"
-#include "hyperv.h"
-
-/* Any value is fine */
-#define EXT_CAPABILITIES 0xbull
-
-static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa,
- vm_vaddr_t out_pg_gva)
-{
- uint64_t *output_gva;
-
- wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
- wrmsr(HV_X64_MSR_HYPERCALL, in_pg_gpa);
-
- output_gva = (uint64_t *)out_pg_gva;
-
- hyperv_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, in_pg_gpa, out_pg_gpa);
-
- /* TLFS states output will be a uint64_t value */
- GUEST_ASSERT_EQ(*output_gva, EXT_CAPABILITIES);
-
- GUEST_DONE();
-}
-
-int main(void)
-{
- vm_vaddr_t hcall_out_page;
- vm_vaddr_t hcall_in_page;
- struct kvm_vcpu *vcpu;
- struct kvm_run *run;
- struct kvm_vm *vm;
- uint64_t *outval;
- struct ucall uc;
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
-
- /* Verify if extended hypercalls are supported */
- if (!kvm_cpuid_has(kvm_get_supported_hv_cpuid(),
- HV_ENABLE_EXTENDED_HYPERCALLS)) {
- print_skip("Extended calls not supported by the kernel");
- exit(KSFT_SKIP);
- }
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- run = vcpu->run;
- vcpu_set_hv_cpuid(vcpu);
-
- /* Hypercall input */
- hcall_in_page = vm_vaddr_alloc_pages(vm, 1);
- memset(addr_gva2hva(vm, hcall_in_page), 0x0, vm->page_size);
-
- /* Hypercall output */
- hcall_out_page = vm_vaddr_alloc_pages(vm, 1);
- memset(addr_gva2hva(vm, hcall_out_page), 0x0, vm->page_size);
-
- vcpu_args_set(vcpu, 3, addr_gva2gpa(vm, hcall_in_page),
- addr_gva2gpa(vm, hcall_out_page), hcall_out_page);
-
- vcpu_run(vcpu);
-
- TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERV,
- "Unexpected exit reason: %u (%s)",
- run->exit_reason, exit_reason_str(run->exit_reason));
-
- outval = addr_gpa2hva(vm, run->hyperv.u.hcall.params[1]);
- *outval = EXT_CAPABILITIES;
- run->hyperv.u.hcall.result = HV_STATUS_SUCCESS;
-
- vcpu_run(vcpu);
-
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s)",
- run->exit_reason, exit_reason_str(run->exit_reason));
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_DONE:
- break;
- default:
- TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
- }
-
- kvm_vm_free(vm);
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2021, Red Hat, Inc.
- *
- * Tests for Hyper-V features enablement
- */
-#include <asm/kvm_para.h>
-#include <linux/kvm_para.h>
-#include <stdint.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "hyperv.h"
-
-/*
- * HYPERV_CPUID_ENLIGHTMENT_INFO.EBX is not a 'feature' CPUID leaf, but
- * setting it to any non-zero value is sufficient to activate the feature.
- * Use BIT(0) for that.
- */
-#define HV_PV_SPINLOCKS_TEST \
- KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EBX, 0)
-
-struct msr_data {
- uint32_t idx;
- bool fault_expected;
- bool write;
- u64 write_val;
-};
-
-struct hcall_data {
- uint64_t control;
- uint64_t expect;
- bool ud_expected;
-};
-
-static bool is_write_only_msr(uint32_t msr)
-{
- return msr == HV_X64_MSR_EOI;
-}
-
-static void guest_msr(struct msr_data *msr)
-{
- uint8_t vector = 0;
- uint64_t msr_val = 0;
-
- GUEST_ASSERT(msr->idx);
-
- if (msr->write)
- vector = wrmsr_safe(msr->idx, msr->write_val);
-
- if (!vector && (!msr->write || !is_write_only_msr(msr->idx)))
- vector = rdmsr_safe(msr->idx, &msr_val);
-
- if (msr->fault_expected)
- __GUEST_ASSERT(vector == GP_VECTOR,
- "Expected #GP on %sMSR(0x%x), got vector '0x%x'",
- msr->write ? "WR" : "RD", msr->idx, vector);
- else
- __GUEST_ASSERT(!vector,
- "Expected success on %sMSR(0x%x), got vector '0x%x'",
- msr->write ? "WR" : "RD", msr->idx, vector);
-
- if (vector || is_write_only_msr(msr->idx))
- goto done;
-
- if (msr->write)
- __GUEST_ASSERT(!vector,
- "WRMSR(0x%x) to '0x%lx', RDMSR read '0x%lx'",
- msr->idx, msr->write_val, msr_val);
-
- /* Invariant TSC bit appears when TSC invariant control MSR is written to */
- if (msr->idx == HV_X64_MSR_TSC_INVARIANT_CONTROL) {
- if (!this_cpu_has(HV_ACCESS_TSC_INVARIANT))
- GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC));
- else
- GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC) ==
- !!(msr_val & HV_INVARIANT_TSC_EXPOSED));
- }
-
-done:
- GUEST_DONE();
-}
-
-static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
-{
- u64 res, input, output;
- uint8_t vector;
-
- GUEST_ASSERT_NE(hcall->control, 0);
-
- wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
- wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
-
- if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
- input = pgs_gpa;
- output = pgs_gpa + 4096;
- } else {
- input = output = 0;
- }
-
- vector = __hyperv_hypercall(hcall->control, input, output, &res);
- if (hcall->ud_expected) {
- __GUEST_ASSERT(vector == UD_VECTOR,
- "Expected #UD for control '%lu', got vector '0x%x'",
- hcall->control, vector);
- } else {
- __GUEST_ASSERT(!vector,
- "Expected no exception for control '%lu', got vector '0x%x'",
- hcall->control, vector);
- GUEST_ASSERT_EQ(res, hcall->expect);
- }
-
- GUEST_DONE();
-}
-
-static void vcpu_reset_hv_cpuid(struct kvm_vcpu *vcpu)
-{
- /*
-	 * Enable all supported Hyper-V features, then clear the leaves holding
- * the features that will be tested one by one.
- */
- vcpu_set_hv_cpuid(vcpu);
-
- vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
- vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
- vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
-}
-
-static void guest_test_msrs_access(void)
-{
- struct kvm_cpuid2 *prev_cpuid = NULL;
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct ucall uc;
- int stage = 0;
- vm_vaddr_t msr_gva;
- struct msr_data *msr;
- bool has_invtsc = kvm_cpu_has(X86_FEATURE_INVTSC);
-
- while (true) {
- vm = vm_create_with_one_vcpu(&vcpu, guest_msr);
-
- msr_gva = vm_vaddr_alloc_page(vm);
- memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
- msr = addr_gva2hva(vm, msr_gva);
-
- vcpu_args_set(vcpu, 1, msr_gva);
- vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
-
- if (!prev_cpuid) {
- vcpu_reset_hv_cpuid(vcpu);
-
- prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
- } else {
- vcpu_init_cpuid(vcpu, prev_cpuid);
- }
-
- /* TODO: Make this entire test easier to maintain. */
- if (stage >= 21)
- vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_SYNIC2, 0);
-
- switch (stage) {
- case 0:
- /*
- * Only available when Hyper-V identification is set
- */
- msr->idx = HV_X64_MSR_GUEST_OS_ID;
- msr->write = false;
- msr->fault_expected = true;
- break;
- case 1:
- msr->idx = HV_X64_MSR_HYPERCALL;
- msr->write = false;
- msr->fault_expected = true;
- break;
- case 2:
- vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
- /*
- * HV_X64_MSR_GUEST_OS_ID has to be written first to make
- * HV_X64_MSR_HYPERCALL available.
- */
- msr->idx = HV_X64_MSR_GUEST_OS_ID;
- msr->write = true;
- msr->write_val = HYPERV_LINUX_OS_ID;
- msr->fault_expected = false;
- break;
- case 3:
- msr->idx = HV_X64_MSR_GUEST_OS_ID;
- msr->write = false;
- msr->fault_expected = false;
- break;
- case 4:
- msr->idx = HV_X64_MSR_HYPERCALL;
- msr->write = false;
- msr->fault_expected = false;
- break;
-
- case 5:
- msr->idx = HV_X64_MSR_VP_RUNTIME;
- msr->write = false;
- msr->fault_expected = true;
- break;
- case 6:
- vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_RUNTIME_AVAILABLE);
- msr->idx = HV_X64_MSR_VP_RUNTIME;
- msr->write = false;
- msr->fault_expected = false;
- break;
- case 7:
- /* Read only */
- msr->idx = HV_X64_MSR_VP_RUNTIME;
- msr->write = true;
- msr->write_val = 1;
- msr->fault_expected = true;
- break;
-
- case 8:
- msr->idx = HV_X64_MSR_TIME_REF_COUNT;
- msr->write = false;
- msr->fault_expected = true;
- break;
- case 9:
- vcpu_set_cpuid_feature(vcpu, HV_MSR_TIME_REF_COUNT_AVAILABLE);
- msr->idx = HV_X64_MSR_TIME_REF_COUNT;
- msr->write = false;
- msr->fault_expected = false;
- break;
- case 10:
- /* Read only */
- msr->idx = HV_X64_MSR_TIME_REF_COUNT;
- msr->write = true;
- msr->write_val = 1;
- msr->fault_expected = true;
- break;
-
- case 11:
- msr->idx = HV_X64_MSR_VP_INDEX;
- msr->write = false;
- msr->fault_expected = true;
- break;
- case 12:
- vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_INDEX_AVAILABLE);
- msr->idx = HV_X64_MSR_VP_INDEX;
- msr->write = false;
- msr->fault_expected = false;
- break;
- case 13:
- /* Read only */
- msr->idx = HV_X64_MSR_VP_INDEX;
- msr->write = true;
- msr->write_val = 1;
- msr->fault_expected = true;
- break;
-
- case 14:
- msr->idx = HV_X64_MSR_RESET;
- msr->write = false;
- msr->fault_expected = true;
- break;
- case 15:
- vcpu_set_cpuid_feature(vcpu, HV_MSR_RESET_AVAILABLE);
- msr->idx = HV_X64_MSR_RESET;
- msr->write = false;
- msr->fault_expected = false;
- break;
- case 16:
- msr->idx = HV_X64_MSR_RESET;
- msr->write = true;
- /*
- * TODO: the test only writes '0' to HV_X64_MSR_RESET
- * at the moment, writing some other value there will
- * trigger real vCPU reset and the code is not prepared
- * to handle it yet.
- */
- msr->write_val = 0;
- msr->fault_expected = false;
- break;
-
- case 17:
- msr->idx = HV_X64_MSR_REFERENCE_TSC;
- msr->write = false;
- msr->fault_expected = true;
- break;
- case 18:
- vcpu_set_cpuid_feature(vcpu, HV_MSR_REFERENCE_TSC_AVAILABLE);
- msr->idx = HV_X64_MSR_REFERENCE_TSC;
- msr->write = false;
- msr->fault_expected = false;
- break;
- case 19:
- msr->idx = HV_X64_MSR_REFERENCE_TSC;
- msr->write = true;
- msr->write_val = 0;
- msr->fault_expected = false;
- break;
-
- case 20:
- msr->idx = HV_X64_MSR_EOM;
- msr->write = false;
- msr->fault_expected = true;
- break;
- case 21:
- /*
- * Remains unavailable even with KVM_CAP_HYPERV_SYNIC2
- * capability enabled and guest visible CPUID bit unset.
- */
- msr->idx = HV_X64_MSR_EOM;
- msr->write = false;
- msr->fault_expected = true;
- break;
- case 22:
- vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNIC_AVAILABLE);
- msr->idx = HV_X64_MSR_EOM;
- msr->write = false;
- msr->fault_expected = false;
- break;
- case 23:
- msr->idx = HV_X64_MSR_EOM;
- msr->write = true;
- msr->write_val = 0;
- msr->fault_expected = false;
- break;
-
- case 24:
- msr->idx = HV_X64_MSR_STIMER0_CONFIG;
- msr->write = false;
- msr->fault_expected = true;
- break;
- case 25:
- vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNTIMER_AVAILABLE);
- msr->idx = HV_X64_MSR_STIMER0_CONFIG;
- msr->write = false;
- msr->fault_expected = false;
- break;
- case 26:
- msr->idx = HV_X64_MSR_STIMER0_CONFIG;
- msr->write = true;
- msr->write_val = 0;
- msr->fault_expected = false;
- break;
- case 27:
- /* Direct mode test */
- msr->idx = HV_X64_MSR_STIMER0_CONFIG;
- msr->write = true;
- msr->write_val = 1 << 12;
- msr->fault_expected = true;
- break;
- case 28:
- vcpu_set_cpuid_feature(vcpu, HV_STIMER_DIRECT_MODE_AVAILABLE);
- msr->idx = HV_X64_MSR_STIMER0_CONFIG;
- msr->write = true;
- msr->write_val = 1 << 12;
- msr->fault_expected = false;
- break;
-
- case 29:
- msr->idx = HV_X64_MSR_EOI;
- msr->write = false;
- msr->fault_expected = true;
- break;
- case 30:
- vcpu_set_cpuid_feature(vcpu, HV_MSR_APIC_ACCESS_AVAILABLE);
- msr->idx = HV_X64_MSR_EOI;
- msr->write = true;
- msr->write_val = 1;
- msr->fault_expected = false;
- break;
-
- case 31:
- msr->idx = HV_X64_MSR_TSC_FREQUENCY;
- msr->write = false;
- msr->fault_expected = true;
- break;
- case 32:
- vcpu_set_cpuid_feature(vcpu, HV_ACCESS_FREQUENCY_MSRS);
- msr->idx = HV_X64_MSR_TSC_FREQUENCY;
- msr->write = false;
- msr->fault_expected = false;
- break;
- case 33:
- /* Read only */
- msr->idx = HV_X64_MSR_TSC_FREQUENCY;
- msr->write = true;
- msr->write_val = 1;
- msr->fault_expected = true;
- break;
-
- case 34:
- msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
- msr->write = false;
- msr->fault_expected = true;
- break;
- case 35:
- vcpu_set_cpuid_feature(vcpu, HV_ACCESS_REENLIGHTENMENT);
- msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
- msr->write = false;
- msr->fault_expected = false;
- break;
- case 36:
- msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
- msr->write = true;
- msr->write_val = 1;
- msr->fault_expected = false;
- break;
- case 37:
- /* Can only write '0' */
- msr->idx = HV_X64_MSR_TSC_EMULATION_STATUS;
- msr->write = true;
- msr->write_val = 1;
- msr->fault_expected = true;
- break;
-
- case 38:
- msr->idx = HV_X64_MSR_CRASH_P0;
- msr->write = false;
- msr->fault_expected = true;
- break;
- case 39:
- vcpu_set_cpuid_feature(vcpu, HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE);
- msr->idx = HV_X64_MSR_CRASH_P0;
- msr->write = false;
- msr->fault_expected = false;
- break;
- case 40:
- msr->idx = HV_X64_MSR_CRASH_P0;
- msr->write = true;
- msr->write_val = 1;
- msr->fault_expected = false;
- break;
-
- case 41:
- msr->idx = HV_X64_MSR_SYNDBG_STATUS;
- msr->write = false;
- msr->fault_expected = true;
- break;
- case 42:
- vcpu_set_cpuid_feature(vcpu, HV_FEATURE_DEBUG_MSRS_AVAILABLE);
- vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
- msr->idx = HV_X64_MSR_SYNDBG_STATUS;
- msr->write = false;
- msr->fault_expected = false;
- break;
- case 43:
- msr->idx = HV_X64_MSR_SYNDBG_STATUS;
- msr->write = true;
- msr->write_val = 0;
- msr->fault_expected = false;
- break;
-
- case 44:
- /* MSR is not available when CPUID feature bit is unset */
- if (!has_invtsc)
- goto next_stage;
- msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
- msr->write = false;
- msr->fault_expected = true;
- break;
- case 45:
-			/* MSR is available when CPUID feature bit is set */
- if (!has_invtsc)
- goto next_stage;
- vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT);
- msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
- msr->write = false;
- msr->fault_expected = false;
- break;
- case 46:
- /* Writing bits other than 0 is forbidden */
- if (!has_invtsc)
- goto next_stage;
- msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
- msr->write = true;
- msr->write_val = 0xdeadbeef;
- msr->fault_expected = true;
- break;
- case 47:
- /* Setting bit 0 enables the feature */
- if (!has_invtsc)
- goto next_stage;
- msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
- msr->write = true;
- msr->write_val = 1;
- msr->fault_expected = false;
- break;
-
- default:
- kvm_vm_free(vm);
- return;
- }
-
- vcpu_set_cpuid(vcpu);
-
- memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
-
- pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage,
- msr->idx, msr->write ? "write" : "read");
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- return;
- case UCALL_DONE:
- break;
- default:
- TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
- return;
- }
-
-next_stage:
- stage++;
- kvm_vm_free(vm);
- }
-}
-
-static void guest_test_hcalls_access(void)
-{
- struct kvm_cpuid2 *prev_cpuid = NULL;
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct ucall uc;
- int stage = 0;
- vm_vaddr_t hcall_page, hcall_params;
- struct hcall_data *hcall;
-
- while (true) {
- vm = vm_create_with_one_vcpu(&vcpu, guest_hcall);
-
- /* Hypercall input/output */
- hcall_page = vm_vaddr_alloc_pages(vm, 2);
- memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
-
- hcall_params = vm_vaddr_alloc_page(vm);
- memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
- hcall = addr_gva2hva(vm, hcall_params);
-
- vcpu_args_set(vcpu, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
- vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
-
- if (!prev_cpuid) {
- vcpu_reset_hv_cpuid(vcpu);
-
- prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
- } else {
- vcpu_init_cpuid(vcpu, prev_cpuid);
- }
-
- switch (stage) {
- case 0:
- vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
- hcall->control = 0xbeef;
- hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
- break;
-
- case 1:
- hcall->control = HVCALL_POST_MESSAGE;
- hcall->expect = HV_STATUS_ACCESS_DENIED;
- break;
- case 2:
- vcpu_set_cpuid_feature(vcpu, HV_POST_MESSAGES);
- hcall->control = HVCALL_POST_MESSAGE;
- hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
- break;
-
- case 3:
- hcall->control = HVCALL_SIGNAL_EVENT;
- hcall->expect = HV_STATUS_ACCESS_DENIED;
- break;
- case 4:
- vcpu_set_cpuid_feature(vcpu, HV_SIGNAL_EVENTS);
- hcall->control = HVCALL_SIGNAL_EVENT;
- hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
- break;
-
- case 5:
- hcall->control = HVCALL_RESET_DEBUG_SESSION;
- hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
- break;
- case 6:
- vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
- hcall->control = HVCALL_RESET_DEBUG_SESSION;
- hcall->expect = HV_STATUS_ACCESS_DENIED;
- break;
- case 7:
- vcpu_set_cpuid_feature(vcpu, HV_DEBUGGING);
- hcall->control = HVCALL_RESET_DEBUG_SESSION;
- hcall->expect = HV_STATUS_OPERATION_DENIED;
- break;
-
- case 8:
- hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
- hcall->expect = HV_STATUS_ACCESS_DENIED;
- break;
- case 9:
- vcpu_set_cpuid_feature(vcpu, HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED);
- hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
- hcall->expect = HV_STATUS_SUCCESS;
- break;
- case 10:
- hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
- hcall->expect = HV_STATUS_ACCESS_DENIED;
- break;
- case 11:
- vcpu_set_cpuid_feature(vcpu, HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED);
- hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
- hcall->expect = HV_STATUS_SUCCESS;
- break;
-
- case 12:
- hcall->control = HVCALL_SEND_IPI;
- hcall->expect = HV_STATUS_ACCESS_DENIED;
- break;
- case 13:
- vcpu_set_cpuid_feature(vcpu, HV_X64_CLUSTER_IPI_RECOMMENDED);
- hcall->control = HVCALL_SEND_IPI;
- hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
- break;
- case 14:
- /* Nothing in 'sparse banks' -> success */
- hcall->control = HVCALL_SEND_IPI_EX;
- hcall->expect = HV_STATUS_SUCCESS;
- break;
-
- case 15:
- hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
- hcall->expect = HV_STATUS_ACCESS_DENIED;
- break;
- case 16:
- vcpu_set_cpuid_feature(vcpu, HV_PV_SPINLOCKS_TEST);
- hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
- hcall->expect = HV_STATUS_SUCCESS;
- break;
- case 17:
- /* XMM fast hypercall */
- hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
- hcall->ud_expected = true;
- break;
- case 18:
- vcpu_set_cpuid_feature(vcpu, HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE);
- hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
- hcall->ud_expected = false;
- hcall->expect = HV_STATUS_SUCCESS;
- break;
- case 19:
- hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES;
- hcall->expect = HV_STATUS_ACCESS_DENIED;
- break;
- case 20:
- vcpu_set_cpuid_feature(vcpu, HV_ENABLE_EXTENDED_HYPERCALLS);
- hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES | HV_HYPERCALL_FAST_BIT;
- hcall->expect = HV_STATUS_INVALID_PARAMETER;
- break;
- case 21:
- kvm_vm_free(vm);
- return;
- }
-
- vcpu_set_cpuid(vcpu);
-
- memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
-
- pr_debug("Stage %d: testing hcall: 0x%lx\n", stage, hcall->control);
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- return;
- case UCALL_DONE:
- break;
- default:
- TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
- return;
- }
-
- stage++;
- kvm_vm_free(vm);
- }
-}
-
-int main(void)
-{
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENFORCE_CPUID));
-
- pr_info("Testing access to Hyper-V specific MSRs\n");
- guest_test_msrs_access();
-
- pr_info("Testing access to Hyper-V hypercalls\n");
- guest_test_hcalls_access();
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Hyper-V HvCallSendSyntheticClusterIpi{,Ex} tests
- *
- * Copyright (C) 2022, Red Hat, Inc.
- *
- */
-#include <pthread.h>
-#include <inttypes.h>
-
-#include "kvm_util.h"
-#include "hyperv.h"
-#include "test_util.h"
-#include "vmx.h"
-
-#define RECEIVER_VCPU_ID_1 2
-#define RECEIVER_VCPU_ID_2 65
-
-#define IPI_VECTOR 0xfe
-
-static volatile uint64_t ipis_rcvd[RECEIVER_VCPU_ID_2 + 1];
-
-struct hv_vpset {
- u64 format;
- u64 valid_bank_mask;
- u64 bank_contents[2];
-};
-
-enum HV_GENERIC_SET_FORMAT {
- HV_GENERIC_SET_SPARSE_4K,
- HV_GENERIC_SET_ALL,
-};
-
-/* HvCallSendSyntheticClusterIpi hypercall */
-struct hv_send_ipi {
- u32 vector;
- u32 reserved;
- u64 cpu_mask;
-};
-
-/* HvCallSendSyntheticClusterIpiEx hypercall */
-struct hv_send_ipi_ex {
- u32 vector;
- u32 reserved;
- struct hv_vpset vp_set;
-};
-
-static inline void hv_init(vm_vaddr_t pgs_gpa)
-{
- wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
- wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
-}
-
-static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa)
-{
- u32 vcpu_id;
-
- x2apic_enable();
- hv_init(pgs_gpa);
-
- vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
-
- /* Signal sender vCPU we're ready */
- ipis_rcvd[vcpu_id] = (u64)-1;
-
- for (;;)
- asm volatile("sti; hlt; cli");
-}
-
-static void guest_ipi_handler(struct ex_regs *regs)
-{
- u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
-
- ipis_rcvd[vcpu_id]++;
- wrmsr(HV_X64_MSR_EOI, 1);
-}
-
-static inline void nop_loop(void)
-{
- int i;
-
- for (i = 0; i < 100000000; i++)
- asm volatile("nop");
-}
-
-static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
-{
- struct hv_send_ipi *ipi = (struct hv_send_ipi *)hcall_page;
- struct hv_send_ipi_ex *ipi_ex = (struct hv_send_ipi_ex *)hcall_page;
- int stage = 1, ipis_expected[2] = {0};
-
- hv_init(pgs_gpa);
- GUEST_SYNC(stage++);
-
- /* Wait for receiver vCPUs to come up */
- while (!ipis_rcvd[RECEIVER_VCPU_ID_1] || !ipis_rcvd[RECEIVER_VCPU_ID_2])
- nop_loop();
- ipis_rcvd[RECEIVER_VCPU_ID_1] = ipis_rcvd[RECEIVER_VCPU_ID_2] = 0;
-
- /* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
- ipi->vector = IPI_VECTOR;
- ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1;
- hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096);
- nop_loop();
- GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
- GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
- GUEST_SYNC(stage++);
- /* 'Fast' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
- hyperv_hypercall(HVCALL_SEND_IPI | HV_HYPERCALL_FAST_BIT,
- IPI_VECTOR, 1 << RECEIVER_VCPU_ID_1);
- nop_loop();
- GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
- GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
- GUEST_SYNC(stage++);
-
- /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
- memset(hcall_page, 0, 4096);
- ipi_ex->vector = IPI_VECTOR;
- ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
- ipi_ex->vp_set.valid_bank_mask = 1 << 0;
- ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
- hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
- pgs_gpa, pgs_gpa + 4096);
- nop_loop();
- GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
- GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
- GUEST_SYNC(stage++);
- /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
- hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1);
- hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
- (1 << HV_HYPERCALL_VARHEAD_OFFSET),
- IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
- nop_loop();
- GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
- GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
- GUEST_SYNC(stage++);
-
- /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
- memset(hcall_page, 0, 4096);
- ipi_ex->vector = IPI_VECTOR;
- ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
- ipi_ex->vp_set.valid_bank_mask = 1 << 1;
- ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64);
- hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
- pgs_gpa, pgs_gpa + 4096);
- nop_loop();
- GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
- GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
- GUEST_SYNC(stage++);
- /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
- hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1);
- hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
- (1 << HV_HYPERCALL_VARHEAD_OFFSET),
- IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
- nop_loop();
- GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
- GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
- GUEST_SYNC(stage++);
-
- /* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */
- memset(hcall_page, 0, 4096);
- ipi_ex->vector = IPI_VECTOR;
- ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
- ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1;
- ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
- ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64);
- hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET),
- pgs_gpa, pgs_gpa + 4096);
- nop_loop();
- GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
- GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
- GUEST_SYNC(stage++);
-	/* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */
- hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
- hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
- (2 << HV_HYPERCALL_VARHEAD_OFFSET),
- IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
- nop_loop();
- GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
- GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
- GUEST_SYNC(stage++);
-
- /* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */
- memset(hcall_page, 0, 4096);
- ipi_ex->vector = IPI_VECTOR;
- ipi_ex->vp_set.format = HV_GENERIC_SET_ALL;
- hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096);
- nop_loop();
- GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
- GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
- GUEST_SYNC(stage++);
- /*
- * 'XMM Fast' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL.
- */
- ipi_ex->vp_set.valid_bank_mask = 0;
- hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
- hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT,
- IPI_VECTOR, HV_GENERIC_SET_ALL);
- nop_loop();
- GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
- GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
- GUEST_SYNC(stage++);
-
- GUEST_DONE();
-}
-
-static void *vcpu_thread(void *arg)
-{
- struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
- int old, r;
-
- r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
- TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
- vcpu->id, r);
-
- vcpu_run(vcpu);
-
- TEST_FAIL("vCPU %u exited unexpectedly", vcpu->id);
-
- return NULL;
-}
-
-static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
-{
- void *retval;
- int r;
-
- r = pthread_cancel(thread);
- TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d",
- vcpu->id, r);
-
- r = pthread_join(thread, &retval);
- TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d",
- vcpu->id, r);
- TEST_ASSERT(retval == PTHREAD_CANCELED,
- "expected retval=%p, got %p", PTHREAD_CANCELED,
- retval);
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vm *vm;
- struct kvm_vcpu *vcpu[3];
- vm_vaddr_t hcall_page;
- pthread_t threads[2];
- int stage = 1, r;
- struct ucall uc;
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_SEND_IPI));
-
- vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
-
- /* Hypercall input/output */
- hcall_page = vm_vaddr_alloc_pages(vm, 2);
- memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
-
- vcpu[1] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_1, receiver_code);
- vcpu_args_set(vcpu[1], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
- vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_1);
- vcpu_set_hv_cpuid(vcpu[1]);
-
- vcpu[2] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_2, receiver_code);
- vcpu_args_set(vcpu[2], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
- vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_2);
- vcpu_set_hv_cpuid(vcpu[2]);
-
- vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
-
- vcpu_args_set(vcpu[0], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
- vcpu_set_hv_cpuid(vcpu[0]);
-
- r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]);
- TEST_ASSERT(!r, "pthread_create failed errno=%d", r);
-
- r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]);
-	TEST_ASSERT(!r, "pthread_create failed errno=%d", r);
-
- while (true) {
- vcpu_run(vcpu[0]);
-
- TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO);
-
- switch (get_ucall(vcpu[0], &uc)) {
- case UCALL_SYNC:
- TEST_ASSERT(uc.args[1] == stage,
- "Unexpected stage: %ld (%d expected)",
- uc.args[1], stage);
- break;
- case UCALL_DONE:
- goto done;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- /* NOT REACHED */
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
-
- stage++;
- }
-
-done:
- cancel_join_vcpu_thread(threads[0], vcpu[1]);
- cancel_join_vcpu_thread(threads[1], vcpu[2]);
- kvm_vm_free(vm);
-
- return r;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2022, Red Hat, Inc.
- *
- * Tests for Hyper-V extensions to SVM.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <linux/bitmap.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-#include "hyperv.h"
-
-#define L2_GUEST_STACK_SIZE 256
-
-/* Exit to L1 from L2 with RDMSR instruction */
-static inline void rdmsr_from_l2(uint32_t msr)
-{
- /* Currently, L1 doesn't preserve GPRs during vmexits. */
- __asm__ __volatile__ ("rdmsr" : : "c"(msr) :
- "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
- "r10", "r11", "r12", "r13", "r14", "r15");
-}
-
-void l2_guest_code(void)
-{
- u64 unused;
-
- GUEST_SYNC(3);
- /* Exit to L1 */
- vmmcall();
-
- /* MSR-Bitmap tests */
- rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
- rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
- rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
- vmmcall();
- rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
-
- GUEST_SYNC(5);
-
- /* L2 TLB flush tests */
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
- HV_HYPERCALL_FAST_BIT, 0x0,
- HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
- HV_FLUSH_ALL_PROCESSORS);
- rdmsr_from_l2(MSR_FS_BASE);
- /*
- * Note: hypercall status (RAX) is not preserved correctly by L1 after
- * synthetic vmexit, use unchecked version.
- */
- __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
- HV_HYPERCALL_FAST_BIT, 0x0,
- HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
- HV_FLUSH_ALL_PROCESSORS, &unused);
-
- /* Done, exit to L1 and never come back. */
- vmmcall();
-}
-
-static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm,
- struct hyperv_test_pages *hv_pages,
- vm_vaddr_t pgs_gpa)
-{
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- struct vmcb *vmcb = svm->vmcb;
- struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
-
- GUEST_SYNC(1);
-
- wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
- wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
- enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
-
- GUEST_ASSERT(svm->vmcb_gpa);
- /* Prepare for L2 execution. */
- generic_svm_setup(svm, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- /* L2 TLB flush setup */
- hve->partition_assist_page = hv_pages->partition_assist_gpa;
- hve->hv_enlightenments_control.nested_flush_hypercall = 1;
- hve->hv_vm_id = 1;
- hve->hv_vp_id = 1;
- current_vp_assist->nested_control.features.directhypercall = 1;
- *(u32 *)(hv_pages->partition_assist) = 0;
-
- GUEST_SYNC(2);
- run_guest(vmcb, svm->vmcb_gpa);
- GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
- GUEST_SYNC(4);
- vmcb->save.rip += 3;
-
- /* Intercept RDMSR 0xc0000100 */
- vmcb->control.intercept |= 1ULL << INTERCEPT_MSR_PROT;
- __set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800);
- run_guest(vmcb, svm->vmcb_gpa);
- GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
- vmcb->save.rip += 2; /* rdmsr */
-
- /* Enable enlightened MSR bitmap */
- hve->hv_enlightenments_control.msr_bitmap = 1;
- run_guest(vmcb, svm->vmcb_gpa);
- GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
- vmcb->save.rip += 2; /* rdmsr */
-
- /* Intercept RDMSR 0xc0000101 without telling KVM about it */
- __set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800);
- /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
- vmcb->control.clean |= HV_VMCB_NESTED_ENLIGHTENMENTS;
- run_guest(vmcb, svm->vmcb_gpa);
- /* Make sure we don't see SVM_EXIT_MSR here so eMSR bitmap works */
- GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
- vmcb->save.rip += 3; /* vmcall */
-
- /* Now tell KVM we've changed MSR-Bitmap */
- vmcb->control.clean &= ~HV_VMCB_NESTED_ENLIGHTENMENTS;
- run_guest(vmcb, svm->vmcb_gpa);
- GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
- vmcb->save.rip += 2; /* rdmsr */
-
- /*
- * L2 TLB flush test. First VMCALL should be handled directly by L0,
- * no VMCALL exit expected.
- */
- run_guest(vmcb, svm->vmcb_gpa);
- GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
- vmcb->save.rip += 2; /* rdmsr */
- /* Enable synthetic vmexit */
- *(u32 *)(hv_pages->partition_assist) = 1;
- run_guest(vmcb, svm->vmcb_gpa);
- GUEST_ASSERT(vmcb->control.exit_code == HV_SVM_EXITCODE_ENL);
- GUEST_ASSERT(vmcb->control.exit_info_1 == HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH);
-
- run_guest(vmcb, svm->vmcb_gpa);
- GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
- GUEST_SYNC(6);
-
- GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
- vm_vaddr_t nested_gva = 0, hv_pages_gva = 0;
- vm_vaddr_t hcall_page;
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct ucall uc;
- int stage;
-
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
- TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH));
-
- /* Create VM */
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vcpu_set_hv_cpuid(vcpu);
- vcpu_alloc_svm(vm, &nested_gva);
- vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
-
- hcall_page = vm_vaddr_alloc_pages(vm, 1);
- memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize());
-
- vcpu_args_set(vcpu, 3, nested_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
- vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
-
- for (stage = 1;; stage++) {
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- /* NOT REACHED */
- case UCALL_SYNC:
- break;
- case UCALL_DONE:
- goto done;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
-
- /* UCALL_SYNC is handled here. */
- TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
- uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
- stage, (ulong)uc.args[1]);
-
- }
-
-done:
- kvm_vm_free(vm);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Hyper-V HvFlushVirtualAddress{List,Space}{,Ex} tests
- *
- * Copyright (C) 2022, Red Hat, Inc.
- *
- */
-#include <asm/barrier.h>
-#include <pthread.h>
-#include <inttypes.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "hyperv.h"
-#include "test_util.h"
-#include "vmx.h"
-
-#define WORKER_VCPU_ID_1 2
-#define WORKER_VCPU_ID_2 65
-
-#define NTRY 100
-#define NTEST_PAGES 2
-
-struct hv_vpset {
- u64 format;
- u64 valid_bank_mask;
- u64 bank_contents[];
-};
-
-enum HV_GENERIC_SET_FORMAT {
- HV_GENERIC_SET_SPARSE_4K,
- HV_GENERIC_SET_ALL,
-};
-
-#define HV_FLUSH_ALL_PROCESSORS BIT(0)
-#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
-#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
-#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
-
-/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
-struct hv_tlb_flush {
- u64 address_space;
- u64 flags;
- u64 processor_mask;
- u64 gva_list[];
-} __packed;
-
-/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
-struct hv_tlb_flush_ex {
- u64 address_space;
- u64 flags;
- struct hv_vpset hv_vp_set;
- u64 gva_list[];
-} __packed;
-
-/*
- * Pass the following info to 'workers' and 'sender'
- * - Hypercall page's GVA
- * - Hypercall page's GPA
- * - Test pages GVA
- * - GVAs of the test pages' PTEs
- */
-struct test_data {
- vm_vaddr_t hcall_gva;
- vm_paddr_t hcall_gpa;
- vm_vaddr_t test_pages;
- vm_vaddr_t test_pages_pte[NTEST_PAGES];
-};
-
-/* 'Worker' vCPU code checking the contents of the test page */
-static void worker_guest_code(vm_vaddr_t test_data)
-{
- struct test_data *data = (struct test_data *)test_data;
- u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
- void *exp_page = (void *)data->test_pages + PAGE_SIZE * NTEST_PAGES;
- u64 *this_cpu = (u64 *)(exp_page + vcpu_id * sizeof(u64));
- u64 expected, val;
-
- x2apic_enable();
- wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
-
- for (;;) {
- cpu_relax();
-
- expected = READ_ONCE(*this_cpu);
-
- /*
- * Make sure the value in the test page is read after reading
- * the expectation for the first time. Pairs with wmb() in
- * prepare_to_test().
- */
- rmb();
-
- val = READ_ONCE(*(u64 *)data->test_pages);
-
- /*
-		 * Make sure the value in the test page is read before reading
-		 * the expectation for the second time. Pairs with wmb() in
-		 * post_test().
- */
- rmb();
-
- /*
- * '0' indicates the sender is between iterations, wait until
- * the sender is ready for this vCPU to start checking again.
- */
- if (!expected)
- continue;
-
- /*
- * Re-read the per-vCPU byte to ensure the sender didn't move
- * onto a new iteration.
- */
- if (expected != READ_ONCE(*this_cpu))
- continue;
-
- GUEST_ASSERT(val == expected);
- }
-}
-
-/*
- * Write per-CPU info indicating what each 'worker' CPU is supposed to see in
- * test page. '0' means don't check.
- */
-static void set_expected_val(void *addr, u64 val, int vcpu_id)
-{
- void *exp_page = addr + PAGE_SIZE * NTEST_PAGES;
-
- *(u64 *)(exp_page + vcpu_id * sizeof(u64)) = val;
-}
-
-/*
- * Update PTEs swapping two test pages.
- * TODO: use swap()/xchg() when these are provided.
- */
-static void swap_two_test_pages(vm_paddr_t pte_gva1, vm_paddr_t pte_gva2)
-{
- uint64_t tmp = *(uint64_t *)pte_gva1;
-
- *(uint64_t *)pte_gva1 = *(uint64_t *)pte_gva2;
- *(uint64_t *)pte_gva2 = tmp;
-}
-
-/*
- * TODO: replace the silly NOP loop with a proper udelay() implementation.
- */
-static inline void do_delay(void)
-{
- int i;
-
- for (i = 0; i < 1000000; i++)
- asm volatile("nop");
-}
-
-/*
- * Prepare to test: 'disable' workers by setting the expectation to '0',
- * clear hypercall input page and then swap two test pages.
- */
-static inline void prepare_to_test(struct test_data *data)
-{
- /* Clear hypercall input page */
- memset((void *)data->hcall_gva, 0, PAGE_SIZE);
-
- /* 'Disable' workers */
- set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_1);
- set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_2);
-
- /* Make sure workers are 'disabled' before we swap PTEs. */
- wmb();
-
- /* Make sure workers have enough time to notice */
- do_delay();
-
- /* Swap test page mappings */
- swap_two_test_pages(data->test_pages_pte[0], data->test_pages_pte[1]);
-}
-
-/*
- * Finalize the test: check the hypercall result, set the expected val for
- * 'worker' CPUs and give them some time to test.
- */
-static inline void post_test(struct test_data *data, u64 exp1, u64 exp2)
-{
- /* Make sure we change the expectation after swapping PTEs */
- wmb();
-
- /* Set the expectation for workers, '0' means don't test */
- set_expected_val((void *)data->test_pages, exp1, WORKER_VCPU_ID_1);
- set_expected_val((void *)data->test_pages, exp2, WORKER_VCPU_ID_2);
-
- /* Make sure workers have enough time to test */
- do_delay();
-}
-
-#define TESTVAL1 0x0101010101010101
-#define TESTVAL2 0x0202020202020202
-
-/* Main vCPU doing the test */
-static void sender_guest_code(vm_vaddr_t test_data)
-{
- struct test_data *data = (struct test_data *)test_data;
- struct hv_tlb_flush *flush = (struct hv_tlb_flush *)data->hcall_gva;
- struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)data->hcall_gva;
- vm_paddr_t hcall_gpa = data->hcall_gpa;
- int i, stage = 1;
-
- wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
- wrmsr(HV_X64_MSR_HYPERCALL, data->hcall_gpa);
-
- /* "Slow" hypercalls */
-
- GUEST_SYNC(stage++);
-
- /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
- for (i = 0; i < NTRY; i++) {
- prepare_to_test(data);
- flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
- flush->processor_mask = BIT(WORKER_VCPU_ID_1);
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa,
- hcall_gpa + PAGE_SIZE);
- post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
- }
-
- GUEST_SYNC(stage++);
-
- /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
- for (i = 0; i < NTRY; i++) {
- prepare_to_test(data);
- flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
- flush->processor_mask = BIT(WORKER_VCPU_ID_1);
- flush->gva_list[0] = (u64)data->test_pages;
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
- (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
- hcall_gpa, hcall_gpa + PAGE_SIZE);
- post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
- }
-
- GUEST_SYNC(stage++);
-
- /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
- for (i = 0; i < NTRY; i++) {
- prepare_to_test(data);
- flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
- HV_FLUSH_ALL_PROCESSORS;
- flush->processor_mask = 0;
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa,
- hcall_gpa + PAGE_SIZE);
- post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2);
- }
-
- GUEST_SYNC(stage++);
-
- /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
- for (i = 0; i < NTRY; i++) {
- prepare_to_test(data);
- flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
- HV_FLUSH_ALL_PROCESSORS;
- flush->gva_list[0] = (u64)data->test_pages;
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
- (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
- hcall_gpa, hcall_gpa + PAGE_SIZE);
- post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
- i % 2 ? TESTVAL1 : TESTVAL2);
- }
-
- GUEST_SYNC(stage++);
-
- /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
- for (i = 0; i < NTRY; i++) {
- prepare_to_test(data);
- flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
- flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
- flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
- flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
- (1 << HV_HYPERCALL_VARHEAD_OFFSET),
- hcall_gpa, hcall_gpa + PAGE_SIZE);
- post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
- }
-
- GUEST_SYNC(stage++);
-
- /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
- for (i = 0; i < NTRY; i++) {
- prepare_to_test(data);
- flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
- flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
- flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
- flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
- /* bank_contents and gva_list occupy the same space, thus [1] */
- flush_ex->gva_list[1] = (u64)data->test_pages;
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
- (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
- (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
- hcall_gpa, hcall_gpa + PAGE_SIZE);
- post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
- }
-
- GUEST_SYNC(stage++);
-
- /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
- for (i = 0; i < NTRY; i++) {
- prepare_to_test(data);
- flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
- flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
- flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
- BIT_ULL(WORKER_VCPU_ID_1 / 64);
- flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
- flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
- (2 << HV_HYPERCALL_VARHEAD_OFFSET),
- hcall_gpa, hcall_gpa + PAGE_SIZE);
- post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
- i % 2 ? TESTVAL1 : TESTVAL2);
- }
-
- GUEST_SYNC(stage++);
-
- /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
- for (i = 0; i < NTRY; i++) {
- prepare_to_test(data);
- flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
- flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
- flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
- BIT_ULL(WORKER_VCPU_ID_2 / 64);
- flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
- flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
- /* bank_contents and gva_list occupy the same space, thus [2] */
- flush_ex->gva_list[2] = (u64)data->test_pages;
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
- (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
- (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
- hcall_gpa, hcall_gpa + PAGE_SIZE);
- post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
- i % 2 ? TESTVAL1 : TESTVAL2);
- }
-
- GUEST_SYNC(stage++);
-
- /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
- for (i = 0; i < NTRY; i++) {
- prepare_to_test(data);
- flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
- flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
- hcall_gpa, hcall_gpa + PAGE_SIZE);
- post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
- i % 2 ? TESTVAL1 : TESTVAL2);
- }
-
- GUEST_SYNC(stage++);
-
- /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
- for (i = 0; i < NTRY; i++) {
- prepare_to_test(data);
- flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
- flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
- flush_ex->gva_list[0] = (u64)data->test_pages;
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
- (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
- hcall_gpa, hcall_gpa + PAGE_SIZE);
- post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
- i % 2 ? TESTVAL1 : TESTVAL2);
- }
-
- /* "Fast" hypercalls */
-
- GUEST_SYNC(stage++);
-
- /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
- for (i = 0; i < NTRY; i++) {
- prepare_to_test(data);
- flush->processor_mask = BIT(WORKER_VCPU_ID_1);
- hyperv_write_xmm_input(&flush->processor_mask, 1);
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
- HV_HYPERCALL_FAST_BIT, 0x0,
- HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
- post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
- }
-
- GUEST_SYNC(stage++);
-
- /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
- for (i = 0; i < NTRY; i++) {
- prepare_to_test(data);
- flush->processor_mask = BIT(WORKER_VCPU_ID_1);
- flush->gva_list[0] = (u64)data->test_pages;
- hyperv_write_xmm_input(&flush->processor_mask, 1);
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
- HV_HYPERCALL_FAST_BIT |
- (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
- 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
- post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
- }
-
- GUEST_SYNC(stage++);
-
- /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
- for (i = 0; i < NTRY; i++) {
- prepare_to_test(data);
- hyperv_write_xmm_input(&flush->processor_mask, 1);
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
- HV_HYPERCALL_FAST_BIT, 0x0,
- HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
- HV_FLUSH_ALL_PROCESSORS);
- post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
- i % 2 ? TESTVAL1 : TESTVAL2);
- }
-
- GUEST_SYNC(stage++);
-
- /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
- for (i = 0; i < NTRY; i++) {
- prepare_to_test(data);
- flush->gva_list[0] = (u64)data->test_pages;
- hyperv_write_xmm_input(&flush->processor_mask, 1);
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
- HV_HYPERCALL_FAST_BIT |
- (1UL << HV_HYPERCALL_REP_COMP_OFFSET), 0x0,
- HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
- HV_FLUSH_ALL_PROCESSORS);
- post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
- i % 2 ? TESTVAL1 : TESTVAL2);
- }
-
- GUEST_SYNC(stage++);
-
- /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
- for (i = 0; i < NTRY; i++) {
- prepare_to_test(data);
- flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
- flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
- flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
- hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
- HV_HYPERCALL_FAST_BIT |
- (1 << HV_HYPERCALL_VARHEAD_OFFSET),
- 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
- post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
- }
-
- GUEST_SYNC(stage++);
-
- /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
- for (i = 0; i < NTRY; i++) {
- prepare_to_test(data);
- flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
- flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
- flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
- /* bank_contents and gva_list occupy the same space, thus [1] */
- flush_ex->gva_list[1] = (u64)data->test_pages;
- hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
- HV_HYPERCALL_FAST_BIT |
- (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
- (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
- 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
- post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
- }
-
- GUEST_SYNC(stage++);
-
- /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
- for (i = 0; i < NTRY; i++) {
- prepare_to_test(data);
- flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
- flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
- BIT_ULL(WORKER_VCPU_ID_1 / 64);
- flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
- flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
- hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
- HV_HYPERCALL_FAST_BIT |
- (2 << HV_HYPERCALL_VARHEAD_OFFSET),
- 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
- post_test(data, i % 2 ? TESTVAL1 :
- TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2);
- }
-
- GUEST_SYNC(stage++);
-
- /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
- for (i = 0; i < NTRY; i++) {
- prepare_to_test(data);
- flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
- flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
- BIT_ULL(WORKER_VCPU_ID_2 / 64);
- flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
- flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
- /* bank_contents and gva_list occupy the same space, thus [2] */
- flush_ex->gva_list[2] = (u64)data->test_pages;
- hyperv_write_xmm_input(&flush_ex->hv_vp_set, 3);
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
- HV_HYPERCALL_FAST_BIT |
- (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
- (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
- 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
- post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
- i % 2 ? TESTVAL1 : TESTVAL2);
- }
-
- GUEST_SYNC(stage++);
-
- /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
- for (i = 0; i < NTRY; i++) {
- prepare_to_test(data);
- flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
- flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
- hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
- HV_HYPERCALL_FAST_BIT,
- 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
- post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
- i % 2 ? TESTVAL1 : TESTVAL2);
- }
-
- GUEST_SYNC(stage++);
-
- /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
- for (i = 0; i < NTRY; i++) {
- prepare_to_test(data);
- flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
- flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
- flush_ex->gva_list[0] = (u64)data->test_pages;
- hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
- hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
- HV_HYPERCALL_FAST_BIT |
- (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
- 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
- post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
- i % 2 ? TESTVAL1 : TESTVAL2);
- }
-
- GUEST_DONE();
-}
-
-static void *vcpu_thread(void *arg)
-{
- struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
- struct ucall uc;
- int old;
- int r;
-
- r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
- TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
- vcpu->id, r);
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- /* NOT REACHED */
- default:
- TEST_FAIL("Unexpected ucall %lu, vCPU %d", uc.cmd, vcpu->id);
- }
-
- return NULL;
-}
-
-static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
-{
- void *retval;
- int r;
-
- r = pthread_cancel(thread);
- TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d",
- vcpu->id, r);
-
- r = pthread_join(thread, &retval);
- TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d",
- vcpu->id, r);
- TEST_ASSERT(retval == PTHREAD_CANCELED,
- "expected retval=%p, got %p", PTHREAD_CANCELED,
- retval);
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vm *vm;
- struct kvm_vcpu *vcpu[3];
- pthread_t threads[2];
- vm_vaddr_t test_data_page, gva;
- vm_paddr_t gpa;
- uint64_t *pte;
- struct test_data *data;
- struct ucall uc;
- int stage = 1, r, i;
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TLBFLUSH));
-
- vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
-
- /* Test data page */
- test_data_page = vm_vaddr_alloc_page(vm);
- data = (struct test_data *)addr_gva2hva(vm, test_data_page);
-
- /* Hypercall input/output */
- data->hcall_gva = vm_vaddr_alloc_pages(vm, 2);
- data->hcall_gpa = addr_gva2gpa(vm, data->hcall_gva);
- memset(addr_gva2hva(vm, data->hcall_gva), 0x0, 2 * PAGE_SIZE);
-
- /*
- * Test pages: the first one is filled with '0x01's, the second with '0x02's
- * and the test will swap their mappings. The third page keeps the indication
- * about the current state of mappings.
- */
- data->test_pages = vm_vaddr_alloc_pages(vm, NTEST_PAGES + 1);
- for (i = 0; i < NTEST_PAGES; i++)
- memset(addr_gva2hva(vm, data->test_pages + PAGE_SIZE * i),
- (u8)(i + 1), PAGE_SIZE);
- set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_1);
- set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_2);
-
- /*
- * Get PTE pointers for test pages and map them inside the guest.
- * Use separate page for each PTE for simplicity.
- */
- gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR);
- for (i = 0; i < NTEST_PAGES; i++) {
- pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE);
- gpa = addr_hva2gpa(vm, pte);
- __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K);
- data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK);
- }
-
- /*
- * Sender vCPU which performs the test: swaps test pages, sets expectation
- * for 'workers' and issues TLB flush hypercalls.
- */
- vcpu_args_set(vcpu[0], 1, test_data_page);
- vcpu_set_hv_cpuid(vcpu[0]);
-
- /* Create worker vCPUs which check the contents of the test pages */
- vcpu[1] = vm_vcpu_add(vm, WORKER_VCPU_ID_1, worker_guest_code);
- vcpu_args_set(vcpu[1], 1, test_data_page);
- vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_1);
- vcpu_set_hv_cpuid(vcpu[1]);
-
- vcpu[2] = vm_vcpu_add(vm, WORKER_VCPU_ID_2, worker_guest_code);
- vcpu_args_set(vcpu[2], 1, test_data_page);
- vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_2);
- vcpu_set_hv_cpuid(vcpu[2]);
-
- r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]);
- TEST_ASSERT(!r, "pthread_create() failed");
-
- r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]);
- TEST_ASSERT(!r, "pthread_create() failed");
-
- while (true) {
- vcpu_run(vcpu[0]);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO);
-
- switch (get_ucall(vcpu[0], &uc)) {
- case UCALL_SYNC:
- TEST_ASSERT(uc.args[1] == stage,
- "Unexpected stage: %ld (%d expected)",
- uc.args[1], stage);
- break;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- /* NOT REACHED */
- case UCALL_DONE:
- goto done;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
-
- stage++;
- }
-
-done:
- cancel_join_vcpu_thread(threads[0], vcpu[1]);
- cancel_join_vcpu_thread(threads[1], vcpu[2]);
- kvm_vm_free(vm);
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2021, Google LLC.
- *
- * Tests for adjusting the KVM clock from userspace
- */
-#include <asm/kvm_para.h>
-#include <asm/pvclock.h>
-#include <asm/pvclock-abi.h>
-#include <stdint.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <time.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-struct test_case {
- uint64_t kvmclock_base;
- int64_t realtime_offset;
-};
-
-static struct test_case test_cases[] = {
- { .kvmclock_base = 0 },
- { .kvmclock_base = 180 * NSEC_PER_SEC },
- { .kvmclock_base = 0, .realtime_offset = -180 * NSEC_PER_SEC },
- { .kvmclock_base = 0, .realtime_offset = 180 * NSEC_PER_SEC },
-};
-
-#define GUEST_SYNC_CLOCK(__stage, __val) \
- GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0)
-
-static void guest_main(vm_paddr_t pvti_pa, struct pvclock_vcpu_time_info *pvti)
-{
- int i;
-
- wrmsr(MSR_KVM_SYSTEM_TIME_NEW, pvti_pa | KVM_MSR_ENABLED);
- for (i = 0; i < ARRAY_SIZE(test_cases); i++)
- GUEST_SYNC_CLOCK(i, __pvclock_read_cycles(pvti, rdtsc()));
-}
-
-#define EXPECTED_FLAGS (KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)
-
-static inline void assert_flags(struct kvm_clock_data *data)
-{
- TEST_ASSERT((data->flags & EXPECTED_FLAGS) == EXPECTED_FLAGS,
- "unexpected clock data flags: %x (want set: %x)",
- data->flags, EXPECTED_FLAGS);
-}
-
-static void handle_sync(struct ucall *uc, struct kvm_clock_data *start,
- struct kvm_clock_data *end)
-{
- uint64_t obs, exp_lo, exp_hi;
-
- obs = uc->args[2];
- exp_lo = start->clock;
- exp_hi = end->clock;
-
- assert_flags(start);
- assert_flags(end);
-
- TEST_ASSERT(exp_lo <= obs && obs <= exp_hi,
- "unexpected kvm-clock value: %"PRIu64" expected range: [%"PRIu64", %"PRIu64"]",
- obs, exp_lo, exp_hi);
-
- pr_info("kvm-clock value: %"PRIu64" expected range [%"PRIu64", %"PRIu64"]\n",
- obs, exp_lo, exp_hi);
-}
-
-static void handle_abort(struct ucall *uc)
-{
- REPORT_GUEST_ASSERT(*uc);
-}
-
-static void setup_clock(struct kvm_vm *vm, struct test_case *test_case)
-{
- struct kvm_clock_data data;
-
- memset(&data, 0, sizeof(data));
-
- data.clock = test_case->kvmclock_base;
- if (test_case->realtime_offset) {
- struct timespec ts;
- int r;
-
- data.flags |= KVM_CLOCK_REALTIME;
- do {
- r = clock_gettime(CLOCK_REALTIME, &ts);
- if (!r)
- break;
- } while (errno == EINTR);
-
- TEST_ASSERT(!r, "clock_gettime() failed: %d", r);
-
- data.realtime = ts.tv_sec * NSEC_PER_SEC;
- data.realtime += ts.tv_nsec;
- data.realtime += test_case->realtime_offset;
- }
-
- vm_ioctl(vm, KVM_SET_CLOCK, &data);
-}
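
setup_clock() above ultimately boils down to the raw KVM_SET_CLOCK ioctl on the VM file descriptor. A minimal standalone sketch of the same sequence without the selftest wrappers (hypothetical helper name, error handling omitted):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <time.h>

	/* Anchor the guest's kvmclock to host CLOCK_REALTIME plus an offset. */
	static void set_vm_clock(int vm_fd, __u64 kvmclock_base_ns, __s64 rt_offset_ns)
	{
		struct kvm_clock_data data = { .clock = kvmclock_base_ns };
		struct timespec ts;

		if (rt_offset_ns) {
			clock_gettime(CLOCK_REALTIME, &ts);
			data.flags |= KVM_CLOCK_REALTIME;
			data.realtime = (__u64)ts.tv_sec * 1000000000ULL +
					ts.tv_nsec + rt_offset_ns;
		}
		ioctl(vm_fd, KVM_SET_CLOCK, &data);
	}
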
-
-static void enter_guest(struct kvm_vcpu *vcpu)
-{
- struct kvm_clock_data start, end;
- struct kvm_vm *vm = vcpu->vm;
- struct ucall uc;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
- setup_clock(vm, &test_cases[i]);
-
- vm_ioctl(vm, KVM_GET_CLOCK, &start);
-
- vcpu_run(vcpu);
- vm_ioctl(vm, KVM_GET_CLOCK, &end);
-
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- handle_sync(&uc, &start, &end);
- break;
- case UCALL_ABORT:
- handle_abort(&uc);
- return;
- default:
- TEST_ASSERT(0, "unhandled ucall: %ld", uc.cmd);
- }
- }
-}
-
-int main(void)
-{
- struct kvm_vcpu *vcpu;
- vm_vaddr_t pvti_gva;
- vm_paddr_t pvti_gpa;
- struct kvm_vm *vm;
- int flags;
-
- flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK);
- TEST_REQUIRE(flags & KVM_CLOCK_REALTIME);
-
- TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_main);
-
- pvti_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000);
- pvti_gpa = addr_gva2gpa(vm, pvti_gva);
- vcpu_args_set(vcpu, 2, pvti_gpa, pvti_gva);
-
- enter_guest(vcpu);
- kvm_vm_free(vm);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2020, Google LLC.
- *
- * Tests for KVM paravirtual feature disablement
- */
-#include <asm/kvm_para.h>
-#include <linux/kvm_para.h>
-#include <stdint.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-struct msr_data {
- uint32_t idx;
- const char *name;
-};
-
-#define TEST_MSR(msr) { .idx = msr, .name = #msr }
-#define UCALL_PR_MSR 0xdeadbeef
-#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr)
-
-/*
- * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or
- * written, as the KVM_CPUID_FEATURES leaf is cleared.
- */
-static struct msr_data msrs_to_test[] = {
- TEST_MSR(MSR_KVM_SYSTEM_TIME),
- TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW),
- TEST_MSR(MSR_KVM_WALL_CLOCK),
- TEST_MSR(MSR_KVM_WALL_CLOCK_NEW),
- TEST_MSR(MSR_KVM_ASYNC_PF_EN),
- TEST_MSR(MSR_KVM_STEAL_TIME),
- TEST_MSR(MSR_KVM_PV_EOI_EN),
- TEST_MSR(MSR_KVM_POLL_CONTROL),
- TEST_MSR(MSR_KVM_ASYNC_PF_INT),
- TEST_MSR(MSR_KVM_ASYNC_PF_ACK),
-};
-
-static void test_msr(struct msr_data *msr)
-{
- uint64_t ignored;
- uint8_t vector;
-
- PR_MSR(msr);
-
- vector = rdmsr_safe(msr->idx, &ignored);
- GUEST_ASSERT_EQ(vector, GP_VECTOR);
-
- vector = wrmsr_safe(msr->idx, 0);
- GUEST_ASSERT_EQ(vector, GP_VECTOR);
-}
-
-struct hcall_data {
- uint64_t nr;
- const char *name;
-};
-
-#define TEST_HCALL(hc) { .nr = hc, .name = #hc }
-#define UCALL_PR_HCALL 0xdeadc0de
-#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc)
-
-/*
- * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding
- * features have been cleared in KVM_CPUID_FEATURES.
- */
-static struct hcall_data hcalls_to_test[] = {
- TEST_HCALL(KVM_HC_KICK_CPU),
- TEST_HCALL(KVM_HC_SEND_IPI),
- TEST_HCALL(KVM_HC_SCHED_YIELD),
-};
-
-static void test_hcall(struct hcall_data *hc)
-{
- uint64_t r;
-
- PR_HCALL(hc);
- r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
- GUEST_ASSERT_EQ(r, -KVM_ENOSYS);
-}
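
The -KVM_ENOSYS check above relies on KVM's x86 hypercall ABI: the hypercall number goes in RAX, up to four arguments in RBX/RCX/RDX/RSI, and the return value comes back in RAX. A rough sketch of a VMCALL-based hypercall (illustrative only; the in-tree kvm_hypercall() helper also copes with AMD's VMMCALL):

	static inline uint64_t vmcall_hypercall(uint64_t nr, uint64_t a0, uint64_t a1,
						uint64_t a2, uint64_t a3)
	{
		uint64_t ret;

		/* Intel flavor; AMD hardware expects VMMCALL instead. */
		__asm__ __volatile__("vmcall"
				     : "=a"(ret)
				     : "a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3)
				     : "memory");
		return ret;
	}
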
-
-static void guest_main(void)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) {
- test_msr(&msrs_to_test[i]);
- }
-
- for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) {
- test_hcall(&hcalls_to_test[i]);
- }
-
- GUEST_DONE();
-}
-
-static void pr_msr(struct ucall *uc)
-{
- struct msr_data *msr = (struct msr_data *)uc->args[0];
-
- pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx);
-}
-
-static void pr_hcall(struct ucall *uc)
-{
- struct hcall_data *hc = (struct hcall_data *)uc->args[0];
-
- pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr);
-}
-
-static void enter_guest(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- while (true) {
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_PR_MSR:
- pr_msr(&uc);
- break;
- case UCALL_PR_HCALL:
- pr_hcall(&uc);
- break;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- return;
- case UCALL_DONE:
- return;
- }
- }
-}
-
-static void test_pv_unhalt(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct kvm_cpuid_entry2 *ent;
- u32 kvm_sig_old;
-
- pr_info("testing KVM_FEATURE_PV_UNHALT\n");
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_DISABLE_EXITS));
-
- /* KVM_PV_UNHALT test */
- vm = vm_create_with_one_vcpu(&vcpu, guest_main);
- vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
-
- TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
- "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect");
-
- /* Make sure KVM clears vcpu->arch.kvm_cpuid */
- ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
- kvm_sig_old = ent->ebx;
- ent->ebx = 0xdeadbeef;
- vcpu_set_cpuid(vcpu);
-
- vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
- ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
- ent->ebx = kvm_sig_old;
- vcpu_set_cpuid(vcpu);
-
- TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
- "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS");
-
- /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */
-
- kvm_vm_free(vm);
-}
-
-int main(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID));
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_main);
-
- vcpu_enable_cap(vcpu, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 1);
-
- vcpu_clear_cpuid_entry(vcpu, KVM_CPUID_FEATURES);
-
- enter_guest(vcpu);
- kvm_vm_free(vm);
-
- test_pv_unhalt();
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * maximum APIC ID capability tests
- *
- * Copyright (C) 2022, Intel, Inc.
- *
- * Tests for getting/setting maximum APIC ID capability
- */
-
-#include "kvm_util.h"
-
-#define MAX_VCPU_ID 2
-
-int main(int argc, char *argv[])
-{
- struct kvm_vm *vm;
- int ret;
-
- vm = vm_create_barebones();
-
- /* Get KVM_CAP_MAX_VCPU_ID cap supported in KVM */
- ret = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
-
- /* Try to set KVM_CAP_MAX_VCPU_ID beyond KVM cap */
- ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, ret + 1);
- TEST_ASSERT(ret < 0,
- "Setting KVM_CAP_MAX_VCPU_ID beyond KVM cap should fail");
-
- /* Test BOOT_CPU_ID interaction (MAX_VCPU_ID cannot be lower) */
- if (kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID)) {
- vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)MAX_VCPU_ID);
-
- /* Try setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID */
- ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID - 1);
- TEST_ASSERT(ret < 0,
- "Setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID should fail");
- }
-
- /* Set KVM_CAP_MAX_VCPU_ID */
- vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID);
-
- /* Try to set KVM_CAP_MAX_VCPU_ID again */
- ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID + 1);
- TEST_ASSERT(ret < 0,
- "Setting KVM_CAP_MAX_VCPU_ID multiple times should fail");
-
- /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap */
- ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)MAX_VCPU_ID);
- TEST_ASSERT(ret < 0, "Creating vCPU with ID > MAX_VCPU_ID should fail");
-
- /* Create vCPU with bits 63:32 != 0, but an otherwise valid id */
- ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(1L << 32));
- TEST_ASSERT(ret < 0, "Creating vCPU with ID[63:32] != 0 should fail");
-
- /* Create vCPU with id within bounds */
- ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)0);
- TEST_ASSERT(ret >= 0, "Creating vCPU with ID 0 should succeed");
-
- close(ret);
- kvm_vm_free(vm);
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-
-#define CPUID_MWAIT (1u << 3)
-
-enum monitor_mwait_testcases {
- MWAIT_QUIRK_DISABLED = BIT(0),
- MISC_ENABLES_QUIRK_DISABLED = BIT(1),
- MWAIT_DISABLED = BIT(2),
-};
-
-/*
- * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD; in all
- * other scenarios, KVM should emulate them as nops.
- */
-#define GUEST_ASSERT_MONITOR_MWAIT(insn, testcase, vector) \
-do { \
- bool fault_wanted = ((testcase) & MWAIT_QUIRK_DISABLED) && \
- ((testcase) & MWAIT_DISABLED); \
- \
- if (fault_wanted) \
- __GUEST_ASSERT((vector) == UD_VECTOR, \
- "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \
- testcase, vector); \
- else \
- __GUEST_ASSERT(!(vector), \
- "Expected success on " insn " for testcase '0x%x', got '0x%x'", \
- testcase, vector); \
-} while (0)
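
Concretely, a #UD is only expected for testcases with both bit 0 (MWAIT_QUIRK_DISABLED) and bit 2 (MWAIT_DISABLED) set, i.e. 0x5 and 0x7 as exercised by guest_code() below; every other combination expects MONITOR and MWAIT to be emulated as nops.
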
-
-static void guest_monitor_wait(int testcase)
-{
- u8 vector;
-
- GUEST_SYNC(testcase);
-
- /*
- * Arbitrarily MONITOR this function, SVM performs fault checks before
- * intercept checks, so the inputs for MONITOR and MWAIT must be valid.
- */
- vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0));
- GUEST_ASSERT_MONITOR_MWAIT("MONITOR", testcase, vector);
-
- vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
- GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
-}
-
-static void guest_code(void)
-{
- guest_monitor_wait(MWAIT_DISABLED);
-
- guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
-
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED);
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED);
-
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED);
-
- GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
- uint64_t disabled_quirks;
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct ucall uc;
- int testcase;
-
- TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT));
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
-
- while (1) {
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- testcase = uc.args[1];
- break;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- goto done;
- case UCALL_DONE:
- goto done;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- goto done;
- }
-
- disabled_quirks = 0;
- if (testcase & MWAIT_QUIRK_DISABLED)
- disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
- if (testcase & MISC_ENABLES_QUIRK_DISABLED)
- disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
- vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
-
- /*
- * If the MISC_ENABLES quirk (KVM neglects to update CPUID to
- * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT
- * bit in MISC_ENABLES accordingly. If the quirk is enabled,
- * the only valid configuration is MWAIT disabled, as CPUID
- * can't be manually changed after running the vCPU.
- */
- if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) {
- TEST_ASSERT(testcase & MWAIT_DISABLED,
- "Can't toggle CPUID features after running vCPU");
- continue;
- }
-
- vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE,
- (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT);
- }
-
-done:
- kvm_vm_free(vm);
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-#include "svm_util.h"
-
-#define L2_GUEST_STACK_SIZE 256
-
-/*
- * Arbitrary, never shoved into KVM/hardware, just need to avoid conflict with
- * the "real" exceptions used, #SS/#GP/#DF (12/13/8).
- */
-#define FAKE_TRIPLE_FAULT_VECTOR 0xaa
-
-/* Arbitrary 32-bit error code injected by this test. */
-#define SS_ERROR_CODE 0xdeadbeef
-
-/*
- * Bit '0' is set on Intel if the exception occurs while delivering a previous
- * event/exception. AMD's wording is ambiguous, but presumably the bit is set
- * if the exception occurs while delivering an external event, e.g. NMI or INTR,
- * but not for exceptions that occur when delivering other exceptions or
- * software interrupts.
- *
- * Note, Intel's name for it, "External event", is misleading and much more
- * aligned with AMD's behavior, but the SDM is quite clear on its behavior.
- */
-#define ERROR_CODE_EXT_FLAG BIT(0)
-
-/*
- * Bit '1' is set if the fault occurred when looking up a descriptor in the
- * IDT, which is the case here as the IDT is empty/NULL.
- */
-#define ERROR_CODE_IDT_FLAG BIT(1)
-
-/*
- * The #GP that occurs when vectoring #SS should show the index into the IDT
- * for #SS, plus have the "IDT flag" set.
- */
-#define GP_ERROR_CODE_AMD ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG)
-#define GP_ERROR_CODE_INTEL ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG | ERROR_CODE_EXT_FLAG)
-
-/*
- * Intel and AMD both shove '0' into the error code on #DF, regardless of what
- * led to the double fault.
- */
-#define DF_ERROR_CODE 0
-
-#define INTERCEPT_SS (BIT_ULL(SS_VECTOR))
-#define INTERCEPT_SS_DF (INTERCEPT_SS | BIT_ULL(DF_VECTOR))
-#define INTERCEPT_SS_GP_DF (INTERCEPT_SS_DF | BIT_ULL(GP_VECTOR))
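
For concreteness: #SS is vector 12, so GP_ERROR_CODE_AMD works out to (12 * 8) | 2 = 0x62, GP_ERROR_CODE_INTEL additionally sets the EXT bit for 0x63, and both vendors report an error code of 0 for the #DF.
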
-
-static void l2_ss_pending_test(void)
-{
- GUEST_SYNC(SS_VECTOR);
-}
-
-static void l2_ss_injected_gp_test(void)
-{
- GUEST_SYNC(GP_VECTOR);
-}
-
-static void l2_ss_injected_df_test(void)
-{
- GUEST_SYNC(DF_VECTOR);
-}
-
-static void l2_ss_injected_tf_test(void)
-{
- GUEST_SYNC(FAKE_TRIPLE_FAULT_VECTOR);
-}
-
-static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector,
- uint32_t error_code)
-{
- struct vmcb *vmcb = svm->vmcb;
- struct vmcb_control_area *ctrl = &vmcb->control;
-
- vmcb->save.rip = (u64)l2_code;
- run_guest(vmcb, svm->vmcb_gpa);
-
- if (vector == FAKE_TRIPLE_FAULT_VECTOR)
- return;
-
- GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector));
- GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code);
-}
-
-static void l1_svm_code(struct svm_test_data *svm)
-{
- struct vmcb_control_area *ctrl = &svm->vmcb->control;
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
- generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
- svm->vmcb->save.idtr.limit = 0;
- ctrl->intercept |= BIT_ULL(INTERCEPT_SHUTDOWN);
-
- ctrl->intercept_exceptions = INTERCEPT_SS_GP_DF;
- svm_run_l2(svm, l2_ss_pending_test, SS_VECTOR, SS_ERROR_CODE);
- svm_run_l2(svm, l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_AMD);
-
- ctrl->intercept_exceptions = INTERCEPT_SS_DF;
- svm_run_l2(svm, l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
-
- ctrl->intercept_exceptions = INTERCEPT_SS;
- svm_run_l2(svm, l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
- GUEST_ASSERT_EQ(ctrl->exit_code, SVM_EXIT_SHUTDOWN);
-
- GUEST_DONE();
-}
-
-static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code)
-{
- GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_code));
-
- GUEST_ASSERT_EQ(vector == SS_VECTOR ? vmlaunch() : vmresume(), 0);
-
- if (vector == FAKE_TRIPLE_FAULT_VECTOR)
- return;
-
- GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
- GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector);
- GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code);
-}
-
-static void l1_vmx_code(struct vmx_pages *vmx)
-{
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
- GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
-
- GUEST_ASSERT_EQ(load_vmcs(vmx), true);
-
- prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
- GUEST_ASSERT_EQ(vmwrite(GUEST_IDTR_LIMIT, 0), 0);
-
- /*
- * VMX disallows injecting an exception with error_code[31:16] != 0,
- * and hardware will never generate a VM-Exit with bits 31:16 set.
- * KVM should likewise truncate the "bad" userspace value.
- */
- GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_GP_DF), 0);
- vmx_run_l2(l2_ss_pending_test, SS_VECTOR, (u16)SS_ERROR_CODE);
- vmx_run_l2(l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_INTEL);
-
- GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_DF), 0);
- vmx_run_l2(l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
-
- GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS), 0);
- vmx_run_l2(l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
- GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_TRIPLE_FAULT);
-
- GUEST_DONE();
-}
-
-static void __attribute__((__flatten__)) l1_guest_code(void *test_data)
-{
- if (this_cpu_has(X86_FEATURE_SVM))
- l1_svm_code(test_data);
- else
- l1_vmx_code(test_data);
-}
-
-static void assert_ucall_vector(struct kvm_vcpu *vcpu, int vector)
-{
- struct ucall uc;
-
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- TEST_ASSERT(vector == uc.args[1],
- "Expected L2 to ask for %d, got %ld", vector, uc.args[1]);
- break;
- case UCALL_DONE:
- TEST_ASSERT(vector == -1,
- "Expected L2 to ask for %d, L2 says it's done", vector);
- break;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- default:
- TEST_FAIL("Expected L2 to ask for %d, got unexpected ucall %lu", vector, uc.cmd);
- }
-}
-
-static void queue_ss_exception(struct kvm_vcpu *vcpu, bool inject)
-{
- struct kvm_vcpu_events events;
-
- vcpu_events_get(vcpu, &events);
-
- TEST_ASSERT(!events.exception.pending,
- "Vector %d unexpectedlt pending", events.exception.nr);
- TEST_ASSERT(!events.exception.injected,
- "Vector %d unexpectedly injected", events.exception.nr);
-
- events.flags = KVM_VCPUEVENT_VALID_PAYLOAD;
- events.exception.pending = !inject;
- events.exception.injected = inject;
- events.exception.nr = SS_VECTOR;
- events.exception.has_error_code = true;
- events.exception.error_code = SS_ERROR_CODE;
- vcpu_events_set(vcpu, &events);
-}
-
-/*
- * Verify KVM_{G,S}ET_EVENTS play nice with pending vs. injected exceptions
- * when an exception is being queued for L2. Specifically, verify that KVM
- * honors L1 exception intercept controls when a #SS is pending/injected,
- * triggers a #GP on vectoring the #SS, morphs to #DF if #GP isn't intercepted
- * by L1, and finally causes (nested) SHUTDOWN if #DF isn't intercepted by L1.
- */
-int main(int argc, char *argv[])
-{
- vm_vaddr_t nested_test_data_gva;
- struct kvm_vcpu_events events;
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXCEPTION_PAYLOAD));
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX));
-
- vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul);
-
- if (kvm_cpu_has(X86_FEATURE_SVM))
- vcpu_alloc_svm(vm, &nested_test_data_gva);
- else
- vcpu_alloc_vmx(vm, &nested_test_data_gva);
-
- vcpu_args_set(vcpu, 1, nested_test_data_gva);
-
- /* Run L1 => L2. L2 should sync and request #SS. */
- vcpu_run(vcpu);
- assert_ucall_vector(vcpu, SS_VECTOR);
-
- /* Pend #SS and request immediate exit. #SS should still be pending. */
- queue_ss_exception(vcpu, false);
- vcpu->run->immediate_exit = true;
- vcpu_run_complete_io(vcpu);
-
- /* Verify the pending event comes back out the same as it went in. */
- vcpu_events_get(vcpu, &events);
- TEST_ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD,
- KVM_VCPUEVENT_VALID_PAYLOAD);
- TEST_ASSERT_EQ(events.exception.pending, true);
- TEST_ASSERT_EQ(events.exception.nr, SS_VECTOR);
- TEST_ASSERT_EQ(events.exception.has_error_code, true);
- TEST_ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE);
-
- /*
- * Run for real with the pending #SS, L1 should get a VM-Exit due to
- * #SS interception and re-enter L2 to request #GP (via injected #SS).
- */
- vcpu->run->immediate_exit = false;
- vcpu_run(vcpu);
- assert_ucall_vector(vcpu, GP_VECTOR);
-
- /*
- * Inject #SS, the #SS should bypass interception and cause #GP, which
- * L1 should intercept before KVM morphs it to #DF. L1 should then
- * disable #GP interception and run L2 to request #DF (via #SS => #GP).
- */
- queue_ss_exception(vcpu, true);
- vcpu_run(vcpu);
- assert_ucall_vector(vcpu, DF_VECTOR);
-
- /*
- * Inject #SS, the #SS should bypass interception and cause #GP, which
- * L1 is no longer intercepting, and so L1 should see a #DF VM-Exit. L1
- * should then signal that it is done.
- */
- queue_ss_exception(vcpu, true);
- vcpu_run(vcpu);
- assert_ucall_vector(vcpu, FAKE_TRIPLE_FAULT_VECTOR);
-
- /*
- * Inject #SS yet again. L1 is not intercepting #GP or #DF, and so
- * should see nested TRIPLE_FAULT / SHUTDOWN.
- */
- queue_ss_exception(vcpu, true);
- vcpu_run(vcpu);
- assert_ucall_vector(vcpu, -1);
-
- kvm_vm_free(vm);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Usage: to be run via nx_huge_pages_test.sh, which does the necessary
- * environment setup and teardown
- *
- * Copyright (C) 2022, Google LLC.
- */
-#include <fcntl.h>
-#include <stdint.h>
-#include <time.h>
-
-#include <test_util.h>
-#include "kvm_util.h"
-#include "processor.h"
-
-#define HPAGE_SLOT 10
-#define HPAGE_GPA (4UL << 30) /* 4G prevents collision w/ slot 0 */
-#define HPAGE_GVA HPAGE_GPA /* GVA is arbitrary, so use GPA. */
-#define PAGES_PER_2MB_HUGE_PAGE 512
-#define HPAGE_SLOT_NPAGES (3 * PAGES_PER_2MB_HUGE_PAGE)
-
-/*
- * Passed by nx_huge_pages_test.sh to provide an easy warning if this test is
- * being run without it.
- */
-#define MAGIC_TOKEN 887563923
-
-/*
- * x86 opcode for the return instruction. Used to call into, and then
- * immediately return from, memory backed with hugepages.
- */
-#define RETURN_OPCODE 0xC3
-
-/* Call the specified memory address. */
-static void guest_do_CALL(uint64_t target)
-{
- ((void (*)(void)) target)();
-}
-
-/*
- * Exit the VM after each memory access so that the userspace component of the
- * test can make assertions about the pages backing the VM.
- *
- * See below for an explanation of how each access should affect the
- * backing mappings.
- */
-void guest_code(void)
-{
- uint64_t hpage_1 = HPAGE_GVA;
- uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512);
- uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512);
-
- READ_ONCE(*(uint64_t *)hpage_1);
- GUEST_SYNC(1);
-
- READ_ONCE(*(uint64_t *)hpage_2);
- GUEST_SYNC(2);
-
- guest_do_CALL(hpage_1);
- GUEST_SYNC(3);
-
- guest_do_CALL(hpage_3);
- GUEST_SYNC(4);
-
- READ_ONCE(*(uint64_t *)hpage_1);
- GUEST_SYNC(5);
-
- READ_ONCE(*(uint64_t *)hpage_3);
- GUEST_SYNC(6);
-}
-
-static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
-{
- int actual_pages_2m;
-
- actual_pages_2m = vm_get_stat(vm, "pages_2m");
-
- TEST_ASSERT(actual_pages_2m == expected_pages_2m,
- "Unexpected 2m page count. Expected %d, got %d",
- expected_pages_2m, actual_pages_2m);
-}
-
-static void check_split_count(struct kvm_vm *vm, int expected_splits)
-{
- int actual_splits;
-
- actual_splits = vm_get_stat(vm, "nx_lpage_splits");
-
- TEST_ASSERT(actual_splits == expected_splits,
- "Unexpected NX huge page split count. Expected %d, got %d",
- expected_splits, actual_splits);
-}
-
-static void wait_for_reclaim(int reclaim_period_ms)
-{
- long reclaim_wait_ms;
- struct timespec ts;
-
- reclaim_wait_ms = reclaim_period_ms * 5;
- ts.tv_sec = reclaim_wait_ms / 1000;
- ts.tv_nsec = (reclaim_wait_ms - (ts.tv_sec * 1000)) * 1000000;
- nanosleep(&ts, NULL);
-}
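
With the 100ms reclaim period passed in by nx_huge_pages_test.sh, this works out to a 500ms sleep, i.e. five full recovery periods.
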
-
-void run_test(int reclaim_period_ms, bool disable_nx_huge_pages,
- bool reboot_permissions)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- uint64_t nr_bytes;
- void *hva;
- int r;
-
- vm = vm_create(1);
-
- if (disable_nx_huge_pages) {
- r = __vm_disable_nx_huge_pages(vm);
- if (reboot_permissions) {
- TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions");
- } else {
- TEST_ASSERT(r == -1 && errno == EPERM,
- "This process should not have permission to disable NX huge pages");
- return;
- }
- }
-
- vcpu = vm_vcpu_add(vm, 0, guest_code);
-
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB,
- HPAGE_GPA, HPAGE_SLOT,
- HPAGE_SLOT_NPAGES, 0);
-
- nr_bytes = HPAGE_SLOT_NPAGES * vm->page_size;
-
- /*
- * Ensure that KVM can map HPAGE_SLOT with huge pages by mapping the
- * region into the guest with 2MiB pages whenever TDP is disabled (i.e.
- * whenever KVM is shadowing the guest page tables).
- *
- * When TDP is enabled, KVM should be able to map HPAGE_SLOT with huge
- * pages irrespective of the guest page size, so map with 4KiB pages
- * to test that that is the case.
- */
- if (kvm_is_tdp_enabled())
- virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_4K);
- else
- virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_2M);
-
- hva = addr_gpa2hva(vm, HPAGE_GPA);
- memset(hva, RETURN_OPCODE, nr_bytes);
-
- check_2m_page_count(vm, 0);
- check_split_count(vm, 0);
-
- /*
- * The guest code will first read from the first hugepage, resulting
- * in a huge page mapping being created.
- */
- vcpu_run(vcpu);
- check_2m_page_count(vm, 1);
- check_split_count(vm, 0);
-
- /*
- * Then the guest code will read from the second hugepage, resulting
- * in another huge page mapping being created.
- */
- vcpu_run(vcpu);
- check_2m_page_count(vm, 2);
- check_split_count(vm, 0);
-
- /*
- * Next, the guest will execute from the first huge page, causing it
- * to be remapped at 4k.
- *
- * If NX huge pages are disabled, this should have no effect.
- */
- vcpu_run(vcpu);
- check_2m_page_count(vm, disable_nx_huge_pages ? 2 : 1);
- check_split_count(vm, disable_nx_huge_pages ? 0 : 1);
-
- /*
- * Executing from the third huge page (previously unaccessed) will
- * cause part of it to be mapped at 4k.
- *
- * If NX huge pages are disabled, it should be mapped at 2M.
- */
- vcpu_run(vcpu);
- check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
- check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
-
- /* Reading from the first huge page again should have no effect. */
- vcpu_run(vcpu);
- check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
- check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
-
- /* Give recovery thread time to run. */
- wait_for_reclaim(reclaim_period_ms);
-
- /*
- * Now that the reclaimer has run, all the split pages should be gone.
- *
- * If NX huge pages are disabled, the reclaimer will not run, so
- * nothing should change from here on.
- */
- check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
- check_split_count(vm, 0);
-
- /*
- * The 4k mapping on hpage 3 should have been removed, so check that
- * reading from it causes a huge page mapping to be installed.
- */
- vcpu_run(vcpu);
- check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 2);
- check_split_count(vm, 0);
-
- kvm_vm_free(vm);
-}
-
-static void help(char *name)
-{
- puts("");
- printf("usage: %s [-h] [-p period_ms] [-t token]\n", name);
- puts("");
- printf(" -p: The NX reclaim period in milliseconds.\n");
- printf(" -t: The magic token to indicate environment setup is done.\n");
- printf(" -r: The test has reboot permissions and can disable NX huge pages.\n");
- puts("");
- exit(0);
-}
-
-int main(int argc, char **argv)
-{
- int reclaim_period_ms = 0, token = 0, opt;
- bool reboot_permissions = false;
-
- while ((opt = getopt(argc, argv, "hp:t:r")) != -1) {
- switch (opt) {
- case 'p':
- reclaim_period_ms = atoi_positive("Reclaim period", optarg);
- break;
- case 't':
- token = atoi_paranoid(optarg);
- break;
- case 'r':
- reboot_permissions = true;
- break;
- case 'h':
- default:
- help(argv[0]);
- break;
- }
- }
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES));
-
- __TEST_REQUIRE(token == MAGIC_TOKEN,
- "This test must be run with the magic token via '-t %d'.\n"
- "Running via nx_huge_pages_test.sh, which also handles "
- "environment setup, is strongly recommended.", MAGIC_TOKEN);
-
- run_test(reclaim_period_ms, false, reboot_permissions);
- run_test(reclaim_period_ms, true, reboot_permissions);
-
- return 0;
-}
-
+++ /dev/null
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Wrapper script which performs setup and cleanup for nx_huge_pages_test.
-# Makes use of root privileges to set up huge pages and KVM module parameters.
-#
-# Copyright (C) 2022, Google LLC.
-
-set -e
-
-NX_HUGE_PAGES=$(cat /sys/module/kvm/parameters/nx_huge_pages)
-NX_HUGE_PAGES_RECOVERY_RATIO=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio)
-NX_HUGE_PAGES_RECOVERY_PERIOD=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms)
-HUGE_PAGES=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages)
-
-# If we're already root, the host might not have sudo.
-if [ $(whoami) == "root" ]; then
- function do_sudo () {
- "$@"
- }
-else
- function do_sudo () {
- sudo "$@"
- }
-fi
-
-set +e
-
-function sudo_echo () {
- echo "$1" | do_sudo tee -a "$2" > /dev/null
-}
-
-NXECUTABLE="$(dirname $0)/nx_huge_pages_test"
-
-sudo_echo test /dev/null || exit 4 # KSFT_SKIP=4
-
-(
- set -e
-
- sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages
- sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
- sudo_echo 100 /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
- sudo_echo "$(( $HUGE_PAGES + 3 ))" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
-
- # Test with reboot permissions
- if [ $(whoami) == "root" ] || sudo setcap cap_sys_boot+ep $NXECUTABLE 2> /dev/null; then
- echo Running test with CAP_SYS_BOOT enabled
- $NXECUTABLE -t 887563923 -p 100 -r
- test $(whoami) == "root" || sudo setcap cap_sys_boot-ep $NXECUTABLE
- else
- echo setcap failed, skipping nx_huge_pages_test with CAP_SYS_BOOT enabled
- fi
-
- # Test without reboot permissions
- if [ $(whoami) != "root" ] ; then
- echo Running test with CAP_SYS_BOOT disabled
- $NXECUTABLE -t 887563923 -p 100
- else
- echo Running as root, skipping nx_huge_pages_test with CAP_SYS_BOOT disabled
- fi
-)
-RET=$?
-
-sudo_echo "$NX_HUGE_PAGES" /sys/module/kvm/parameters/nx_huge_pages
-sudo_echo "$NX_HUGE_PAGES_RECOVERY_RATIO" /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
-sudo_echo "$NX_HUGE_PAGES_RECOVERY_PERIOD" /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
-sudo_echo "$HUGE_PAGES" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
-
-exit $RET
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Test for x86 KVM_CAP_MSR_PLATFORM_INFO
- *
- * Copyright (C) 2018, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Verifies expected behavior of controlling guest access to
- * MSR_PLATFORM_INFO.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
-
-static void guest_code(void)
-{
- uint64_t msr_platform_info;
- uint8_t vector;
-
- GUEST_SYNC(true);
- msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
- GUEST_ASSERT_EQ(msr_platform_info & MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
- MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
-
- GUEST_SYNC(false);
- vector = rdmsr_safe(MSR_PLATFORM_INFO, &msr_platform_info);
- GUEST_ASSERT_EQ(vector, GP_VECTOR);
-
- GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- uint64_t msr_platform_info;
- struct ucall uc;
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO));
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
- msr_platform_info = vcpu_get_msr(vcpu, MSR_PLATFORM_INFO);
- vcpu_set_msr(vcpu, MSR_PLATFORM_INFO,
- msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
-
- for (;;) {
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- vm_enable_cap(vm, KVM_CAP_MSR_PLATFORM_INFO, uc.args[1]);
- break;
- case UCALL_DONE:
- goto done;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- default:
- TEST_FAIL("Unexpected ucall %lu", uc.cmd);
- break;
- }
- }
-
-done:
- kvm_vm_free(vm);
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2023, Tencent, Inc.
- */
-#include <x86intrin.h>
-
-#include "pmu.h"
-#include "processor.h"
-
-/* Number of iterations of the loop for the guest measurement payload. */
-#define NUM_LOOPS 10
-
-/* Each iteration of the loop retires one branch instruction. */
-#define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS)
-
-/*
- * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
- * 1 LOOP.
- */
-#define NUM_INSNS_PER_LOOP 3
-
-/*
- * Number of "extra" instructions that will be counted, i.e. the number of
- * instructions that are needed to set up the loop and then disable the
- * counter. 2 MOV, 2 XOR, 1 WRMSR.
- */
-#define NUM_EXTRA_INSNS 5
-
-/* Total number of instructions retired within the measured section. */
-#define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
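
With the values above, each measured section retires 10 * 3 + 5 = 35 instructions and 10 branch instructions (one LOOP branch per iteration).
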
-
-
-static uint8_t kvm_pmu_version;
-static bool kvm_has_perf_caps;
-
-static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
- void *guest_code,
- uint8_t pmu_version,
- uint64_t perf_capabilities)
-{
- struct kvm_vm *vm;
-
- vm = vm_create_with_one_vcpu(vcpu, guest_code);
- sync_global_to_guest(vm, kvm_pmu_version);
-
- /*
- * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
- * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
- */
- if (kvm_has_perf_caps)
- vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
-
- vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
- return vm;
-}
-
-static void run_vcpu(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- do {
- vcpu_run(vcpu);
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- break;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_PRINTF:
- pr_info("%s", uc.buffer);
- break;
- case UCALL_DONE:
- break;
- default:
- TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
- }
- } while (uc.cmd != UCALL_DONE);
-}
-
-static uint8_t guest_get_pmu_version(void)
-{
- /*
- * Return the effective PMU version, i.e. the minimum between what KVM
- * supports and what is enumerated to the guest. The host deliberately
- * advertises a PMU version to the guest beyond what is actually
- * supported by KVM to verify KVM doesn't freak out and do something
- * bizarre with an architecturally valid, but unsupported, version.
- */
- return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
-}
-
-/*
- * If an architectural event is supported and guaranteed to generate at least
- * one "hit, assert that its count is non-zero. If an event isn't supported or
- * the test can't guarantee the associated action will occur, then all bets are
- * off regarding the count, i.e. no checks can be done.
- *
- * Sanity check that in all cases, the event doesn't count when it's disabled,
- * and that KVM correctly emulates the write of an arbitrary value.
- */
-static void guest_assert_event_count(uint8_t idx,
- struct kvm_x86_pmu_feature event,
- uint32_t pmc, uint32_t pmc_msr)
-{
- uint64_t count;
-
- count = _rdpmc(pmc);
- if (!this_pmu_has(event))
- goto sanity_checks;
-
- switch (idx) {
- case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
- GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
- break;
- case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
- GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
- break;
- case INTEL_ARCH_LLC_REFERENCES_INDEX:
- case INTEL_ARCH_LLC_MISSES_INDEX:
- if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
- !this_cpu_has(X86_FEATURE_CLFLUSH))
- break;
- fallthrough;
- case INTEL_ARCH_CPU_CYCLES_INDEX:
- case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
- GUEST_ASSERT_NE(count, 0);
- break;
- case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
- GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
- break;
- default:
- break;
- }
-
-sanity_checks:
- __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
- GUEST_ASSERT_EQ(_rdpmc(pmc), count);
-
- wrmsr(pmc_msr, 0xdead);
- GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
-}
-
-/*
- * Enable and disable the PMC in a monolithic asm blob to ensure that the
- * compiler can't insert _any_ code into the measured sequence. Note, ECX
- * doesn't need to be clobbered as the input value, @pmc_msr, is restored
- * before the end of the sequence.
- *
- * If CLFLUSH{,OPT} is supported, flush the cacheline containing (at least) the
- * CLFLUSH{,OPT} instruction on each loop iteration to force LLC references and
- * misses, i.e. to allow testing that those events actually count.
- *
- * If forced emulation is enabled (and specified), force emulation on a subset
- * of the measured code to verify that KVM correctly emulates instructions and
- * branches retired events in conjunction with hardware also counting said
- * events.
- */
-#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP) \
-do { \
- __asm__ __volatile__("wrmsr\n\t" \
- " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \
- "1:\n\t" \
- clflush "\n\t" \
- "mfence\n\t" \
- FEP "loop 1b\n\t" \
- FEP "mov %%edi, %%ecx\n\t" \
- FEP "xor %%eax, %%eax\n\t" \
- FEP "xor %%edx, %%edx\n\t" \
- "wrmsr\n\t" \
- :: "a"((uint32_t)_value), "d"(_value >> 32), \
- "c"(_msr), "D"(_msr) \
- ); \
-} while (0)
-
-#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
-do { \
- wrmsr(pmc_msr, 0); \
- \
- if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \
- GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP); \
- else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \
- GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP); \
- else \
- GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \
- \
- guest_assert_event_count(_idx, _event, _pmc, _pmc_msr); \
-} while (0)
-
-static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
- uint32_t pmc, uint32_t pmc_msr,
- uint32_t ctrl_msr, uint64_t ctrl_msr_value)
-{
- GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
-
- if (is_forced_emulation_enabled)
- GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
-}
-
-#define X86_PMU_FEATURE_NULL \
-({ \
- struct kvm_x86_pmu_feature feature = {}; \
- \
- feature; \
-})
-
-static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
-{
- return !(*(u64 *)&event);
-}
-
-static void guest_test_arch_event(uint8_t idx)
-{
- const struct {
- struct kvm_x86_pmu_feature gp_event;
- struct kvm_x86_pmu_feature fixed_event;
- } intel_event_to_feature[] = {
- [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
- [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
- /*
- * Note, the fixed counter for reference cycles is NOT the same
- * as the general purpose architectural event. The fixed counter
- * explicitly counts at the same frequency as the TSC, whereas
- * the GP event counts at a fixed, but uarch specific, frequency.
- * Bundle them here for simplicity.
- */
- [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
- [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
- };
-
- uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
- uint32_t pmu_version = guest_get_pmu_version();
- /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
- bool guest_has_perf_global_ctrl = pmu_version >= 2;
- struct kvm_x86_pmu_feature gp_event, fixed_event;
- uint32_t base_pmc_msr;
- unsigned int i;
-
- /* The host side shouldn't invoke this without a guest PMU. */
- GUEST_ASSERT(pmu_version);
-
- if (this_cpu_has(X86_FEATURE_PDCM) &&
- rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
- base_pmc_msr = MSR_IA32_PMC0;
- else
- base_pmc_msr = MSR_IA32_PERFCTR0;
-
- gp_event = intel_event_to_feature[idx].gp_event;
- GUEST_ASSERT_EQ(idx, gp_event.f.bit);
-
- GUEST_ASSERT(nr_gp_counters);
-
- for (i = 0; i < nr_gp_counters; i++) {
- uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
- ARCH_PERFMON_EVENTSEL_ENABLE |
- intel_pmu_arch_events[idx];
-
- wrmsr(MSR_P6_EVNTSEL0 + i, 0);
- if (guest_has_perf_global_ctrl)
- wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
-
- __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
- MSR_P6_EVNTSEL0 + i, eventsel);
- }
-
- if (!guest_has_perf_global_ctrl)
- return;
-
- fixed_event = intel_event_to_feature[idx].fixed_event;
- if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
- return;
-
- i = fixed_event.f.bit;
-
- wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
-
- __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED,
- MSR_CORE_PERF_FIXED_CTR0 + i,
- MSR_CORE_PERF_GLOBAL_CTRL,
- FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
-}
-
-static void guest_test_arch_events(void)
-{
- uint8_t i;
-
- for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
- guest_test_arch_event(i);
-
- GUEST_DONE();
-}
-
-static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
- uint8_t length, uint8_t unavailable_mask)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- /* Testing arch events requires a vPMU (there are no negative tests). */
- if (!pmu_version)
- return;
-
- vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
- pmu_version, perf_capabilities);
-
- vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
- length);
- vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
- unavailable_mask);
-
- run_vcpu(vcpu);
-
- kvm_vm_free(vm);
-}
-
-/*
- * Limit testing to MSRs that are actually defined by Intel (in the SDM). MSRs
- * that aren't defined counter MSRs *probably* don't exist, but there's no
- * guarantee that currently undefined MSR indices won't be used for something
- * other than PMCs in the future.
- */
-#define MAX_NR_GP_COUNTERS 8
-#define MAX_NR_FIXED_COUNTERS 3
-
-#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \
-__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \
- "Expected %s on " #insn "(0x%x), got vector %u", \
- expect_gp ? "#GP" : "no fault", msr, vector) \
-
-#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \
- __GUEST_ASSERT(val == expected, \
- "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \
- msr, expected, val)
-
-static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
- uint64_t expected_val)
-{
- uint8_t vector;
- uint64_t val;
-
- vector = rdpmc_safe(rdpmc_idx, &val);
- GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
- if (expect_success)
- GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
-
- if (!is_forced_emulation_enabled)
- return;
-
- vector = rdpmc_safe_fep(rdpmc_idx, &val);
- GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
- if (expect_success)
- GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
-}
-
-static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
- uint8_t nr_counters, uint32_t or_mask)
-{
- const bool pmu_has_fast_mode = !guest_get_pmu_version();
- uint8_t i;
-
- for (i = 0; i < nr_possible_counters; i++) {
- /*
- * TODO: Test a value that validates full-width writes and the
- * width of the counters.
- */
- const uint64_t test_val = 0xffff;
- const uint32_t msr = base_msr + i;
-
- /*
- * Fixed counters are supported if the counter is less than the
- * number of enumerated contiguous counters *or* the counter is
- * explicitly enumerated in the supported counters mask.
- */
- const bool expect_success = i < nr_counters || (or_mask & BIT(i));
-
- /*
- * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
- * unsupported, i.e. doesn't #GP and reads back '0'.
- */
- const uint64_t expected_val = expect_success ? test_val : 0;
- const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
- msr != MSR_P6_PERFCTR1;
- uint32_t rdpmc_idx;
- uint8_t vector;
- uint64_t val;
-
- vector = wrmsr_safe(msr, test_val);
- GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
-
- vector = rdmsr_safe(msr, &val);
- GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);
-
- /* On #GP, the result of RDMSR is undefined. */
- if (!expect_gp)
- GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);
-
- /*
- * Redo the read tests with RDPMC, which has different indexing
- * semantics and additional capabilities.
- */
- rdpmc_idx = i;
- if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
- rdpmc_idx |= INTEL_RDPMC_FIXED;
-
- guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);
-
- /*
- * KVM doesn't support non-architectural PMUs, i.e. it should be
- * impossible to have fast mode RDPMC. Verify that attempting
- * to use fast RDPMC always #GPs.
- */
- GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
- rdpmc_idx |= INTEL_RDPMC_FAST;
- guest_test_rdpmc(rdpmc_idx, false, -1ull);
-
- vector = wrmsr_safe(msr, 0);
- GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
- }
-}
-
-static void guest_test_gp_counters(void)
-{
- uint8_t pmu_version = guest_get_pmu_version();
- uint8_t nr_gp_counters = 0;
- uint32_t base_msr;
-
- if (pmu_version)
- nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
-
- /*
- * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is
- * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number
- * of GP counters. If there are no GP counters, require KVM to leave
- * PERF_GLOBAL_CTRL '0'. This edge case isn't covered by the SDM, but
- * follow the spirit of the architecture and only globally enable GP
- * counters, of which there are none.
- */
- if (pmu_version > 1) {
- uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);
-
- if (nr_gp_counters)
- GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
- else
- GUEST_ASSERT_EQ(global_ctrl, 0);
- }
-
- if (this_cpu_has(X86_FEATURE_PDCM) &&
- rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
- base_msr = MSR_IA32_PMC0;
- else
- base_msr = MSR_IA32_PERFCTR0;
-
- guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
- GUEST_DONE();
-}
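
As a worked example of the reset check above: a guest configured with 8 GP counters should read PERF_GLOBAL_CTRL == GENMASK_ULL(7, 0) == 0xff immediately after reset, while a v2+ vPMU with zero GP counters should read 0.
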
-
-static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
- uint8_t nr_gp_counters)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
- pmu_version, perf_capabilities);
-
- vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
- nr_gp_counters);
-
- run_vcpu(vcpu);
-
- kvm_vm_free(vm);
-}
-
-static void guest_test_fixed_counters(void)
-{
- uint64_t supported_bitmask = 0;
- uint8_t nr_fixed_counters = 0;
- uint8_t i;
-
- /* Fixed counters require Architectural vPMU Version 2+. */
- if (guest_get_pmu_version() >= 2)
- nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
-
- /*
- * The supported bitmask for fixed counters was introduced in PMU
- * version 5.
- */
- if (guest_get_pmu_version() >= 5)
- supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);
-
- guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
- nr_fixed_counters, supported_bitmask);
-
- for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
- uint8_t vector;
- uint64_t val;
-
- if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
- vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
- FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
- __GUEST_ASSERT(vector == GP_VECTOR,
- "Expected #GP for counter %u in FIXED_CTR_CTRL", i);
-
- vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
- FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
- __GUEST_ASSERT(vector == GP_VECTOR,
- "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
- continue;
- }
-
- wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
- wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
- wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
- __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
- wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
- val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);
-
- GUEST_ASSERT_NE(val, 0);
- }
- GUEST_DONE();
-}
-
-static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
- uint8_t nr_fixed_counters,
- uint32_t supported_bitmask)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
- pmu_version, perf_capabilities);
-
- vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
- supported_bitmask);
- vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
- nr_fixed_counters);
-
- run_vcpu(vcpu);
-
- kvm_vm_free(vm);
-}
-
-static void test_intel_counters(void)
-{
- uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
- uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
- uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
- uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
- unsigned int i;
- uint8_t v, j;
- uint32_t k;
-
- const uint64_t perf_caps[] = {
- 0,
- PMU_CAP_FW_WRITES,
- };
-
- /*
- * Test up to PMU v5, which is the current maximum version defined by
- * Intel, i.e. the last version that is guaranteed to be backwards
- * compatible with KVM's existing behavior.
- */
- uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
-
- /*
- * Detect the existence of events that aren't supported by selftests.
- * This will (obviously) fail any time the kernel adds support for a
- * new event, but it's worth paying that price to keep the test fresh.
- */
- TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
- "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
- nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
-
- /*
- * Force iterating over known arch events regardless of whether or not
- * KVM/hardware supports a given event.
- */
- nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
-
- for (v = 0; v <= max_pmu_version; v++) {
- for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
- if (!kvm_has_perf_caps && perf_caps[i])
- continue;
-
- pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
- v, perf_caps[i]);
- /*
- * To keep the total runtime reasonable, test every
- * possible non-zero, non-reserved bitmap combination
- * only with the native PMU version and the full bit
- * vector length.
- */
- if (v == pmu_version) {
- for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
- test_arch_events(v, perf_caps[i], nr_arch_events, k);
- }
- /*
- * Test single bits for all PMU versions and lengths up
- * to the number of events + 1 (to verify KVM doesn't do
- * weird things if the guest length is greater than the
- * host length). Explicitly test a mask of '0' and all
- * ones, i.e. all events being available and unavailable.
- */
- for (j = 0; j <= nr_arch_events + 1; j++) {
- test_arch_events(v, perf_caps[i], j, 0);
- test_arch_events(v, perf_caps[i], j, 0xff);
-
- for (k = 0; k < nr_arch_events; k++)
- test_arch_events(v, perf_caps[i], j, BIT(k));
- }
-
- pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
- v, perf_caps[i]);
- for (j = 0; j <= nr_gp_counters; j++)
- test_gp_counters(v, perf_caps[i], j);
-
- pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
- v, perf_caps[i]);
- for (j = 0; j <= nr_fixed_counters; j++) {
- for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
- test_fixed_counters(v, perf_caps[i], j, k);
- }
- }
- }
-}
-
-int main(int argc, char *argv[])
-{
- TEST_REQUIRE(kvm_is_pmu_enabled());
-
- TEST_REQUIRE(host_cpu_is_intel);
- TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
- TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
-
- kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
- kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);
-
- test_intel_counters();
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Test for x86 KVM_SET_PMU_EVENT_FILTER.
- *
- * Copyright (C) 2022, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Verifies the expected behavior of allow lists and deny lists for
- * virtual PMU events.
- */
-#include "kvm_util.h"
-#include "pmu.h"
-#include "processor.h"
-#include "test_util.h"
-
-#define NUM_BRANCHES 42
-#define MAX_TEST_EVENTS 10
-
-#define PMU_EVENT_FILTER_INVALID_ACTION (KVM_PMU_EVENT_DENY + 1)
-#define PMU_EVENT_FILTER_INVALID_FLAGS (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1)
-#define PMU_EVENT_FILTER_INVALID_NEVENTS (KVM_PMU_EVENT_FILTER_MAX_EVENTS + 1)
-
-struct __kvm_pmu_event_filter {
- __u32 action;
- __u32 nevents;
- __u32 fixed_counter_bitmap;
- __u32 flags;
- __u32 pad[4];
- __u64 events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
-};
-
-/*
- * This event list comprises Intel's known architectural events, plus AMD's
- * Branch Instructions Retired for Zen CPUs. Note, AMD and Intel use the
- * same encoding for Instructions Retired.
- */
-kvm_static_assert(INTEL_ARCH_INSTRUCTIONS_RETIRED == AMD_ZEN_INSTRUCTIONS_RETIRED);
-
-static const struct __kvm_pmu_event_filter base_event_filter = {
- .nevents = ARRAY_SIZE(base_event_filter.events),
- .events = {
- INTEL_ARCH_CPU_CYCLES,
- INTEL_ARCH_INSTRUCTIONS_RETIRED,
- INTEL_ARCH_REFERENCE_CYCLES,
- INTEL_ARCH_LLC_REFERENCES,
- INTEL_ARCH_LLC_MISSES,
- INTEL_ARCH_BRANCHES_RETIRED,
- INTEL_ARCH_BRANCHES_MISPREDICTED,
- INTEL_ARCH_TOPDOWN_SLOTS,
- AMD_ZEN_BRANCHES_RETIRED,
- },
-};
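
The local __kvm_pmu_event_filter mirrors the UAPI struct kvm_pmu_event_filter field for field, only replacing the flexible events[] array with a fixed-size one so filters can be statically initialized and then cast for the ioctl. A minimal sketch (assuming a struct kvm_vm *vm, as used by the helpers below) of applying an empty allow list, which filters out every event:

	struct __kvm_pmu_event_filter empty = {
		.action = KVM_PMU_EVENT_ALLOW,
		.nevents = 0,
	};

	/* An allow list with no entries leaves nothing allowed. */
	vm_ioctl(vm, KVM_SET_PMU_EVENT_FILTER,
		 (struct kvm_pmu_event_filter *)&empty);
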
-
-struct {
- uint64_t loads;
- uint64_t stores;
- uint64_t loads_stores;
- uint64_t branches_retired;
- uint64_t instructions_retired;
-} pmc_results;
-
-/*
- * If we encounter a #GP during the guest PMU sanity check, then the guest
- * PMU is not functional. Inform the hypervisor via GUEST_SYNC(-EFAULT).
- */
-static void guest_gp_handler(struct ex_regs *regs)
-{
- GUEST_SYNC(-EFAULT);
-}
-
-/*
- * Check that we can write a new value to the given MSR and read it back.
- * The caller should provide a non-empty set of bits that are safe to flip.
- *
- * Return on success. GUEST_SYNC(-EIO) on error.
- */
-static void check_msr(uint32_t msr, uint64_t bits_to_flip)
-{
- uint64_t v = rdmsr(msr) ^ bits_to_flip;
-
- wrmsr(msr, v);
- if (rdmsr(msr) != v)
- GUEST_SYNC(-EIO);
-
- v ^= bits_to_flip;
- wrmsr(msr, v);
- if (rdmsr(msr) != v)
- GUEST_SYNC(-EIO);
-}
-
-static void run_and_measure_loop(uint32_t msr_base)
-{
- const uint64_t branches_retired = rdmsr(msr_base + 0);
- const uint64_t insn_retired = rdmsr(msr_base + 1);
-
- __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
-
- pmc_results.branches_retired = rdmsr(msr_base + 0) - branches_retired;
- pmc_results.instructions_retired = rdmsr(msr_base + 1) - insn_retired;
-}
-
-static void intel_guest_code(void)
-{
- check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
- check_msr(MSR_P6_EVNTSEL0, 0xffff);
- check_msr(MSR_IA32_PMC0, 0xffff);
- GUEST_SYNC(0);
-
- for (;;) {
- wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
- wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
- ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_BRANCHES_RETIRED);
- wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
- ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_INSTRUCTIONS_RETIRED);
- wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
-
- run_and_measure_loop(MSR_IA32_PMC0);
- GUEST_SYNC(0);
- }
-}
-
-/*
- * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
- * this code uses the always-available, legacy K7 PMU MSRs, which alias to
- * the first four of the six extended core PMU MSRs.
- */
-static void amd_guest_code(void)
-{
- check_msr(MSR_K7_EVNTSEL0, 0xffff);
- check_msr(MSR_K7_PERFCTR0, 0xffff);
- GUEST_SYNC(0);
-
- for (;;) {
- wrmsr(MSR_K7_EVNTSEL0, 0);
- wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
- ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BRANCHES_RETIRED);
- wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
- ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_INSTRUCTIONS_RETIRED);
-
- run_and_measure_loop(MSR_K7_PERFCTR0);
- GUEST_SYNC(0);
- }
-}
-
-/*
- * Run the VM to the next GUEST_SYNC(value), and return the value passed
- * to the sync. Any other exit from the guest is fatal.
- */
-static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- get_ucall(vcpu, &uc);
- TEST_ASSERT(uc.cmd == UCALL_SYNC,
- "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
- return uc.args[1];
-}
-
-static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu)
-{
- uint64_t r;
-
- memset(&pmc_results, 0, sizeof(pmc_results));
- sync_global_to_guest(vcpu->vm, pmc_results);
-
- r = run_vcpu_to_sync(vcpu);
- TEST_ASSERT(!r, "Unexpected sync value: 0x%lx", r);
-
- sync_global_from_guest(vcpu->vm, pmc_results);
-}
-
-/*
- * In a nested environment or if the vPMU is disabled, the guest PMU
- * might not work as architected (accessing the PMU MSRs may raise
- * #GP, or writes could simply be discarded). In those situations,
- * there is no point in running these tests. The guest code will perform
- * a sanity check and then GUEST_SYNC(success). In the case of failure,
- * the behavior of the guest on resumption is undefined.
- */
-static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
-{
- uint64_t r;
-
- vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
- r = run_vcpu_to_sync(vcpu);
- vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL);
-
- return !r;
-}
-
-/*
- * Remove the first occurrence of 'event' (if any) from the filter's
- * event list.
- */
-static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
-{
- bool found = false;
- int i;
-
- for (i = 0; i < f->nevents; i++) {
- if (found)
- f->events[i - 1] = f->events[i];
- else
- found = f->events[i] == event;
- }
- if (found)
- f->nevents--;
-}
-
-#define ASSERT_PMC_COUNTING_INSTRUCTIONS() \
-do { \
- uint64_t br = pmc_results.branches_retired; \
- uint64_t ir = pmc_results.instructions_retired; \
- \
- if (br && br != NUM_BRANCHES) \
- pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \
- __func__, br, NUM_BRANCHES); \
- TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \
- __func__, br); \
- TEST_ASSERT(ir, "%s: Instructions retired = %lu (expected > 0)", \
- __func__, ir); \
-} while (0)
-
-#define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS() \
-do { \
- uint64_t br = pmc_results.branches_retired; \
- uint64_t ir = pmc_results.instructions_retired; \
- \
- TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)", \
- __func__, br); \
- TEST_ASSERT(!ir, "%s: Instructions retired = %lu (expected 0)", \
- __func__, ir); \
-} while (0)
-
-static void test_without_filter(struct kvm_vcpu *vcpu)
-{
- run_vcpu_and_sync_pmc_results(vcpu);
-
- ASSERT_PMC_COUNTING_INSTRUCTIONS();
-}
-
-static void test_with_filter(struct kvm_vcpu *vcpu,
- struct __kvm_pmu_event_filter *__f)
-{
- struct kvm_pmu_event_filter *f = (void *)__f;
-
- vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
- run_vcpu_and_sync_pmc_results(vcpu);
-}
-
-static void test_amd_deny_list(struct kvm_vcpu *vcpu)
-{
- struct __kvm_pmu_event_filter f = {
- .action = KVM_PMU_EVENT_DENY,
- .nevents = 1,
- .events = {
- RAW_EVENT(0x1C2, 0),
- },
- };
-
- test_with_filter(vcpu, &f);
-
- ASSERT_PMC_COUNTING_INSTRUCTIONS();
-}
-
-static void test_member_deny_list(struct kvm_vcpu *vcpu)
-{
- struct __kvm_pmu_event_filter f = base_event_filter;
-
- f.action = KVM_PMU_EVENT_DENY;
- test_with_filter(vcpu, &f);
-
- ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
-}
-
-static void test_member_allow_list(struct kvm_vcpu *vcpu)
-{
- struct __kvm_pmu_event_filter f = base_event_filter;
-
- f.action = KVM_PMU_EVENT_ALLOW;
- test_with_filter(vcpu, &f);
-
- ASSERT_PMC_COUNTING_INSTRUCTIONS();
-}
-
-static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
-{
- struct __kvm_pmu_event_filter f = base_event_filter;
-
- f.action = KVM_PMU_EVENT_DENY;
-
- remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
- remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
- remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
- test_with_filter(vcpu, &f);
-
- ASSERT_PMC_COUNTING_INSTRUCTIONS();
-}
-
-static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
-{
- struct __kvm_pmu_event_filter f = base_event_filter;
-
- f.action = KVM_PMU_EVENT_ALLOW;
-
- remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
- remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
- remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
- test_with_filter(vcpu, &f);
-
- ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
-}
-
-/*
- * Verify that setting KVM_PMU_CAP_DISABLE prevents the use of the PMU.
- *
- * Note that KVM_CAP_PMU_CAPABILITY must be enabled prior to creating vCPUs.
- */
-static void test_pmu_config_disable(void (*guest_code)(void))
-{
- struct kvm_vcpu *vcpu;
- int r;
- struct kvm_vm *vm;
-
- r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY);
- if (!(r & KVM_PMU_CAP_DISABLE))
- return;
-
- vm = vm_create(1);
-
- vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE);
-
- vcpu = vm_vcpu_add(vm, 0, guest_code);
- TEST_ASSERT(!sanity_check_pmu(vcpu),
- "Guest should not be able to use disabled PMU.");
-
- kvm_vm_free(vm);
-}
-
-/*
- * On Intel, check for a non-zero PMU version, at least one general-purpose
- * counter per logical processor, and support for counting the number of branch
- * instructions retired.
- */
-static bool use_intel_pmu(void)
-{
- return host_cpu_is_intel &&
- kvm_cpu_property(X86_PROPERTY_PMU_VERSION) &&
- kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) &&
- kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED);
-}
-
-/*
- * On AMD, all Family 17h+ CPUs (Zen and its successors) use event encoding
- * 0xc2,0 for Branch Instructions Retired.
- */
-static bool use_amd_pmu(void)
-{
- return host_cpu_is_amd && kvm_cpu_family() >= 0x17;
-}
-
-/*
- * "MEM_INST_RETIRED.ALL_LOADS", "MEM_INST_RETIRED.ALL_STORES", and
- * "MEM_INST_RETIRED.ANY" from https://perfmon-events.intel.com/
- * supported on Intel Xeon processors:
- * - Sapphire Rapids, Ice Lake, Cascade Lake, Skylake.
- */
-#define MEM_INST_RETIRED 0xD0
-#define MEM_INST_RETIRED_LOAD RAW_EVENT(MEM_INST_RETIRED, 0x81)
-#define MEM_INST_RETIRED_STORE RAW_EVENT(MEM_INST_RETIRED, 0x82)
-#define MEM_INST_RETIRED_LOAD_STORE RAW_EVENT(MEM_INST_RETIRED, 0x83)
-
-static bool supports_event_mem_inst_retired(void)
-{
- uint32_t eax, ebx, ecx, edx;
-
- cpuid(1, &eax, &ebx, &ecx, &edx);
- if (x86_family(eax) == 0x6) {
- switch (x86_model(eax)) {
- /* Sapphire Rapids */
- case 0x8F:
- /* Ice Lake */
- case 0x6A:
- /* Skylake */
- /* Cascade Lake */
- case 0x55:
- return true;
- }
- }
-
- return false;
-}
-
-/*
- * "LS Dispatch", from Processor Programming Reference
- * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
- * Preliminary Processor Programming Reference (PPR) for AMD Family
- * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
- * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
- * B1 Processors Volume 1 of 2.
- */
-#define LS_DISPATCH 0x29
-#define LS_DISPATCH_LOAD RAW_EVENT(LS_DISPATCH, BIT(0))
-#define LS_DISPATCH_STORE RAW_EVENT(LS_DISPATCH, BIT(1))
-#define LS_DISPATCH_LOAD_STORE RAW_EVENT(LS_DISPATCH, BIT(2))
-
-#define INCLUDE_MASKED_ENTRY(event_select, mask, match) \
- KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, false)
-#define EXCLUDE_MASKED_ENTRY(event_select, mask, match) \
- KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, true)
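For readers who don't have the selftests headers handy: KVM_PMU_ENCODE_MASKED_ENTRY() packs an event select, a unit-mask mask, a unit-mask match value and an exclude flag into a single u64 filter entry, and the two wrappers above just hard-code the exclude flag. As I read the KVM documentation for KVM_PMU_EVENT_FLAG_MASKED_EVENTS, an entry matches a programmed event when the event selects agree and (unit_mask & mask) == match. A minimal sketch:

	/* Match only umask 0x81 (ALL_LOADS) of MEM_INST_RETIRED. */
	uint64_t entry = INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x81);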
-
-static void masked_events_guest_test(uint32_t msr_base)
-{
- /*
-	 * The actual values of the counters don't determine the outcome of
-	 * the test; only whether they are zero or non-zero matters.
- */
- const uint64_t loads = rdmsr(msr_base + 0);
- const uint64_t stores = rdmsr(msr_base + 1);
- const uint64_t loads_stores = rdmsr(msr_base + 2);
- int val;
-
-
- __asm__ __volatile__("movl $0, %[v];"
- "movl %[v], %%eax;"
- "incl %[v];"
- : [v]"+m"(val) :: "eax");
-
- pmc_results.loads = rdmsr(msr_base + 0) - loads;
- pmc_results.stores = rdmsr(msr_base + 1) - stores;
- pmc_results.loads_stores = rdmsr(msr_base + 2) - loads_stores;
-}
-
-static void intel_masked_events_guest_code(void)
-{
- for (;;) {
- wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-
- wrmsr(MSR_P6_EVNTSEL0 + 0, ARCH_PERFMON_EVENTSEL_ENABLE |
- ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD);
- wrmsr(MSR_P6_EVNTSEL0 + 1, ARCH_PERFMON_EVENTSEL_ENABLE |
- ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_STORE);
- wrmsr(MSR_P6_EVNTSEL0 + 2, ARCH_PERFMON_EVENTSEL_ENABLE |
- ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD_STORE);
-
- wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x7);
-
- masked_events_guest_test(MSR_IA32_PMC0);
- GUEST_SYNC(0);
- }
-}
-
-static void amd_masked_events_guest_code(void)
-{
- for (;;) {
- wrmsr(MSR_K7_EVNTSEL0, 0);
- wrmsr(MSR_K7_EVNTSEL1, 0);
- wrmsr(MSR_K7_EVNTSEL2, 0);
-
- wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
- ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD);
- wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
- ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_STORE);
- wrmsr(MSR_K7_EVNTSEL2, ARCH_PERFMON_EVENTSEL_ENABLE |
- ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD_STORE);
-
- masked_events_guest_test(MSR_K7_PERFCTR0);
- GUEST_SYNC(0);
- }
-}
-
-static void run_masked_events_test(struct kvm_vcpu *vcpu,
- const uint64_t masked_events[],
- const int nmasked_events)
-{
- struct __kvm_pmu_event_filter f = {
- .nevents = nmasked_events,
- .action = KVM_PMU_EVENT_ALLOW,
- .flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
- };
-
- memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events);
- test_with_filter(vcpu, &f);
-}
-
-#define ALLOW_LOADS BIT(0)
-#define ALLOW_STORES BIT(1)
-#define ALLOW_LOADS_STORES BIT(2)
-
-struct masked_events_test {
- uint64_t intel_events[MAX_TEST_EVENTS];
- uint64_t intel_event_end;
- uint64_t amd_events[MAX_TEST_EVENTS];
- uint64_t amd_event_end;
- const char *msg;
- uint32_t flags;
-};
-
-/*
- * These are the test cases for the masked events tests.
- *
- * For each test, the guest enables 3 PMU counters (loads, stores,
- * loads + stores). The filter is then set in KVM with the masked events
- * provided. The test then verifies that the counters agree with which
- * ones should be counting and which ones should be filtered.
- */
-const struct masked_events_test test_cases[] = {
- {
- .intel_events = {
- INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x81),
- },
- .amd_events = {
- INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)),
- },
- .msg = "Only allow loads.",
- .flags = ALLOW_LOADS,
- }, {
- .intel_events = {
- INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82),
- },
- .amd_events = {
- INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)),
- },
- .msg = "Only allow stores.",
- .flags = ALLOW_STORES,
- }, {
- .intel_events = {
- INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83),
- },
- .amd_events = {
- INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(2)),
- },
- .msg = "Only allow loads + stores.",
- .flags = ALLOW_LOADS_STORES,
- }, {
- .intel_events = {
- INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
- EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83),
- },
- .amd_events = {
- INCLUDE_MASKED_ENTRY(LS_DISPATCH, ~(BIT(0) | BIT(1)), 0),
- },
- .msg = "Only allow loads and stores.",
- .flags = ALLOW_LOADS | ALLOW_STORES,
- }, {
- .intel_events = {
- INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
- EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82),
- },
- .amd_events = {
- INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
- EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)),
- },
- .msg = "Only allow loads and loads + stores.",
- .flags = ALLOW_LOADS | ALLOW_LOADS_STORES
- }, {
- .intel_events = {
- INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFE, 0x82),
- },
- .amd_events = {
- INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
- EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)),
- },
- .msg = "Only allow stores and loads + stores.",
- .flags = ALLOW_STORES | ALLOW_LOADS_STORES
- }, {
- .intel_events = {
- INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
- },
- .amd_events = {
- INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
- },
- .msg = "Only allow loads, stores, and loads + stores.",
- .flags = ALLOW_LOADS | ALLOW_STORES | ALLOW_LOADS_STORES
- },
-};
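To make the table concrete, here is how the fourth case ("Only allow loads and stores.") should resolve under the matching rule sketched above, with EXCLUDE entries vetoing otherwise-matching INCLUDE entries:

	/*
	 * INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0):
	 *   0x81 & 0x7C == 0, 0x82 & 0x7C == 0, 0x83 & 0x7C == 0
	 *   -> loads, stores and loads+stores all match the include entry.
	 * EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83):
	 *   0x83 & 0xFF == 0x83 -> loads+stores is excluded again.
	 * Net: only loads and stores survive, i.e. ALLOW_LOADS | ALLOW_STORES.
	 *
	 * The AMD variant reaches the same result with a single include entry
	 * whose mask clears BIT(0) and BIT(1), so only BIT(2) (loads+stores)
	 * fails to match.
	 */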
-
-static int append_test_events(const struct masked_events_test *test,
- uint64_t *events, int nevents)
-{
- const uint64_t *evts;
- int i;
-
- evts = use_intel_pmu() ? test->intel_events : test->amd_events;
- for (i = 0; i < MAX_TEST_EVENTS; i++) {
- if (evts[i] == 0)
- break;
-
- events[nevents + i] = evts[i];
- }
-
- return nevents + i;
-}
-
-static bool bool_eq(bool a, bool b)
-{
- return a == b;
-}
-
-static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events,
- int nevents)
-{
- int ntests = ARRAY_SIZE(test_cases);
- int i, n;
-
- for (i = 0; i < ntests; i++) {
- const struct masked_events_test *test = &test_cases[i];
-
- /* Do any test case events overflow MAX_TEST_EVENTS? */
- assert(test->intel_event_end == 0);
- assert(test->amd_event_end == 0);
-
- n = append_test_events(test, events, nevents);
-
- run_masked_events_test(vcpu, events, n);
-
- TEST_ASSERT(bool_eq(pmc_results.loads, test->flags & ALLOW_LOADS) &&
- bool_eq(pmc_results.stores, test->flags & ALLOW_STORES) &&
- bool_eq(pmc_results.loads_stores,
- test->flags & ALLOW_LOADS_STORES),
- "%s loads: %lu, stores: %lu, loads + stores: %lu",
- test->msg, pmc_results.loads, pmc_results.stores,
- pmc_results.loads_stores);
- }
-}
-
-static void add_dummy_events(uint64_t *events, int nevents)
-{
- int i;
-
- for (i = 0; i < nevents; i++) {
- int event_select = i % 0xFF;
- bool exclude = ((i % 4) == 0);
-
- if (event_select == MEM_INST_RETIRED ||
- event_select == LS_DISPATCH)
- event_select++;
-
- events[i] = KVM_PMU_ENCODE_MASKED_ENTRY(event_select, 0,
- 0, exclude);
- }
-}
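Worth spelling out, since it is easy to miss: the dummy entries above bump event_select by one whenever it would collide with MEM_INST_RETIRED or LS_DISPATCH, so padding the filter out to KVM_PMU_EVENT_FILTER_MAX_EVENTS entries cannot accidentally allow or exclude the events under test; the dense run in test_masked_events() below should only stress the filter lookup, not change the expected results.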
-
-static void test_masked_events(struct kvm_vcpu *vcpu)
-{
- int nevents = KVM_PMU_EVENT_FILTER_MAX_EVENTS - MAX_TEST_EVENTS;
- uint64_t events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
-
- /* Run the test cases against a sparse PMU event filter. */
- run_masked_events_tests(vcpu, events, 0);
-
- /* Run the test cases against a dense PMU event filter. */
- add_dummy_events(events, KVM_PMU_EVENT_FILTER_MAX_EVENTS);
- run_masked_events_tests(vcpu, events, nevents);
-}
-
-static int set_pmu_event_filter(struct kvm_vcpu *vcpu,
- struct __kvm_pmu_event_filter *__f)
-{
- struct kvm_pmu_event_filter *f = (void *)__f;
-
- return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
-}
-
-static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event,
- uint32_t flags, uint32_t action)
-{
- struct __kvm_pmu_event_filter f = {
- .nevents = 1,
- .flags = flags,
- .action = action,
- .events = {
- event,
- },
- };
-
- return set_pmu_event_filter(vcpu, &f);
-}
-
-static void test_filter_ioctl(struct kvm_vcpu *vcpu)
-{
- uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
- struct __kvm_pmu_event_filter f;
- uint64_t e = ~0ul;
- int r;
-
- /*
-	 * Unfortunately, setting invalid bits in the event data (bits other
-	 * than eventsel+umask) is expected to succeed when flags == 0.
- */
- r = set_pmu_single_event_filter(vcpu, e, 0, KVM_PMU_EVENT_ALLOW);
- TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
-
- r = set_pmu_single_event_filter(vcpu, e,
- KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
- KVM_PMU_EVENT_ALLOW);
- TEST_ASSERT(r != 0, "Invalid PMU Event Filter is expected to fail");
-
- e = KVM_PMU_ENCODE_MASKED_ENTRY(0xff, 0xff, 0xff, 0xf);
- r = set_pmu_single_event_filter(vcpu, e,
- KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
- KVM_PMU_EVENT_ALLOW);
- TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
-
- f = base_event_filter;
- f.action = PMU_EVENT_FILTER_INVALID_ACTION;
- r = set_pmu_event_filter(vcpu, &f);
- TEST_ASSERT(r, "Set invalid action is expected to fail");
-
- f = base_event_filter;
- f.flags = PMU_EVENT_FILTER_INVALID_FLAGS;
- r = set_pmu_event_filter(vcpu, &f);
- TEST_ASSERT(r, "Set invalid flags is expected to fail");
-
- f = base_event_filter;
- f.nevents = PMU_EVENT_FILTER_INVALID_NEVENTS;
- r = set_pmu_event_filter(vcpu, &f);
- TEST_ASSERT(r, "Exceeding the max number of filter events should fail");
-
- f = base_event_filter;
- f.fixed_counter_bitmap = ~GENMASK_ULL(nr_fixed_counters, 0);
- r = set_pmu_event_filter(vcpu, &f);
- TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed");
-}
-
-static void intel_run_fixed_counter_guest_code(uint8_t idx)
-{
- for (;;) {
- wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
- wrmsr(MSR_CORE_PERF_FIXED_CTR0 + idx, 0);
-
- /* Only OS_EN bit is enabled for fixed counter[idx]. */
- wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(idx, FIXED_PMC_KERNEL));
- wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(idx));
- __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
- wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-
- GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + idx));
- }
-}
-
-static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu,
- uint32_t action, uint32_t bitmap)
-{
- struct __kvm_pmu_event_filter f = {
- .action = action,
- .fixed_counter_bitmap = bitmap,
- };
- set_pmu_event_filter(vcpu, &f);
-
- return run_vcpu_to_sync(vcpu);
-}
-
-static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu,
- uint32_t action,
- uint32_t bitmap)
-{
- struct __kvm_pmu_event_filter f = base_event_filter;
-
- f.action = action;
- f.fixed_counter_bitmap = bitmap;
- set_pmu_event_filter(vcpu, &f);
-
- return run_vcpu_to_sync(vcpu);
-}
-
-static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx,
- uint8_t nr_fixed_counters)
-{
- unsigned int i;
- uint32_t bitmap;
- uint64_t count;
-
- TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8,
- "Invalid nr_fixed_counters");
-
- /*
-	 * Check that the fixed performance counter counts normally when KVM
-	 * userspace doesn't set any PMU filter.
- */
- count = run_vcpu_to_sync(vcpu);
- TEST_ASSERT(count, "Unexpected count value: %ld", count);
-
- for (i = 0; i < BIT(nr_fixed_counters); i++) {
- bitmap = BIT(i);
- count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_ALLOW,
- bitmap);
- TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
-
- count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_DENY,
- bitmap);
- TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
-
- /*
- * Check that fixed_counter_bitmap has higher priority than
- * events[] when both are set.
- */
- count = test_set_gp_and_fixed_event_filter(vcpu,
- KVM_PMU_EVENT_ALLOW,
- bitmap);
- TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
-
- count = test_set_gp_and_fixed_event_filter(vcpu,
- KVM_PMU_EVENT_DENY,
- bitmap);
- TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
- }
-}
-
-static void test_fixed_counter_bitmap(void)
-{
- uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
- struct kvm_vm *vm;
- struct kvm_vcpu *vcpu;
- uint8_t idx;
-
- /*
- * Check that pmu_event_filter works as expected when it's applied to
- * fixed performance counters.
- */
- for (idx = 0; idx < nr_fixed_counters; idx++) {
- vm = vm_create_with_one_vcpu(&vcpu,
- intel_run_fixed_counter_guest_code);
- vcpu_args_set(vcpu, 1, idx);
- __test_fixed_counter_bitmap(vcpu, idx, nr_fixed_counters);
- kvm_vm_free(vm);
- }
-}
-
-int main(int argc, char *argv[])
-{
- void (*guest_code)(void);
- struct kvm_vcpu *vcpu, *vcpu2 = NULL;
- struct kvm_vm *vm;
-
- TEST_REQUIRE(kvm_is_pmu_enabled());
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER));
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_MASKED_EVENTS));
-
- TEST_REQUIRE(use_intel_pmu() || use_amd_pmu());
- guest_code = use_intel_pmu() ? intel_guest_code : amd_guest_code;
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
- TEST_REQUIRE(sanity_check_pmu(vcpu));
-
- if (use_amd_pmu())
- test_amd_deny_list(vcpu);
-
- test_without_filter(vcpu);
- test_member_deny_list(vcpu);
- test_member_allow_list(vcpu);
- test_not_member_deny_list(vcpu);
- test_not_member_allow_list(vcpu);
-
- if (use_intel_pmu() &&
- supports_event_mem_inst_retired() &&
- kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) >= 3)
- vcpu2 = vm_vcpu_add(vm, 2, intel_masked_events_guest_code);
- else if (use_amd_pmu())
- vcpu2 = vm_vcpu_add(vm, 2, amd_masked_events_guest_code);
-
- if (vcpu2)
- test_masked_events(vcpu2);
- test_filter_ioctl(vcpu);
-
- kvm_vm_free(vm);
-
- test_pmu_config_disable(guest_code);
- test_fixed_counter_bitmap();
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2022, Google LLC.
- */
-#include <fcntl.h>
-#include <limits.h>
-#include <pthread.h>
-#include <sched.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/kvm_para.h>
-#include <linux/memfd.h>
-#include <linux/sizes.h>
-
-#include <test_util.h>
-#include <kvm_util.h>
-#include <processor.h>
-
-#define BASE_DATA_SLOT 10
-#define BASE_DATA_GPA ((uint64_t)(1ull << 32))
-#define PER_CPU_DATA_SIZE ((uint64_t)(SZ_2M + PAGE_SIZE))
-
-/* Horrific macro so that the line info is captured accurately :-( */
-#define memcmp_g(gpa, pattern, size) \
-do { \
- uint8_t *mem = (uint8_t *)gpa; \
- size_t i; \
- \
- for (i = 0; i < size; i++) \
- __GUEST_ASSERT(mem[i] == pattern, \
- "Guest expected 0x%x at offset %lu (gpa 0x%lx), got 0x%x", \
- pattern, i, gpa + i, mem[i]); \
-} while (0)
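A brief aside on the "horrific macro" above: keeping memcmp_g() as a macro means __GUEST_ASSERT() expands at each call site, so a failing guest assertion should report the caller's file and line rather than a shared helper's. A plain function, e.g.

	static void memcmp_g_fn(uint64_t gpa, uint8_t pattern, size_t size);	/* hypothetical */

would presumably collapse every mismatch onto the same line inside the helper, which is far less useful when the guest code below checks many different ranges.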
-
-static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size)
-{
- size_t i;
-
- for (i = 0; i < size; i++)
- TEST_ASSERT(mem[i] == pattern,
- "Host expected 0x%x at gpa 0x%lx, got 0x%x",
- pattern, gpa + i, mem[i]);
-}
-
-/*
- * Run memory conversion tests with explicit conversion:
- * Execute the KVM hypercall to map/unmap a gpa range, which causes an exit to
- * userspace so that it can back/unback private memory. Subsequent guest
- * accesses to the gpa range will not cause an exit to userspace.
- *
- * Test memory conversion scenarios with following steps:
- * 1) Access private memory using private access and verify that memory contents
- * are not visible to userspace.
- * 2) Convert memory to shared using explicit conversions and ensure that
- * userspace is able to access the shared regions.
- * 3) Convert memory back to private using explicit conversions and ensure that
- * userspace is again not able to access converted private regions.
- */
-
-#define GUEST_STAGE(o, s) { .offset = o, .size = s }
-
-enum ucall_syncs {
- SYNC_SHARED,
- SYNC_PRIVATE,
-};
-
-static void guest_sync_shared(uint64_t gpa, uint64_t size,
- uint8_t current_pattern, uint8_t new_pattern)
-{
- GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern);
-}
-
-static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern)
-{
- GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern);
-}
-
-/* Arbitrary values, KVM doesn't care about the attribute flags. */
-#define MAP_GPA_SET_ATTRIBUTES BIT(0)
-#define MAP_GPA_SHARED BIT(1)
-#define MAP_GPA_DO_FALLOCATE BIT(2)
-
-static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared,
- bool do_fallocate)
-{
- uint64_t flags = MAP_GPA_SET_ATTRIBUTES;
-
- if (map_shared)
- flags |= MAP_GPA_SHARED;
- if (do_fallocate)
- flags |= MAP_GPA_DO_FALLOCATE;
- kvm_hypercall_map_gpa_range(gpa, size, flags);
-}
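For context, on the host side this hypercall should surface as a KVM_EXIT_HYPERCALL, which handle_exit_hypercall() further down decodes; roughly:

	/*
	 * run->hypercall.nr      == KVM_HC_MAP_GPA_RANGE
	 * run->hypercall.args[0] == gpa               (start of the range)
	 * run->hypercall.args[1] == size / PAGE_SIZE  (number of pages)
	 * run->hypercall.args[2] == flags             (MAP_GPA_* bits above)
	 */

As the comment above the flag definitions notes, KVM itself does not interpret the attribute flags here; the test's own exit handler gives them meaning.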
-
-static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate)
-{
- guest_map_mem(gpa, size, true, do_fallocate);
-}
-
-static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate)
-{
- guest_map_mem(gpa, size, false, do_fallocate);
-}
-
-struct {
- uint64_t offset;
- uint64_t size;
-} static const test_ranges[] = {
- GUEST_STAGE(0, PAGE_SIZE),
- GUEST_STAGE(0, SZ_2M),
- GUEST_STAGE(PAGE_SIZE, PAGE_SIZE),
- GUEST_STAGE(PAGE_SIZE, SZ_2M),
- GUEST_STAGE(SZ_2M, PAGE_SIZE),
-};
-
-static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate)
-{
- const uint8_t def_p = 0xaa;
- const uint8_t init_p = 0xcc;
- uint64_t j;
- int i;
-
- /* Memory should be shared by default. */
- memset((void *)base_gpa, def_p, PER_CPU_DATA_SIZE);
- memcmp_g(base_gpa, def_p, PER_CPU_DATA_SIZE);
- guest_sync_shared(base_gpa, PER_CPU_DATA_SIZE, def_p, init_p);
-
- memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
-
- for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
- uint64_t gpa = base_gpa + test_ranges[i].offset;
- uint64_t size = test_ranges[i].size;
- uint8_t p1 = 0x11;
- uint8_t p2 = 0x22;
- uint8_t p3 = 0x33;
- uint8_t p4 = 0x44;
-
- /*
- * Set the test region to pattern one to differentiate it from
- * the data range as a whole (contains the initial pattern).
- */
- memset((void *)gpa, p1, size);
-
- /*
- * Convert to private, set and verify the private data, and
- * then verify that the rest of the data (map shared) still
- * holds the initial pattern, and that the host always sees the
- * shared memory (initial pattern). Unlike shared memory,
- * punching a hole in private memory is destructive, i.e.
- * previous values aren't guaranteed to be preserved.
- */
- guest_map_private(gpa, size, do_fallocate);
-
- if (size > PAGE_SIZE) {
- memset((void *)gpa, p2, PAGE_SIZE);
- goto skip;
- }
-
- memset((void *)gpa, p2, size);
- guest_sync_private(gpa, size, p1);
-
- /*
- * Verify that the private memory was set to pattern two, and
- * that shared memory still holds the initial pattern.
- */
- memcmp_g(gpa, p2, size);
- if (gpa > base_gpa)
- memcmp_g(base_gpa, init_p, gpa - base_gpa);
- if (gpa + size < base_gpa + PER_CPU_DATA_SIZE)
- memcmp_g(gpa + size, init_p,
- (base_gpa + PER_CPU_DATA_SIZE) - (gpa + size));
-
- /*
- * Convert odd-number page frames back to shared to verify KVM
- * also correctly handles holes in private ranges.
- */
- for (j = 0; j < size; j += PAGE_SIZE) {
- if ((j >> PAGE_SHIFT) & 1) {
- guest_map_shared(gpa + j, PAGE_SIZE, do_fallocate);
- guest_sync_shared(gpa + j, PAGE_SIZE, p1, p3);
-
- memcmp_g(gpa + j, p3, PAGE_SIZE);
- } else {
- guest_sync_private(gpa + j, PAGE_SIZE, p1);
- }
- }
-
-skip:
- /*
- * Convert the entire region back to shared, explicitly write
- * pattern three to fill in the even-number frames before
- * asking the host to verify (and write pattern four).
- */
- guest_map_shared(gpa, size, do_fallocate);
- memset((void *)gpa, p3, size);
- guest_sync_shared(gpa, size, p3, p4);
- memcmp_g(gpa, p4, size);
-
- /* Reset the shared memory back to the initial pattern. */
- memset((void *)gpa, init_p, size);
-
- /*
- * Free (via PUNCH_HOLE) *all* private memory so that the next
- * iteration starts from a clean slate, e.g. with respect to
- * whether or not there are pages/folios in guest_mem.
- */
- guest_map_shared(base_gpa, PER_CPU_DATA_SIZE, true);
- }
-}
-
-static void guest_punch_hole(uint64_t gpa, uint64_t size)
-{
- /* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */
- uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE;
-
- kvm_hypercall_map_gpa_range(gpa, size, flags);
-}
-
-/*
- * Test that PUNCH_HOLE actually frees memory by punching holes without doing a
- * proper conversion. Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating
- * (subsequent fault) should zero memory.
- */
-static void guest_test_punch_hole(uint64_t base_gpa, bool precise)
-{
- const uint8_t init_p = 0xcc;
- int i;
-
- /*
- * Convert the entire range to private, this testcase is all about
- * punching holes in guest_memfd, i.e. shared mappings aren't needed.
- */
- guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false);
-
- for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
- uint64_t gpa = base_gpa + test_ranges[i].offset;
- uint64_t size = test_ranges[i].size;
-
- /*
- * Free all memory before each iteration, even for the !precise
- * case where the memory will be faulted back in. Freeing and
- * reallocating should obviously work, and freeing all memory
- * minimizes the probability of cross-testcase influence.
- */
- guest_punch_hole(base_gpa, PER_CPU_DATA_SIZE);
-
- /* Fault-in and initialize memory, and verify the pattern. */
- if (precise) {
- memset((void *)gpa, init_p, size);
- memcmp_g(gpa, init_p, size);
- } else {
- memset((void *)base_gpa, init_p, PER_CPU_DATA_SIZE);
- memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
- }
-
- /*
- * Punch a hole at the target range and verify that reads from
- * the guest succeed and return zeroes.
- */
- guest_punch_hole(gpa, size);
- memcmp_g(gpa, 0, size);
- }
-}
-
-static void guest_code(uint64_t base_gpa)
-{
- /*
- * Run the conversion test twice, with and without doing fallocate() on
- * the guest_memfd backing when converting between shared and private.
- */
- guest_test_explicit_conversion(base_gpa, false);
- guest_test_explicit_conversion(base_gpa, true);
-
- /*
- * Run the PUNCH_HOLE test twice too, once with the entire guest_memfd
- * faulted in, once with only the target range faulted in.
- */
- guest_test_punch_hole(base_gpa, false);
- guest_test_punch_hole(base_gpa, true);
- GUEST_DONE();
-}
-
-static void handle_exit_hypercall(struct kvm_vcpu *vcpu)
-{
- struct kvm_run *run = vcpu->run;
- uint64_t gpa = run->hypercall.args[0];
- uint64_t size = run->hypercall.args[1] * PAGE_SIZE;
- bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES;
- bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED;
- bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE;
- struct kvm_vm *vm = vcpu->vm;
-
- TEST_ASSERT(run->hypercall.nr == KVM_HC_MAP_GPA_RANGE,
- "Wanted MAP_GPA_RANGE (%u), got '%llu'",
- KVM_HC_MAP_GPA_RANGE, run->hypercall.nr);
-
- if (do_fallocate)
- vm_guest_mem_fallocate(vm, gpa, size, map_shared);
-
- if (set_attributes)
- vm_set_memory_attributes(vm, gpa, size,
- map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE);
- run->hypercall.ret = 0;
-}
-
-static bool run_vcpus;
-
-static void *__test_mem_conversions(void *__vcpu)
-{
- struct kvm_vcpu *vcpu = __vcpu;
- struct kvm_run *run = vcpu->run;
- struct kvm_vm *vm = vcpu->vm;
- struct ucall uc;
-
- while (!READ_ONCE(run_vcpus))
- ;
-
- for ( ;; ) {
- vcpu_run(vcpu);
-
- if (run->exit_reason == KVM_EXIT_HYPERCALL) {
- handle_exit_hypercall(vcpu);
- continue;
- }
-
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Wanted KVM_EXIT_IO, got exit reason: %u (%s)",
- run->exit_reason, exit_reason_str(run->exit_reason));
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- case UCALL_SYNC: {
- uint64_t gpa = uc.args[1];
- size_t size = uc.args[2];
- size_t i;
-
- TEST_ASSERT(uc.args[0] == SYNC_SHARED ||
- uc.args[0] == SYNC_PRIVATE,
- "Unknown sync command '%ld'", uc.args[0]);
-
- for (i = 0; i < size; i += vm->page_size) {
- size_t nr_bytes = min_t(size_t, vm->page_size, size - i);
- uint8_t *hva = addr_gpa2hva(vm, gpa + i);
-
- /* In all cases, the host should observe the shared data. */
- memcmp_h(hva, gpa + i, uc.args[3], nr_bytes);
-
- /* For shared, write the new pattern to guest memory. */
- if (uc.args[0] == SYNC_SHARED)
- memset(hva, uc.args[4], nr_bytes);
- }
- break;
- }
- case UCALL_DONE:
- return NULL;
- default:
- TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
- }
- }
-}
-
-static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus,
- uint32_t nr_memslots)
-{
- /*
- * Allocate enough memory so that each vCPU's chunk of memory can be
- * naturally aligned with respect to the size of the backing store.
- */
- const size_t alignment = max_t(size_t, SZ_2M, get_backing_src_pagesz(src_type));
- const size_t per_cpu_size = align_up(PER_CPU_DATA_SIZE, alignment);
- const size_t memfd_size = per_cpu_size * nr_vcpus;
- const size_t slot_size = memfd_size / nr_memslots;
- struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
- pthread_t threads[KVM_MAX_VCPUS];
- struct kvm_vm *vm;
- int memfd, i, r;
-
- const struct vm_shape shape = {
- .mode = VM_MODE_DEFAULT,
- .type = KVM_X86_SW_PROTECTED_VM,
- };
-
- TEST_ASSERT(slot_size * nr_memslots == memfd_size,
- "The memfd size (0x%lx) needs to be cleanly divisible by the number of memslots (%u)",
- memfd_size, nr_memslots);
- vm = __vm_create_with_vcpus(shape, nr_vcpus, 0, guest_code, vcpus);
-
- vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE));
-
- memfd = vm_create_guest_memfd(vm, memfd_size, 0);
-
- for (i = 0; i < nr_memslots; i++)
- vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i,
- BASE_DATA_SLOT + i, slot_size / vm->page_size,
- KVM_MEM_GUEST_MEMFD, memfd, slot_size * i);
-
- for (i = 0; i < nr_vcpus; i++) {
- uint64_t gpa = BASE_DATA_GPA + i * per_cpu_size;
-
- vcpu_args_set(vcpus[i], 1, gpa);
-
- /*
- * Map only what is needed so that an out-of-bounds access
-		 * results in a #PF => SHUTDOWN instead of data corruption.
- */
- virt_map(vm, gpa, gpa, PER_CPU_DATA_SIZE / vm->page_size);
-
- pthread_create(&threads[i], NULL, __test_mem_conversions, vcpus[i]);
- }
-
- WRITE_ONCE(run_vcpus, true);
-
- for (i = 0; i < nr_vcpus; i++)
- pthread_join(threads[i], NULL);
-
- kvm_vm_free(vm);
-
- /*
- * Allocate and free memory from the guest_memfd after closing the VM
-	 * fd. The guest_memfd is gifted a reference to its owning VM, i.e. it
-	 * should prevent the VM from being fully destroyed until the last
- * reference to the guest_memfd is also put.
- */
- r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
-
- r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
-
- close(memfd);
-}
-
-static void usage(const char *cmd)
-{
- puts("");
- printf("usage: %s [-h] [-m nr_memslots] [-s mem_type] [-n nr_vcpus]\n", cmd);
- puts("");
- backing_src_help("-s");
- puts("");
- puts(" -n: specify the number of vcpus (default: 1)");
- puts("");
- puts(" -m: specify the number of memslots (default: 1)");
- puts("");
-}
-
-int main(int argc, char *argv[])
-{
- enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC;
- uint32_t nr_memslots = 1;
- uint32_t nr_vcpus = 1;
- int opt;
-
- TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
-
- while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) {
- switch (opt) {
- case 's':
- src_type = parse_backing_src_type(optarg);
- break;
- case 'n':
- nr_vcpus = atoi_positive("nr_vcpus", optarg);
- break;
- case 'm':
- nr_memslots = atoi_positive("nr_memslots", optarg);
- break;
- case 'h':
- default:
- usage(argv[0]);
- exit(0);
- }
- }
-
- test_mem_conversions(src_type, nr_vcpus, nr_memslots);
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2023, Google LLC.
- */
-#include <linux/kvm.h>
-#include <pthread.h>
-#include <stdint.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "test_util.h"
-
-/* Arbitrarily selected to avoid overlaps with anything else */
-#define EXITS_TEST_GVA 0xc0000000
-#define EXITS_TEST_GPA EXITS_TEST_GVA
-#define EXITS_TEST_NPAGES 1
-#define EXITS_TEST_SIZE (EXITS_TEST_NPAGES * PAGE_SIZE)
-#define EXITS_TEST_SLOT 10
-
-static uint64_t guest_repeatedly_read(void)
-{
- volatile uint64_t value;
-
- while (true)
- value = *((uint64_t *) EXITS_TEST_GVA);
-
- return value;
-}
-
-static uint32_t run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu)
-{
- int r;
-
- r = _vcpu_run(vcpu);
- if (r) {
- TEST_ASSERT(errno == EFAULT, KVM_IOCTL_ERROR(KVM_RUN, r));
- TEST_ASSERT_EQ(vcpu->run->exit_reason, KVM_EXIT_MEMORY_FAULT);
- }
- return vcpu->run->exit_reason;
-}
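One subtlety worth noting: unlike most exits, a KVM_EXIT_MEMORY_FAULT is delivered together with a failing KVM_RUN (the ioctl returns -1 with errno == EFAULT while still filling in run->exit_reason), which is why the helper above tolerates a non-zero return from _vcpu_run() and checks errno before trusting the exit reason.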
-
-const struct vm_shape protected_vm_shape = {
- .mode = VM_MODE_DEFAULT,
- .type = KVM_X86_SW_PROTECTED_VM,
-};
-
-static void test_private_access_memslot_deleted(void)
-{
- struct kvm_vm *vm;
- struct kvm_vcpu *vcpu;
- pthread_t vm_thread;
- void *thread_return;
- uint32_t exit_reason;
-
- vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
- guest_repeatedly_read);
-
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- EXITS_TEST_GPA, EXITS_TEST_SLOT,
- EXITS_TEST_NPAGES,
- KVM_MEM_GUEST_MEMFD);
-
- virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
-
- /* Request to access page privately */
- vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
-
- pthread_create(&vm_thread, NULL,
- (void *(*)(void *))run_vcpu_get_exit_reason,
- (void *)vcpu);
-
- vm_mem_region_delete(vm, EXITS_TEST_SLOT);
-
- pthread_join(vm_thread, &thread_return);
- exit_reason = (uint32_t)(uint64_t)thread_return;
-
- TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
- TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
- TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
- TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
-
- kvm_vm_free(vm);
-}
-
-static void test_private_access_memslot_not_private(void)
-{
- struct kvm_vm *vm;
- struct kvm_vcpu *vcpu;
- uint32_t exit_reason;
-
- vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
- guest_repeatedly_read);
-
- /* Add a non-private memslot (flags = 0) */
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- EXITS_TEST_GPA, EXITS_TEST_SLOT,
- EXITS_TEST_NPAGES, 0);
-
- virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
-
- /* Request to access page privately */
- vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
-
- exit_reason = run_vcpu_get_exit_reason(vcpu);
-
- TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
- TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
- TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
- TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
-
- kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
- TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
-
- test_private_access_memslot_deleted();
- test_private_access_memslot_not_private();
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Test edge cases and race conditions in kvm_recalculate_apic_map().
- */
-
-#include <sys/ioctl.h>
-#include <pthread.h>
-#include <time.h>
-
-#include "processor.h"
-#include "test_util.h"
-#include "kvm_util.h"
-#include "apic.h"
-
-#define TIMEOUT 5 /* seconds */
-
-#define LAPIC_DISABLED 0
-#define LAPIC_X2APIC (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)
-#define MAX_XAPIC_ID 0xff
-
-static void *race(void *arg)
-{
- struct kvm_lapic_state lapic = {};
- struct kvm_vcpu *vcpu = arg;
-
- while (1) {
- /* Trigger kvm_recalculate_apic_map(). */
- vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
- pthread_testcancel();
- }
-
- return NULL;
-}
-
-int main(void)
-{
- struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
- struct kvm_vcpu *vcpuN;
- struct kvm_vm *vm;
- pthread_t thread;
- time_t t;
- int i;
-
- kvm_static_assert(KVM_MAX_VCPUS > MAX_XAPIC_ID);
-
- /*
- * Create the max number of vCPUs supported by selftests so that KVM
-	 * has a decent amount of work to do when recalculating the map, i.e. to
- * make the problematic window large enough to hit.
- */
- vm = vm_create_with_vcpus(KVM_MAX_VCPUS, NULL, vcpus);
-
- /*
- * Enable x2APIC on all vCPUs so that KVM doesn't bail from the recalc
- * due to vCPUs having aliased xAPIC IDs (truncated to 8 bits).
- */
- for (i = 0; i < KVM_MAX_VCPUS; i++)
- vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC);
-
- TEST_ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0);
-
- vcpuN = vcpus[KVM_MAX_VCPUS - 1];
- for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
- vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_X2APIC);
- vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED);
- }
-
- TEST_ASSERT_EQ(pthread_cancel(thread), 0);
- TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
-
- kvm_vm_free(vm);
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Test that KVM_SET_BOOT_CPU_ID works as intended
- *
- * Copyright (C) 2020, Red Hat, Inc.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "apic.h"
-
-static void guest_bsp_vcpu(void *arg)
-{
- GUEST_SYNC(1);
-
- GUEST_ASSERT_NE(get_bsp_flag(), 0);
-
- GUEST_DONE();
-}
-
-static void guest_not_bsp_vcpu(void *arg)
-{
- GUEST_SYNC(1);
-
- GUEST_ASSERT_EQ(get_bsp_flag(), 0);
-
- GUEST_DONE();
-}
-
-static void test_set_invalid_bsp(struct kvm_vm *vm)
-{
- unsigned long max_vcpu_id = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
- int r;
-
- if (max_vcpu_id) {
- r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(max_vcpu_id + 1));
- TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID > MAX should fail");
- }
-
- r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(1L << 32));
- TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID[63:32]!=0 should fail");
-}
-
-static void test_set_bsp_busy(struct kvm_vcpu *vcpu, const char *msg)
-{
- int r = __vm_ioctl(vcpu->vm, KVM_SET_BOOT_CPU_ID,
- (void *)(unsigned long)vcpu->id);
-
- TEST_ASSERT(r == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set %s", msg);
-}
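A small sketch of the ordering rule these helpers exercise, namely that KVM_SET_BOOT_CPU_ID only succeeds before any vCPU exists (vm here is illustrative):

	vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)0UL);	/* OK: no vCPUs yet       */
	vm_vcpu_add(vm, 0, guest_bsp_vcpu);
	__vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)0UL);	/* -1, errno == EBUSY now */

create_vm() below relies on the first half; test_set_bsp_busy() asserts the EBUSY half after a vCPU has been added, while the VM is running, and after it has finished.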
-
-static void run_vcpu(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
- int stage;
-
- for (stage = 0; stage < 2; stage++) {
-
- vcpu_run(vcpu);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
- uc.args[1] == stage + 1,
- "Stage %d: Unexpected register values vmexit, got %lx",
- stage + 1, (ulong)uc.args[1]);
- test_set_bsp_busy(vcpu, "while running vm");
- break;
- case UCALL_DONE:
- TEST_ASSERT(stage == 1,
- "Expected GUEST_DONE in stage 2, got stage %d",
- stage);
- break;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- default:
- TEST_ASSERT(false, "Unexpected exit: %s",
- exit_reason_str(vcpu->run->exit_reason));
- }
- }
-}
-
-static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id,
- struct kvm_vcpu *vcpus[])
-{
- struct kvm_vm *vm;
- uint32_t i;
-
- vm = vm_create(nr_vcpus);
-
- test_set_invalid_bsp(vm);
-
- vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(unsigned long)bsp_vcpu_id);
-
- for (i = 0; i < nr_vcpus; i++)
- vcpus[i] = vm_vcpu_add(vm, i, i == bsp_vcpu_id ? guest_bsp_vcpu :
- guest_not_bsp_vcpu);
- return vm;
-}
-
-static void run_vm_bsp(uint32_t bsp_vcpu_id)
-{
- struct kvm_vcpu *vcpus[2];
- struct kvm_vm *vm;
-
- vm = create_vm(ARRAY_SIZE(vcpus), bsp_vcpu_id, vcpus);
-
- run_vcpu(vcpus[0]);
- run_vcpu(vcpus[1]);
-
- kvm_vm_free(vm);
-}
-
-static void check_set_bsp_busy(void)
-{
- struct kvm_vcpu *vcpus[2];
- struct kvm_vm *vm;
-
- vm = create_vm(ARRAY_SIZE(vcpus), 0, vcpus);
-
- test_set_bsp_busy(vcpus[1], "after adding vcpu");
-
- run_vcpu(vcpus[0]);
- run_vcpu(vcpus[1]);
-
- test_set_bsp_busy(vcpus[1], "to a terminated vcpu");
-
- kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID));
-
- run_vm_bsp(0);
- run_vm_bsp(1);
- run_vm_bsp(0);
-
- check_set_bsp_busy();
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * KVM_SET_SREGS tests
- *
- * Copyright (C) 2018, Google LLC.
- *
- * This is a regression test for the bug fixed by the following commit:
- * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values")
- *
- * That bug allowed a user-mode program that called the KVM_SET_SREGS
- * ioctl to put a VCPU's local APIC into an invalid state.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-
-#define TEST_INVALID_CR_BIT(vcpu, cr, orig, bit) \
-do { \
- struct kvm_sregs new; \
- int rc; \
- \
-	/* Skip the sub-test if the feature/bit is supported. */		\
- if (orig.cr & bit) \
- break; \
- \
-	memcpy(&new, &orig, sizeof(new));					\
- new.cr |= bit; \
- \
- rc = _vcpu_sregs_set(vcpu, &new); \
- TEST_ASSERT(rc, "KVM allowed invalid " #cr " bit (0x%lx)", bit); \
- \
- /* Sanity check that KVM didn't change anything. */ \
- vcpu_sregs_get(vcpu, &new); \
- TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs"); \
-} while (0)
-
-static uint64_t calc_supported_cr4_feature_bits(void)
-{
- uint64_t cr4;
-
- cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE |
- X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE |
- X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT;
- if (kvm_cpu_has(X86_FEATURE_UMIP))
- cr4 |= X86_CR4_UMIP;
- if (kvm_cpu_has(X86_FEATURE_LA57))
- cr4 |= X86_CR4_LA57;
- if (kvm_cpu_has(X86_FEATURE_VMX))
- cr4 |= X86_CR4_VMXE;
- if (kvm_cpu_has(X86_FEATURE_SMX))
- cr4 |= X86_CR4_SMXE;
- if (kvm_cpu_has(X86_FEATURE_FSGSBASE))
- cr4 |= X86_CR4_FSGSBASE;
- if (kvm_cpu_has(X86_FEATURE_PCID))
- cr4 |= X86_CR4_PCIDE;
- if (kvm_cpu_has(X86_FEATURE_XSAVE))
- cr4 |= X86_CR4_OSXSAVE;
- if (kvm_cpu_has(X86_FEATURE_SMEP))
- cr4 |= X86_CR4_SMEP;
- if (kvm_cpu_has(X86_FEATURE_SMAP))
- cr4 |= X86_CR4_SMAP;
- if (kvm_cpu_has(X86_FEATURE_PKU))
- cr4 |= X86_CR4_PKE;
-
- return cr4;
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_sregs sregs;
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- uint64_t cr4;
- int rc, i;
-
- /*
- * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
- * use it to verify all supported CR4 bits can be set prior to defining
- * the vCPU model, i.e. without doing KVM_SET_CPUID2.
- */
- vm = vm_create_barebones();
- vcpu = __vm_vcpu_add(vm, 0);
-
- vcpu_sregs_get(vcpu, &sregs);
-
- sregs.cr0 = 0;
- sregs.cr4 |= calc_supported_cr4_feature_bits();
- cr4 = sregs.cr4;
-
- rc = _vcpu_sregs_set(vcpu, &sregs);
- TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4);
-
- vcpu_sregs_get(vcpu, &sregs);
- TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)",
- sregs.cr4, cr4);
-
- /* Verify all unsupported features are rejected by KVM. */
- TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP);
- TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57);
- TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE);
- TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMXE);
- TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_FSGSBASE);
- TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PCIDE);
- TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_OSXSAVE);
- TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMEP);
- TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMAP);
- TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PKE);
-
- for (i = 32; i < 64; i++)
- TEST_INVALID_CR_BIT(vcpu, cr0, sregs, BIT(i));
-
- /* NW without CD is illegal, as is PG without PE. */
- TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW);
- TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG);
-
- kvm_vm_free(vm);
-
- /* Create a "real" VM and verify APIC_BASE can be set. */
- vm = vm_create_with_one_vcpu(&vcpu, NULL);
-
- vcpu_sregs_get(vcpu, &sregs);
- sregs.apic_base = 1 << 10;
- rc = _vcpu_sregs_set(vcpu, &sregs);
- TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
- sregs.apic_base);
- sregs.apic_base = 1 << 11;
- rc = _vcpu_sregs_set(vcpu, &sregs);
- TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
- sregs.apic_base);
-
- kvm_vm_free(vm);
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/kvm.h>
-#include <linux/psp-sev.h>
-#include <stdio.h>
-#include <sys/ioctl.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <pthread.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-#include "kselftest.h"
-
-#define SVM_SEV_FEAT_DEBUG_SWAP 32u
-
-/*
- * Some features may have hidden dependencies, or may only work
- * for certain VM types. Err on the side of safety and don't
- * expect that all supported features can be passed one by one
- * to KVM_SEV_INIT2.
- *
- * (Well, right now there's only one...)
- */
-#define KNOWN_FEATURES SVM_SEV_FEAT_DEBUG_SWAP
-
-int kvm_fd;
-u64 supported_vmsa_features;
-bool have_sev_es;
-
-static int __sev_ioctl(int vm_fd, int cmd_id, void *data)
-{
- struct kvm_sev_cmd cmd = {
- .id = cmd_id,
- .data = (uint64_t)data,
- .sev_fd = open_sev_dev_path_or_exit(),
- };
- int ret;
-
- ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
- TEST_ASSERT(ret < 0 || cmd.error == SEV_RET_SUCCESS,
- "%d failed: fw error: %d\n",
- cmd_id, cmd.error);
-
- return ret;
-}
-
-static void test_init2(unsigned long vm_type, struct kvm_sev_init *init)
-{
- struct kvm_vm *vm;
- int ret;
-
- vm = vm_create_barebones_type(vm_type);
- ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init);
- TEST_ASSERT(ret == 0,
- "KVM_SEV_INIT2 return code is %d (expected 0), errno: %d",
- ret, errno);
- kvm_vm_free(vm);
-}
-
-static void test_init2_invalid(unsigned long vm_type, struct kvm_sev_init *init, const char *msg)
-{
- struct kvm_vm *vm;
- int ret;
-
- vm = vm_create_barebones_type(vm_type);
- ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init);
- TEST_ASSERT(ret == -1 && errno == EINVAL,
- "KVM_SEV_INIT2 should fail, %s.",
- msg);
- kvm_vm_free(vm);
-}
-
-void test_vm_types(void)
-{
- test_init2(KVM_X86_SEV_VM, &(struct kvm_sev_init){});
-
- /*
- * TODO: check that unsupported types cannot be created. Probably
- * a separate selftest.
- */
- if (have_sev_es)
- test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){});
-
- test_init2_invalid(0, &(struct kvm_sev_init){},
- "VM type is KVM_X86_DEFAULT_VM");
- if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))
- test_init2_invalid(KVM_X86_SW_PROTECTED_VM, &(struct kvm_sev_init){},
- "VM type is KVM_X86_SW_PROTECTED_VM");
-}
-
-void test_flags(uint32_t vm_type)
-{
- int i;
-
- for (i = 0; i < 32; i++)
- test_init2_invalid(vm_type,
- &(struct kvm_sev_init){ .flags = BIT(i) },
- "invalid flag");
-}
-
-void test_features(uint32_t vm_type, uint64_t supported_features)
-{
- int i;
-
- for (i = 0; i < 64; i++) {
- if (!(supported_features & BIT_ULL(i)))
- test_init2_invalid(vm_type,
- &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) },
- "unknown feature");
- else if (KNOWN_FEATURES & BIT_ULL(i))
- test_init2(vm_type,
- &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) });
- }
-}
-
-int main(int argc, char *argv[])
-{
- int kvm_fd = open_kvm_dev_path_or_exit();
- bool have_sev;
-
- TEST_REQUIRE(__kvm_has_device_attr(kvm_fd, KVM_X86_GRP_SEV,
- KVM_X86_SEV_VMSA_FEATURES) == 0);
- kvm_device_attr_get(kvm_fd, KVM_X86_GRP_SEV,
- KVM_X86_SEV_VMSA_FEATURES,
- &supported_vmsa_features);
-
- have_sev = kvm_cpu_has(X86_FEATURE_SEV);
- TEST_ASSERT(have_sev == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)),
- "sev: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
- kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_VM);
-
- TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM));
- have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
-
- TEST_ASSERT(have_sev_es == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_ES_VM)),
- "sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
- kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM);
-
- test_vm_types();
-
- test_flags(KVM_X86_SEV_VM);
- if (have_sev_es)
- test_flags(KVM_X86_SEV_ES_VM);
-
- test_features(KVM_X86_SEV_VM, 0);
- if (have_sev_es)
- test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features);
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/kvm.h>
-#include <linux/psp-sev.h>
-#include <stdio.h>
-#include <sys/ioctl.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <pthread.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "sev.h"
-#include "kselftest.h"
-
-#define NR_MIGRATE_TEST_VCPUS 4
-#define NR_MIGRATE_TEST_VMS 3
-#define NR_LOCK_TESTING_THREADS 3
-#define NR_LOCK_TESTING_ITERATIONS 10000
-
-bool have_sev_es;
-
-static struct kvm_vm *sev_vm_create(bool es)
-{
- struct kvm_vm *vm;
- int i;
-
- vm = vm_create_barebones();
- if (!es)
- sev_vm_init(vm);
- else
- sev_es_vm_init(vm);
-
- for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
- __vm_vcpu_add(vm, i);
-
- sev_vm_launch(vm, es ? SEV_POLICY_ES : 0);
-
- if (es)
- vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
- return vm;
-}
-
-static struct kvm_vm *aux_vm_create(bool with_vcpus)
-{
- struct kvm_vm *vm;
- int i;
-
- vm = vm_create_barebones();
- if (!with_vcpus)
- return vm;
-
- for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
- __vm_vcpu_add(vm, i);
-
- return vm;
-}
-
-static int __sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
-{
- return __vm_enable_cap(dst, KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, src->fd);
-}
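In case the cap is unfamiliar: KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM is enabled on the destination VM with the source VM's fd as the argument, and a successful move leaves the source without its SEV context. test_sev_migrate_from() below relies on exactly that, roughly (dst/src/other stand in for the VMs created there):

	sev_migrate_from(dst, src);		/* succeeds, src hands over its context   */
	r = __sev_migrate_from(other, src);	/* now fails with ret == -1, errno == EIO */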
-
-static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
-{
- int ret;
-
- ret = __sev_migrate_from(dst, src);
- TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d", ret, errno);
-}
-
-static void test_sev_migrate_from(bool es)
-{
- struct kvm_vm *src_vm;
- struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS];
- int i, ret;
-
- src_vm = sev_vm_create(es);
- for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
- dst_vms[i] = aux_vm_create(true);
-
- /* Initial migration from the src to the first dst. */
- sev_migrate_from(dst_vms[0], src_vm);
-
- for (i = 1; i < NR_MIGRATE_TEST_VMS; i++)
- sev_migrate_from(dst_vms[i], dst_vms[i - 1]);
-
- /* Migrate the guest back to the original VM. */
- ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]);
- TEST_ASSERT(ret == -1 && errno == EIO,
- "VM that was migrated from should be dead. ret %d, errno: %d", ret,
- errno);
-
- kvm_vm_free(src_vm);
- for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
- kvm_vm_free(dst_vms[i]);
-}
-
-struct locking_thread_input {
- struct kvm_vm *vm;
- struct kvm_vm *source_vms[NR_LOCK_TESTING_THREADS];
-};
-
-static void *locking_test_thread(void *arg)
-{
- int i, j;
- struct locking_thread_input *input = (struct locking_thread_input *)arg;
-
- for (i = 0; i < NR_LOCK_TESTING_ITERATIONS; ++i) {
- j = i % NR_LOCK_TESTING_THREADS;
- __sev_migrate_from(input->vm, input->source_vms[j]);
- }
-
- return NULL;
-}
-
-static void test_sev_migrate_locking(void)
-{
- struct locking_thread_input input[NR_LOCK_TESTING_THREADS];
- pthread_t pt[NR_LOCK_TESTING_THREADS];
- int i;
-
- for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) {
- input[i].vm = sev_vm_create(/* es= */ false);
- input[0].source_vms[i] = input[i].vm;
- }
- for (i = 1; i < NR_LOCK_TESTING_THREADS; ++i)
- memcpy(input[i].source_vms, input[0].source_vms,
- sizeof(input[i].source_vms));
-
- for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
- pthread_create(&pt[i], NULL, locking_test_thread, &input[i]);
-
- for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
- pthread_join(pt[i], NULL);
- for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
- kvm_vm_free(input[i].vm);
-}
-
-static void test_sev_migrate_parameters(void)
-{
- struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_no_sev,
- *sev_es_vm_no_vmsa;
- int ret;
-
- vm_no_vcpu = vm_create_barebones();
- vm_no_sev = aux_vm_create(true);
- ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev);
- TEST_ASSERT(ret == -1 && errno == EINVAL,
- "Migrations require SEV enabled. ret %d, errno: %d", ret,
- errno);
-
- if (!have_sev_es)
- goto out;
-
- sev_vm = sev_vm_create(/* es= */ false);
- sev_es_vm = sev_vm_create(/* es= */ true);
- sev_es_vm_no_vmsa = vm_create_barebones();
- sev_es_vm_init(sev_es_vm_no_vmsa);
- __vm_vcpu_add(sev_es_vm_no_vmsa, 1);
-
- ret = __sev_migrate_from(sev_vm, sev_es_vm);
- TEST_ASSERT(
- ret == -1 && errno == EINVAL,
-		"Should not be able to migrate to an SEV enabled VM. ret: %d, errno: %d",
- ret, errno);
-
- ret = __sev_migrate_from(sev_es_vm, sev_vm);
- TEST_ASSERT(
- ret == -1 && errno == EINVAL,
-		"Should not be able to migrate to an SEV-ES enabled VM. ret: %d, errno: %d",
- ret, errno);
-
- ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm);
- TEST_ASSERT(
- ret == -1 && errno == EINVAL,
-		"SEV-ES migrations require the same number of vCPUs. ret: %d, errno: %d",
- ret, errno);
-
- ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa);
- TEST_ASSERT(
- ret == -1 && errno == EINVAL,
- "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d",
- ret, errno);
-
- kvm_vm_free(sev_vm);
- kvm_vm_free(sev_es_vm);
- kvm_vm_free(sev_es_vm_no_vmsa);
-out:
- kvm_vm_free(vm_no_vcpu);
- kvm_vm_free(vm_no_sev);
-}
-
-static int __sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
-{
- return __vm_enable_cap(dst, KVM_CAP_VM_COPY_ENC_CONTEXT_FROM, src->fd);
-}
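Similarly, KVM_CAP_VM_COPY_ENC_CONTEXT_FROM turns the destination into a mirror that shares the source's encryption context rather than taking it over; as verify_mirror_allowed_cmds() below spells out, a mirror is expected to accept only LAUNCH_UPDATE_VMSA, GUEST_STATUS and the DBG_* commands and to reject everything else with EINVAL.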
-
-static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
-{
- int ret;
-
- ret = __sev_mirror_create(dst, src);
- TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d", ret, errno);
-}
-
-static void verify_mirror_allowed_cmds(struct kvm_vm *vm)
-{
- struct kvm_sev_guest_status status;
- int cmd_id;
-
- for (cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) {
- int ret;
-
- /*
-		 * These commands are allowed for mirror VMs; all others are
-		 * not.
- */
- switch (cmd_id) {
- case KVM_SEV_LAUNCH_UPDATE_VMSA:
- case KVM_SEV_GUEST_STATUS:
- case KVM_SEV_DBG_DECRYPT:
- case KVM_SEV_DBG_ENCRYPT:
- continue;
- default:
- break;
- }
-
- /*
- * These commands should be disallowed before the data
- * parameter is examined so NULL is OK here.
- */
- ret = __vm_sev_ioctl(vm, cmd_id, NULL);
- TEST_ASSERT(
- ret == -1 && errno == EINVAL,
-			"Should not be able to call command: %d. ret: %d, errno: %d",
- cmd_id, ret, errno);
- }
-
- vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
-}
-
-static void test_sev_mirror(bool es)
-{
- struct kvm_vm *src_vm, *dst_vm;
- int i;
-
- src_vm = sev_vm_create(es);
- dst_vm = aux_vm_create(false);
-
- sev_mirror_create(dst_vm, src_vm);
-
- /* Check that we can complete creation of the mirror VM. */
- for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
- __vm_vcpu_add(dst_vm, i);
-
- if (es)
- vm_sev_ioctl(dst_vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
-
- verify_mirror_allowed_cmds(dst_vm);
-
- kvm_vm_free(src_vm);
- kvm_vm_free(dst_vm);
-}
-
-static void test_sev_mirror_parameters(void)
-{
- struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_with_vcpu;
- int ret;
-
- sev_vm = sev_vm_create(/* es= */ false);
- vm_with_vcpu = aux_vm_create(true);
- vm_no_vcpu = aux_vm_create(false);
-
- ret = __sev_mirror_create(sev_vm, sev_vm);
- TEST_ASSERT(
- ret == -1 && errno == EINVAL,
-		"Should not be able to copy context to self. ret: %d, errno: %d",
- ret, errno);
-
- ret = __sev_mirror_create(vm_no_vcpu, vm_with_vcpu);
- TEST_ASSERT(ret == -1 && errno == EINVAL,
- "Copy context requires SEV enabled. ret %d, errno: %d", ret,
- errno);
-
- ret = __sev_mirror_create(vm_with_vcpu, sev_vm);
- TEST_ASSERT(
- ret == -1 && errno == EINVAL,
-		"SEV copy context requires no vCPUs on the destination. ret: %d, errno: %d",
- ret, errno);
-
- if (!have_sev_es)
- goto out;
-
- sev_es_vm = sev_vm_create(/* es= */ true);
- ret = __sev_mirror_create(sev_vm, sev_es_vm);
- TEST_ASSERT(
- ret == -1 && errno == EINVAL,
-		"Should not be able to copy context to SEV enabled VM. ret: %d, errno: %d",
- ret, errno);
-
- ret = __sev_mirror_create(sev_es_vm, sev_vm);
- TEST_ASSERT(
- ret == -1 && errno == EINVAL,
-		"Should not be able to copy context to SEV-ES enabled VM. ret: %d, errno: %d",
- ret, errno);
-
- kvm_vm_free(sev_es_vm);
-
-out:
- kvm_vm_free(sev_vm);
- kvm_vm_free(vm_with_vcpu);
- kvm_vm_free(vm_no_vcpu);
-}
-
-static void test_sev_move_copy(void)
-{
- struct kvm_vm *dst_vm, *dst2_vm, *dst3_vm, *sev_vm, *mirror_vm,
- *dst_mirror_vm, *dst2_mirror_vm, *dst3_mirror_vm;
-
- sev_vm = sev_vm_create(/* es= */ false);
- dst_vm = aux_vm_create(true);
- dst2_vm = aux_vm_create(true);
- dst3_vm = aux_vm_create(true);
- mirror_vm = aux_vm_create(false);
- dst_mirror_vm = aux_vm_create(false);
- dst2_mirror_vm = aux_vm_create(false);
- dst3_mirror_vm = aux_vm_create(false);
-
- sev_mirror_create(mirror_vm, sev_vm);
-
- sev_migrate_from(dst_mirror_vm, mirror_vm);
- sev_migrate_from(dst_vm, sev_vm);
-
- sev_migrate_from(dst2_vm, dst_vm);
- sev_migrate_from(dst2_mirror_vm, dst_mirror_vm);
-
- sev_migrate_from(dst3_mirror_vm, dst2_mirror_vm);
- sev_migrate_from(dst3_vm, dst2_vm);
-
- kvm_vm_free(dst_vm);
- kvm_vm_free(sev_vm);
- kvm_vm_free(dst2_vm);
- kvm_vm_free(dst3_vm);
- kvm_vm_free(mirror_vm);
- kvm_vm_free(dst_mirror_vm);
- kvm_vm_free(dst2_mirror_vm);
- kvm_vm_free(dst3_mirror_vm);
-
- /*
-	 * Run a similar test, but destroy the mirrors before the mirrored VMs
-	 * to ensure destruction is done safely.
- */
- sev_vm = sev_vm_create(/* es= */ false);
- dst_vm = aux_vm_create(true);
- mirror_vm = aux_vm_create(false);
- dst_mirror_vm = aux_vm_create(false);
-
- sev_mirror_create(mirror_vm, sev_vm);
-
- sev_migrate_from(dst_mirror_vm, mirror_vm);
- sev_migrate_from(dst_vm, sev_vm);
-
- kvm_vm_free(mirror_vm);
- kvm_vm_free(dst_mirror_vm);
- kvm_vm_free(dst_vm);
- kvm_vm_free(sev_vm);
-}
-
-int main(int argc, char *argv[])
-{
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM));
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM));
-
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
-
- have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
-
- if (kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) {
- test_sev_migrate_from(/* es= */ false);
- if (have_sev_es)
- test_sev_migrate_from(/* es= */ true);
- test_sev_migrate_locking();
- test_sev_migrate_parameters();
- if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM))
- test_sev_move_copy();
- }
- if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) {
- test_sev_mirror(/* es= */ false);
- if (have_sev_es)
- test_sev_mirror(/* es= */ true);
- test_sev_mirror_parameters();
- }
- return 0;
-}
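For reference, the deleted __sev_migrate_from() and __sev_mirror_create() helpers both boil down to a KVM_ENABLE_CAP ioctl on the destination VM fd with the source VM fd as the sole argument. A minimal standalone sketch, not part of the removed test: dst_fd and src_fd are hypothetical, already-created VM fds, and error handling is omitted.

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hand the source VM's SEV context to the destination VM (intra-host migration). */
static int move_enc_context_from(int dst_fd, int src_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, /* mirroring uses KVM_CAP_VM_COPY_ENC_CONTEXT_FROM */
		.args[0] = src_fd,
	};

	/* 0 on success, -1 with errno == EINVAL for the bad combinations exercised above. */
	return ioctl(dst_fd, KVM_ENABLE_CAP, &cap);
}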
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <math.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-#include "linux/psp-sev.h"
-#include "sev.h"
-
-
-#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)
-
-static void guest_sev_es_code(void)
-{
- /* TODO: Check CPUID after GHCB-based hypercall support is added. */
- GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
- GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED);
-
- /*
- * TODO: Add GHCB and ucall support for SEV-ES guests. For now, simply
- * force "termination" to signal "done" via the GHCB MSR protocol.
- */
- wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
- __asm__ __volatile__("rep; vmmcall");
-}
-
-static void guest_sev_code(void)
-{
- GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV));
- GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
-
- GUEST_DONE();
-}
-
-/* Stash state passed via VMSA before any compiled code runs. */
-extern void guest_code_xsave(void);
-asm("guest_code_xsave:\n"
- "mov $" __stringify(XFEATURE_MASK_X87_AVX) ", %eax\n"
- "xor %edx, %edx\n"
- "xsave (%rdi)\n"
- "jmp guest_sev_es_code");
-
-static void compare_xsave(u8 *from_host, u8 *from_guest)
-{
- int i;
- bool bad = false;
- for (i = 0; i < 4095; i++) {
- if (from_host[i] != from_guest[i]) {
- printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]);
- bad = true;
- }
- }
-
- if (bad)
- abort();
-}
-
-static void test_sync_vmsa(uint32_t policy)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- vm_vaddr_t gva;
- void *hva;
-
- double x87val = M_PI;
- struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 };
-
- vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu);
- gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR,
- MEM_REGION_TEST_DATA);
- hva = addr_gva2hva(vm, gva);
-
- vcpu_args_set(vcpu, 1, gva);
-
- asm("fninit\n"
- "vpcmpeqb %%ymm4, %%ymm4, %%ymm4\n"
- "fldl %3\n"
- "xsave (%2)\n"
- "fstp %%st\n"
- : "=m"(xsave)
- : "A"(XFEATURE_MASK_X87_AVX), "r"(&xsave), "m" (x87val)
- : "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");
- vcpu_xsave_set(vcpu, &xsave);
-
- vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL);
-
- /* This page is shared, so make it decrypted. */
- memset(hva, 0, 4096);
-
- vcpu_run(vcpu);
-
- TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
- "Wanted SYSTEM_EVENT, got %s",
- exit_reason_str(vcpu->run->exit_reason));
- TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM);
- TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1);
- TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ);
-
- compare_xsave((u8 *)&xsave, (u8 *)hva);
-
- kvm_vm_free(vm);
-}
-
-static void test_sev(void *guest_code, uint64_t policy)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct ucall uc;
-
- uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
-
- vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu);
-
- /* TODO: Validate the measurement is as expected. */
- vm_sev_launch(vm, policy, NULL);
-
- for (;;) {
- vcpu_run(vcpu);
-
- if (policy & SEV_POLICY_ES) {
- TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
- "Wanted SYSTEM_EVENT, got %s",
- exit_reason_str(vcpu->run->exit_reason));
- TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM);
- TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1);
- TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ);
- break;
- }
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- continue;
- case UCALL_DONE:
- return;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- default:
- TEST_FAIL("Unexpected exit: %s",
- exit_reason_str(vcpu->run->exit_reason));
- }
- }
-
- kvm_vm_free(vm);
-}
-
-static void guest_shutdown_code(void)
-{
- struct desc_ptr idt;
-
- /* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. */
- memset(&idt, 0, sizeof(idt));
- __asm__ __volatile__("lidt %0" :: "m"(idt));
-
- __asm__ __volatile__("ud2");
-}
-
-static void test_sev_es_shutdown(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- uint32_t type = KVM_X86_SEV_ES_VM;
-
- vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu);
-
- vm_sev_launch(vm, SEV_POLICY_ES, NULL);
-
- vcpu_run(vcpu);
- TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN,
- "Wanted SHUTDOWN, got %s",
- exit_reason_str(vcpu->run->exit_reason));
-
- kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
- const u64 xf_mask = XFEATURE_MASK_X87_AVX;
-
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
-
- test_sev(guest_sev_code, SEV_POLICY_NO_DBG);
- test_sev(guest_sev_code, 0);
-
- if (kvm_cpu_has(X86_FEATURE_SEV_ES)) {
- test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG);
- test_sev(guest_sev_es_code, SEV_POLICY_ES);
-
- test_sev_es_shutdown();
-
- if (kvm_has_cap(KVM_CAP_XCRS) &&
- (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) {
- test_sync_vmsa(0);
- test_sync_vmsa(SEV_POLICY_NO_DBG);
- }
- }
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2020, Google LLC.
- *
- * Test that KVM emulates instructions in response to EPT violations when
- * allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR.
- */
-#include "flds_emulation.h"
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "vmx.h"
-
-#define MAXPHYADDR 36
-
-#define MEM_REGION_GVA 0x0000123456789000
-#define MEM_REGION_GPA 0x0000000700000000
-#define MEM_REGION_SLOT 10
-#define MEM_REGION_SIZE PAGE_SIZE
-
-static void guest_code(bool tdp_enabled)
-{
- uint64_t error_code;
- uint64_t vector;
-
- vector = kvm_asm_safe_ec(FLDS_MEM_EAX, error_code, "a"(MEM_REGION_GVA));
-
- /*
- * When TDP is enabled, flds will trigger an emulation failure, exit to
- * userspace, and then the selftest host "VMM" skips the instruction.
- *
- * When TDP is disabled, no instruction emulation is required so flds
- * should generate #PF(RSVD).
- */
- if (tdp_enabled) {
- GUEST_ASSERT(!vector);
- } else {
- GUEST_ASSERT_EQ(vector, PF_VECTOR);
- GUEST_ASSERT(error_code & PFERR_RSVD_MASK);
- }
-
- GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct ucall uc;
- uint64_t *pte;
- uint64_t *hva;
- uint64_t gpa;
- int rc;
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR));
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled());
-
- vcpu_set_cpuid_property(vcpu, X86_PROPERTY_MAX_PHY_ADDR, MAXPHYADDR);
-
- rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE);
- TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable");
- vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
-
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- MEM_REGION_GPA, MEM_REGION_SLOT,
- MEM_REGION_SIZE / PAGE_SIZE, 0);
- gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE,
- MEM_REGION_GPA, MEM_REGION_SLOT);
- TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc");
- virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1);
- hva = addr_gpa2hva(vm, MEM_REGION_GPA);
- memset(hva, 0, PAGE_SIZE);
-
- pte = vm_get_page_table_entry(vm, MEM_REGION_GVA);
- *pte |= BIT_ULL(MAXPHYADDR);
-
- vcpu_run(vcpu);
-
- /*
-	 * When TDP is enabled, KVM must emulate in response to a guest physical
-	 * address that is illegal from the guest's perspective, but legal from
-	 * hardware's perspective. This should result in an emulation failure
-	 * exit to userspace since KVM doesn't support emulating flds.
- */
- if (kvm_is_tdp_enabled()) {
- handle_flds_emulation_failure_exit(vcpu);
- vcpu_run(vcpu);
- }
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_DONE:
- break;
- default:
- TEST_FAIL("Unrecognized ucall: %lu", uc.cmd);
- }
-
- kvm_vm_free(vm);
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2018, Red Hat, Inc.
- *
- * Tests for SMM.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-
-#include "vmx.h"
-#include "svm_util.h"
-
-#define SMRAM_SIZE 65536
-#define SMRAM_MEMSLOT ((1 << 16) | 1)
-#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
-#define SMRAM_GPA 0x1000000
-#define SMRAM_STAGE 0xfe
-
-#define STR(x) #x
-#define XSTR(s) STR(s)
-
-#define SYNC_PORT 0xe
-#define DONE 0xff
-
-/*
- * This is compiled as normal 64-bit code, but the SMI handler is executed
- * in real-address mode. To keep things simple, we limit ourselves to a
- * mode-independent subset of asm here.
- * The SMI handler always reports back the fixed stage SMRAM_STAGE.
- */
-uint8_t smi_handler[] = {
- 0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */
- 0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */
- 0x0f, 0xaa, /* rsm */
-};
-
-static inline void sync_with_host(uint64_t phase)
-{
- asm volatile("in $" XSTR(SYNC_PORT)", %%al \n"
- : "+a" (phase));
-}
-
-static void self_smi(void)
-{
- x2apic_write_reg(APIC_ICR,
- APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
-}
-
-static void l2_guest_code(void)
-{
- sync_with_host(8);
-
- sync_with_host(10);
-
- vmcall();
-}
-
-static void guest_code(void *arg)
-{
- #define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
- struct svm_test_data *svm = arg;
- struct vmx_pages *vmx_pages = arg;
-
- sync_with_host(1);
-
- wrmsr(MSR_IA32_APICBASE, apicbase | X2APIC_ENABLE);
-
- sync_with_host(2);
-
- self_smi();
-
- sync_with_host(4);
-
- if (arg) {
- if (this_cpu_has(X86_FEATURE_SVM)) {
- generic_svm_setup(svm, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
- } else {
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
- GUEST_ASSERT(load_vmcs(vmx_pages));
- prepare_vmcs(vmx_pages, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
- }
-
- sync_with_host(5);
-
- self_smi();
-
- sync_with_host(7);
-
- if (this_cpu_has(X86_FEATURE_SVM)) {
- run_guest(svm->vmcb, svm->vmcb_gpa);
- run_guest(svm->vmcb, svm->vmcb_gpa);
- } else {
- vmlaunch();
- vmresume();
- }
-
- /* Stages 8-11 are eaten by SMM (SMRAM_STAGE reported instead) */
- sync_with_host(12);
- }
-
- sync_with_host(DONE);
-}
-
-void inject_smi(struct kvm_vcpu *vcpu)
-{
- struct kvm_vcpu_events events;
-
- vcpu_events_get(vcpu, &events);
-
- events.smi.pending = 1;
- events.flags |= KVM_VCPUEVENT_VALID_SMM;
-
- vcpu_events_set(vcpu, &events);
-}
-
-int main(int argc, char *argv[])
-{
- vm_vaddr_t nested_gva = 0;
-
- struct kvm_vcpu *vcpu;
- struct kvm_regs regs;
- struct kvm_vm *vm;
- struct kvm_x86_state *state;
- int stage, stage_reported;
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM));
-
- /* Create VM */
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
- SMRAM_MEMSLOT, SMRAM_PAGES, 0);
- TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT)
- == SMRAM_GPA, "could not allocate guest physical addresses?");
-
- memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE);
- memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler,
- sizeof(smi_handler));
-
- vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA);
-
- if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
- if (kvm_cpu_has(X86_FEATURE_SVM))
- vcpu_alloc_svm(vm, &nested_gva);
- else if (kvm_cpu_has(X86_FEATURE_VMX))
- vcpu_alloc_vmx(vm, &nested_gva);
- }
-
- if (!nested_gva)
-		pr_info("will skip the nested portion of the SMM test\n");
-
- vcpu_args_set(vcpu, 1, nested_gva);
-
- for (stage = 1;; stage++) {
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-		memset(&regs, 0, sizeof(regs));
-		vcpu_regs_get(vcpu, &regs);
-
- stage_reported = regs.rax & 0xff;
-
- if (stage_reported == DONE)
- goto done;
-
- TEST_ASSERT(stage_reported == stage ||
- stage_reported == SMRAM_STAGE,
- "Unexpected stage: #%x, got %x",
- stage, stage_reported);
-
- /*
- * Enter SMM during L2 execution and check that we correctly
- * return from it. Do not perform save/restore while in SMM yet.
- */
- if (stage == 8) {
- inject_smi(vcpu);
- continue;
- }
-
- /*
- * Perform save/restore while the guest is in SMM triggered
- * during L2 execution.
- */
- if (stage == 10)
- inject_smi(vcpu);
-
- state = vcpu_save_state(vcpu);
- kvm_vm_release(vm);
-
- vcpu = vm_recreate_with_one_vcpu(vm);
- vcpu_load_state(vcpu, state);
- kvm_x86_state_cleanup(state);
- }
-
-done:
- kvm_vm_free(vm);
-}
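As an aside, the inject_smi() helper above pends the SMI through KVM_SET_VCPU_EVENTS; when no event state needs to be preserved alongside it, the dedicated KVM_SMI vCPU ioctl (gated by KVM_CAP_X86_SMM) has the same effect. A minimal sketch, assuming a hypothetical raw vcpu_fd:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Mark an SMI pending on the vCPU, analogous to the events-based injection above. */
static int pend_smi(int vcpu_fd)
{
	return ioctl(vcpu_fd, KVM_SMI, 0);
}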
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * KVM_GET/SET_* tests
- *
- * Copyright (C) 2018, Red Hat, Inc.
- *
- * Tests for vCPU state save/restore, including nested guest state.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-#include "svm_util.h"
-
-#define L2_GUEST_STACK_SIZE 256
-
-void svm_l2_guest_code(void)
-{
- GUEST_SYNC(4);
- /* Exit to L1 */
- vmcall();
- GUEST_SYNC(6);
- /* Done, exit to L1 and never come back. */
- vmcall();
-}
-
-static void svm_l1_guest_code(struct svm_test_data *svm)
-{
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- struct vmcb *vmcb = svm->vmcb;
-
- GUEST_ASSERT(svm->vmcb_gpa);
- /* Prepare for L2 execution. */
- generic_svm_setup(svm, svm_l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- GUEST_SYNC(3);
- run_guest(vmcb, svm->vmcb_gpa);
- GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
- GUEST_SYNC(5);
- vmcb->save.rip += 3;
- run_guest(vmcb, svm->vmcb_gpa);
- GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
- GUEST_SYNC(7);
-}
-
-void vmx_l2_guest_code(void)
-{
- GUEST_SYNC(6);
-
- /* Exit to L1 */
- vmcall();
-
- /* L1 has now set up a shadow VMCS for us. */
- GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
- GUEST_SYNC(10);
- GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
- GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
- GUEST_SYNC(11);
- GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
- GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
- GUEST_SYNC(12);
-
- /* Done, exit to L1 and never come back. */
- vmcall();
-}
-
-static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
-{
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
- GUEST_ASSERT(vmx_pages->vmcs_gpa);
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
- GUEST_SYNC(3);
- GUEST_ASSERT(load_vmcs(vmx_pages));
- GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
-
- GUEST_SYNC(4);
- GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
-
- prepare_vmcs(vmx_pages, vmx_l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- GUEST_SYNC(5);
- GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
- /* Check that the launched state is preserved. */
- GUEST_ASSERT(vmlaunch());
-
- GUEST_ASSERT(!vmresume());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
- GUEST_SYNC(7);
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
- GUEST_ASSERT(!vmresume());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
- vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);
-
- vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
- vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa);
-
- GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
- GUEST_ASSERT(vmlaunch());
- GUEST_SYNC(8);
- GUEST_ASSERT(vmlaunch());
- GUEST_ASSERT(vmresume());
-
- vmwrite(GUEST_RIP, 0xc0ffee);
- GUEST_SYNC(9);
- GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
-
- GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
- GUEST_ASSERT(!vmresume());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
- GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
- GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
- GUEST_ASSERT(vmlaunch());
- GUEST_ASSERT(vmresume());
- GUEST_SYNC(13);
- GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
- GUEST_ASSERT(vmlaunch());
- GUEST_ASSERT(vmresume());
-}
-
-static void __attribute__((__flatten__)) guest_code(void *arg)
-{
- GUEST_SYNC(1);
-
- if (this_cpu_has(X86_FEATURE_XSAVE)) {
- uint64_t supported_xcr0 = this_cpu_supported_xcr0();
- uint8_t buffer[4096];
-
- memset(buffer, 0xcc, sizeof(buffer));
-
- /*
- * Modify state for all supported xfeatures to take them out of
- * their "init" state, i.e. to make them show up in XSTATE_BV.
- *
- * Note off-by-default features, e.g. AMX, are out of scope for
- * this particular testcase as they have a different ABI.
- */
- GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
- asm volatile ("fincstp");
-
- GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
- asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));
-
- if (supported_xcr0 & XFEATURE_MASK_YMM)
- asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));
-
- if (supported_xcr0 & XFEATURE_MASK_AVX512) {
- asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
- asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
- asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
- }
-
- if (this_cpu_has(X86_FEATURE_MPX)) {
- uint64_t bounds[2] = { 10, 0xffffffffull };
- uint64_t output[2] = { };
-
- GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
- GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);
-
- /*
- * Don't bother trying to get BNDCSR into the INUSE
- * state. MSR_IA32_BNDCFGS doesn't count as it isn't
- * managed via XSAVE/XRSTOR, and BNDCFGU can only be
- * modified by XRSTOR. Stuffing XSTATE_BV in the host
- * is simpler than doing XRSTOR here in the guest.
- *
- * However, temporarily enable MPX in BNDCFGS so that
- * BNDMOV actually loads BND1. If MPX isn't *fully*
- * enabled, all MPX instructions are treated as NOPs.
- *
- * Hand encode "bndmov (%rax),%bnd1" as support for MPX
- * mnemonics/registers has been removed from gcc and
- * clang (and was never fully supported by clang).
- */
- wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
- asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));
- /*
- * Hand encode "bndmov %bnd1, (%rax)" to sanity check
- * that BND1 actually got loaded.
- */
- asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
- wrmsr(MSR_IA32_BNDCFGS, 0);
-
- GUEST_ASSERT_EQ(bounds[0], output[0]);
- GUEST_ASSERT_EQ(bounds[1], output[1]);
- }
- if (this_cpu_has(X86_FEATURE_PKU)) {
- GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
- set_cr4(get_cr4() | X86_CR4_PKE);
- GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));
-
- wrpkru(-1u);
- }
- }
-
- GUEST_SYNC(2);
-
- if (arg) {
- if (this_cpu_has(X86_FEATURE_SVM))
- svm_l1_guest_code(arg);
- else
- vmx_l1_guest_code(arg);
- }
-
- GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
- uint64_t *xstate_bv, saved_xstate_bv;
- vm_vaddr_t nested_gva = 0;
- struct kvm_cpuid2 empty_cpuid = {};
- struct kvm_regs regs1, regs2;
- struct kvm_vcpu *vcpu, *vcpuN;
- struct kvm_vm *vm;
- struct kvm_x86_state *state;
- struct ucall uc;
- int stage;
-
- /* Create VM */
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-	vcpu_regs_get(vcpu, &regs1);
-
- if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
- if (kvm_cpu_has(X86_FEATURE_SVM))
- vcpu_alloc_svm(vm, &nested_gva);
- else if (kvm_cpu_has(X86_FEATURE_VMX))
- vcpu_alloc_vmx(vm, &nested_gva);
- }
-
- if (!nested_gva)
- pr_info("will skip nested state checks\n");
-
- vcpu_args_set(vcpu, 1, nested_gva);
-
- for (stage = 1;; stage++) {
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- /* NOT REACHED */
- case UCALL_SYNC:
- break;
- case UCALL_DONE:
- goto done;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
-
- /* UCALL_SYNC is handled here. */
- TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
- uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
- stage, (ulong)uc.args[1]);
-
- state = vcpu_save_state(vcpu);
-		memset(&regs1, 0, sizeof(regs1));
-		vcpu_regs_get(vcpu, &regs1);
-
- kvm_vm_release(vm);
-
- /* Restore state in a new VM. */
- vcpu = vm_recreate_with_one_vcpu(vm);
- vcpu_load_state(vcpu, state);
-
- /*
- * Restore XSAVE state in a dummy vCPU, first without doing
- * KVM_SET_CPUID2, and then with an empty guest CPUID. Except
- * for off-by-default xfeatures, e.g. AMX, KVM is supposed to
- * allow KVM_SET_XSAVE regardless of guest CPUID. Manually
- * load only XSAVE state, MSRs in particular have a much more
- * convoluted ABI.
- *
- * Load two versions of XSAVE state: one with the actual guest
- * XSAVE state, and one with all supported features forced "on"
- * in xstate_bv, e.g. to ensure that KVM allows loading all
- * supported features, even if something goes awry in saving
- * the original snapshot.
- */
- xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
- saved_xstate_bv = *xstate_bv;
-
- vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
- vcpu_xsave_set(vcpuN, state->xsave);
- *xstate_bv = kvm_cpu_supported_xcr0();
- vcpu_xsave_set(vcpuN, state->xsave);
-
- vcpu_init_cpuid(vcpuN, &empty_cpuid);
- vcpu_xsave_set(vcpuN, state->xsave);
- *xstate_bv = saved_xstate_bv;
- vcpu_xsave_set(vcpuN, state->xsave);
-
- kvm_x86_state_cleanup(state);
-
-		memset(&regs2, 0, sizeof(regs2));
-		vcpu_regs_get(vcpu, &regs2);
-		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
- "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
- (ulong) regs2.rdi, (ulong) regs2.rsi);
- }
-
-done:
- kvm_vm_free(vm);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * svm_int_ctl_test
- *
- * Copyright (C) 2021, Red Hat, Inc.
- *
- * Nested SVM testing: test simultaneous use of V_IRQ from L1 and L0.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-#include "apic.h"
-
-bool vintr_irq_called;
-bool intr_irq_called;
-
-#define VINTR_IRQ_NUMBER 0x20
-#define INTR_IRQ_NUMBER 0x30
-
-static void vintr_irq_handler(struct ex_regs *regs)
-{
- vintr_irq_called = true;
-}
-
-static void intr_irq_handler(struct ex_regs *regs)
-{
- x2apic_write_reg(APIC_EOI, 0x00);
- intr_irq_called = true;
-}
-
-static void l2_guest_code(struct svm_test_data *svm)
-{
-	/*
-	 * This code raises interrupt INTR_IRQ_NUMBER in L1's LAPIC, and since
-	 * L1 didn't enable virtual interrupt masking, L2 should receive it
-	 * and not L1.
-	 *
-	 * L2 also has virtual interrupt 'VINTR_IRQ_NUMBER' pending in V_IRQ,
-	 * so it should also receive it after the following 'sti'.
-	 */
- x2apic_write_reg(APIC_ICR,
- APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
-
- __asm__ __volatile__(
- "sti\n"
- "nop\n"
- );
-
- GUEST_ASSERT(vintr_irq_called);
- GUEST_ASSERT(intr_irq_called);
-
- __asm__ __volatile__(
- "vmcall\n"
- );
-}
-
-static void l1_guest_code(struct svm_test_data *svm)
-{
- #define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- struct vmcb *vmcb = svm->vmcb;
-
- x2apic_enable();
-
- /* Prepare for L2 execution. */
- generic_svm_setup(svm, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- /* No virtual interrupt masking */
- vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
-
- /* No intercepts for real and virtual interrupts */
- vmcb->control.intercept &= ~(BIT(INTERCEPT_INTR) | BIT(INTERCEPT_VINTR));
-
- /* Make a virtual interrupt VINTR_IRQ_NUMBER pending */
- vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
- vmcb->control.int_vector = VINTR_IRQ_NUMBER;
-
- run_guest(vmcb, svm->vmcb_gpa);
- GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
- GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vcpu *vcpu;
- vm_vaddr_t svm_gva;
- struct kvm_vm *vm;
- struct ucall uc;
-
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
-
- vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-
- vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler);
- vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler);
-
- vcpu_alloc_svm(vm, &svm_gva);
- vcpu_args_set(vcpu, 1, svm_gva);
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- /* NOT REACHED */
- case UCALL_DONE:
- goto done;
- default:
- TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
- }
-done:
- kvm_vm_free(vm);
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * svm_nested_shutdown_test
- *
- * Copyright (C) 2022, Red Hat, Inc.
- *
- * Nested SVM testing: test that unintercepted shutdown in L2 doesn't crash the host
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-
-static void l2_guest_code(struct svm_test_data *svm)
-{
- __asm__ __volatile__("ud2");
-}
-
-static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt)
-{
- #define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- struct vmcb *vmcb = svm->vmcb;
-
- generic_svm_setup(svm, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
-
- idt[6].p = 0; // #UD is intercepted but its injection will cause #NP
- idt[11].p = 0; // #NP is not intercepted and will cause another
- // #NP that will be converted to #DF
- idt[8].p = 0; // #DF will cause #NP which will cause SHUTDOWN
-
- run_guest(vmcb, svm->vmcb_gpa);
-
- /* should not reach here */
- GUEST_ASSERT(0);
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vcpu *vcpu;
- vm_vaddr_t svm_gva;
- struct kvm_vm *vm;
-
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
-
- vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- vcpu_alloc_svm(vm, &svm_gva);
-
- vcpu_args_set(vcpu, 2, svm_gva, vm->arch.idt);
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
-
- kvm_vm_free(vm);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2022 Oracle and/or its affiliates.
- *
- * Based on:
- * svm_int_ctl_test
- *
- * Copyright (C) 2021, Red Hat, Inc.
- *
- */
-#include <stdatomic.h>
-#include <stdio.h>
-#include <unistd.h>
-#include "apic.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-#include "test_util.h"
-
-#define INT_NR 0x20
-
-static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless");
-
-static unsigned int bp_fired;
-static void guest_bp_handler(struct ex_regs *regs)
-{
- bp_fired++;
-}
-
-static unsigned int int_fired;
-static void l2_guest_code_int(void);
-
-static void guest_int_handler(struct ex_regs *regs)
-{
- int_fired++;
- GUEST_ASSERT_EQ(regs->rip, (unsigned long)l2_guest_code_int);
-}
-
-static void l2_guest_code_int(void)
-{
- GUEST_ASSERT_EQ(int_fired, 1);
-
- /*
- * Same as the vmmcall() function, but with a ud2 sneaked after the
- * vmmcall. The caller injects an exception with the return address
- * increased by 2, so the "pop rbp" must be after the ud2 and we cannot
- * use vmmcall() directly.
- */
- __asm__ __volatile__("push %%rbp; vmmcall; ud2; pop %%rbp"
- : : "a"(0xdeadbeef), "c"(0xbeefdead)
- : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
- "r10", "r11", "r12", "r13", "r14", "r15");
-
- GUEST_ASSERT_EQ(bp_fired, 1);
- hlt();
-}
-
-static atomic_int nmi_stage;
-#define nmi_stage_get() atomic_load_explicit(&nmi_stage, memory_order_acquire)
-#define nmi_stage_inc() atomic_fetch_add_explicit(&nmi_stage, 1, memory_order_acq_rel)
-static void guest_nmi_handler(struct ex_regs *regs)
-{
- nmi_stage_inc();
-
- if (nmi_stage_get() == 1) {
- vmmcall();
- GUEST_FAIL("Unexpected resume after VMMCALL");
- } else {
- GUEST_ASSERT_EQ(nmi_stage_get(), 3);
- GUEST_DONE();
- }
-}
-
-static void l2_guest_code_nmi(void)
-{
- ud2();
-}
-
-static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t idt_alt)
-{
- #define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- struct vmcb *vmcb = svm->vmcb;
-
- if (is_nmi)
- x2apic_enable();
-
- /* Prepare for L2 execution. */
- generic_svm_setup(svm,
- is_nmi ? l2_guest_code_nmi : l2_guest_code_int,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- vmcb->control.intercept_exceptions |= BIT(PF_VECTOR) | BIT(UD_VECTOR);
- vmcb->control.intercept |= BIT(INTERCEPT_NMI) | BIT(INTERCEPT_HLT);
-
- if (is_nmi) {
- vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
- } else {
- vmcb->control.event_inj = INT_NR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_SOFT;
- /* The return address pushed on stack */
- vmcb->control.next_rip = vmcb->save.rip;
- }
-
- run_guest(vmcb, svm->vmcb_gpa);
- __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
-		       "Expected VMMCALL #VMEXIT, got '0x%x', info1 = '0x%lx', info2 = '0x%lx'",
- vmcb->control.exit_code,
- vmcb->control.exit_info_1, vmcb->control.exit_info_2);
-
- if (is_nmi) {
- clgi();
- x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI);
-
- GUEST_ASSERT_EQ(nmi_stage_get(), 1);
- nmi_stage_inc();
-
- stgi();
- /* self-NMI happens here */
- while (true)
- cpu_relax();
- }
-
- /* Skip over VMMCALL */
- vmcb->save.rip += 3;
-
- /* Switch to alternate IDT to cause intervening NPF again */
- vmcb->save.idtr.base = idt_alt;
- vmcb->control.clean = 0; /* &= ~BIT(VMCB_DT) would be enough */
-
- vmcb->control.event_inj = BP_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
- /* The return address pushed on stack, skip over UD2 */
- vmcb->control.next_rip = vmcb->save.rip + 2;
-
- run_guest(vmcb, svm->vmcb_gpa);
- __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
-		       "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx', info2 = '0x%lx'",
- vmcb->control.exit_code,
- vmcb->control.exit_info_1, vmcb->control.exit_info_2);
-
- GUEST_DONE();
-}
-
-static void run_test(bool is_nmi)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- vm_vaddr_t svm_gva;
- vm_vaddr_t idt_alt_vm;
- struct kvm_guest_debug debug;
-
- pr_info("Running %s test\n", is_nmi ? "NMI" : "soft int");
-
- vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-
- vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
- vm_install_exception_handler(vm, BP_VECTOR, guest_bp_handler);
- vm_install_exception_handler(vm, INT_NR, guest_int_handler);
-
- vcpu_alloc_svm(vm, &svm_gva);
-
- if (!is_nmi) {
- void *idt, *idt_alt;
-
- idt_alt_vm = vm_vaddr_alloc_page(vm);
- idt_alt = addr_gva2hva(vm, idt_alt_vm);
- idt = addr_gva2hva(vm, vm->arch.idt);
- memcpy(idt_alt, idt, getpagesize());
- } else {
- idt_alt_vm = 0;
- }
- vcpu_args_set(vcpu, 3, svm_gva, (uint64_t)is_nmi, (uint64_t)idt_alt_vm);
-
- memset(&debug, 0, sizeof(debug));
- vcpu_guest_debug_set(vcpu, &debug);
-
- struct ucall uc;
-
- alarm(2);
- vcpu_run(vcpu);
- alarm(0);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- /* NOT REACHED */
- case UCALL_DONE:
- goto done;
- default:
- TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
- }
-done:
- kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
-
- TEST_ASSERT(kvm_cpu_has(X86_FEATURE_NRIPS),
- "KVM with nSVM is supposed to unconditionally advertise nRIP Save");
-
- atomic_init(&nmi_stage, 0);
-
- run_test(false);
- run_test(true);
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * svm_vmcall_test
- *
- * Copyright (C) 2020, Red Hat, Inc.
- *
- * Nested SVM testing: VMCALL
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-
-static void l2_guest_code(struct svm_test_data *svm)
-{
- __asm__ __volatile__("vmcall");
-}
-
-static void l1_guest_code(struct svm_test_data *svm)
-{
- #define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- struct vmcb *vmcb = svm->vmcb;
-
- /* Prepare for L2 execution. */
- generic_svm_setup(svm, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- run_guest(vmcb, svm->vmcb_gpa);
-
- GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
- GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vcpu *vcpu;
- vm_vaddr_t svm_gva;
- struct kvm_vm *vm;
-
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
-
- vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-
- vcpu_alloc_svm(vm, &svm_gva);
- vcpu_args_set(vcpu, 1, svm_gva);
-
- for (;;) {
- struct ucall uc;
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- /* NOT REACHED */
- case UCALL_SYNC:
- break;
- case UCALL_DONE:
- goto done;
- default:
- TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
- }
- }
-done:
- kvm_vm_free(vm);
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Test for x86 KVM_CAP_SYNC_REGS
- *
- * Copyright (C) 2018, Google LLC.
- *
- * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality,
- * including requesting an invalid register set, updates to/from values
- * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <pthread.h>
-
-#include "kvm_test_harness.h"
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-#define UCALL_PIO_PORT ((uint16_t)0x1000)
-
-struct ucall uc_none = {
- .cmd = UCALL_NONE,
-};
-
-/*
- * ucall is embedded here to protect against compiler reshuffling registers
- * before calling a function. In this test we only need to trigger a
- * KVM_EXIT_IO vmexit and preserve RBX; no additional information is needed.
- */
-void guest_code(void)
-{
- asm volatile("1: in %[port], %%al\n"
- "add $0x1, %%rbx\n"
- "jmp 1b"
- : : [port] "d" (UCALL_PIO_PORT), "D" (&uc_none)
- : "rax", "rbx");
-}
-
-KVM_ONE_VCPU_TEST_SUITE(sync_regs_test);
-
-static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
-{
-#define REG_COMPARE(reg) \
- TEST_ASSERT(left->reg == right->reg, \
- "Register " #reg \
- " values did not match: 0x%llx, 0x%llx", \
- left->reg, right->reg)
- REG_COMPARE(rax);
- REG_COMPARE(rbx);
- REG_COMPARE(rcx);
- REG_COMPARE(rdx);
- REG_COMPARE(rsi);
- REG_COMPARE(rdi);
- REG_COMPARE(rsp);
- REG_COMPARE(rbp);
- REG_COMPARE(r8);
- REG_COMPARE(r9);
- REG_COMPARE(r10);
- REG_COMPARE(r11);
- REG_COMPARE(r12);
- REG_COMPARE(r13);
- REG_COMPARE(r14);
- REG_COMPARE(r15);
- REG_COMPARE(rip);
- REG_COMPARE(rflags);
-#undef REG_COMPARE
-}
-
-static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right)
-{
-}
-
-static void compare_vcpu_events(struct kvm_vcpu_events *left,
- struct kvm_vcpu_events *right)
-{
-}
-
-#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
-#define INVALID_SYNC_FIELD 0x80000000
-
-/*
- * Set an exception as pending *and* injected while KVM is processing events.
- * KVM is supposed to ignore/drop pending exceptions if userspace is also
- * requesting that an exception be injected.
- */
-static void *race_events_inj_pen(void *arg)
-{
- struct kvm_run *run = (struct kvm_run *)arg;
- struct kvm_vcpu_events *events = &run->s.regs.events;
-
- WRITE_ONCE(events->exception.nr, UD_VECTOR);
-
- for (;;) {
- WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
- WRITE_ONCE(events->flags, 0);
- WRITE_ONCE(events->exception.injected, 1);
- WRITE_ONCE(events->exception.pending, 1);
-
- pthread_testcancel();
- }
-
- return NULL;
-}
-
-/*
- * Set an invalid exception vector while KVM is processing events. KVM is
- * supposed to reject any vector >= 32, as well as NMIs (vector 2).
- */
-static void *race_events_exc(void *arg)
-{
- struct kvm_run *run = (struct kvm_run *)arg;
- struct kvm_vcpu_events *events = &run->s.regs.events;
-
- for (;;) {
- WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
- WRITE_ONCE(events->flags, 0);
- WRITE_ONCE(events->exception.nr, UD_VECTOR);
- WRITE_ONCE(events->exception.pending, 1);
- WRITE_ONCE(events->exception.nr, 255);
-
- pthread_testcancel();
- }
-
- return NULL;
-}
-
-/*
- * Toggle CR4.PAE while KVM is processing SREGS; EFER.LME=1 with CR4.PAE=0 is
- * illegal, and KVM's MMU heavily relies on vCPU state being valid.
- */
-static noinline void *race_sregs_cr4(void *arg)
-{
- struct kvm_run *run = (struct kvm_run *)arg;
- __u64 *cr4 = &run->s.regs.sregs.cr4;
- __u64 pae_enabled = *cr4;
- __u64 pae_disabled = *cr4 & ~X86_CR4_PAE;
-
- for (;;) {
- WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_SREGS);
- WRITE_ONCE(*cr4, pae_enabled);
- asm volatile(".rept 512\n\t"
- "nop\n\t"
- ".endr");
- WRITE_ONCE(*cr4, pae_disabled);
-
- pthread_testcancel();
- }
-
- return NULL;
-}
-
-static void race_sync_regs(struct kvm_vcpu *vcpu, void *racer)
-{
- const time_t TIMEOUT = 2; /* seconds, roughly */
- struct kvm_x86_state *state;
- struct kvm_translation tr;
- struct kvm_run *run;
- pthread_t thread;
- time_t t;
-
- run = vcpu->run;
-
- run->kvm_valid_regs = KVM_SYNC_X86_SREGS;
- vcpu_run(vcpu);
- run->kvm_valid_regs = 0;
-
- /* Save state *before* spawning the thread that mucks with vCPU state. */
- state = vcpu_save_state(vcpu);
-
- /*
- * Selftests run 64-bit guests by default, both EFER.LME and CR4.PAE
- * should already be set in guest state.
- */
- TEST_ASSERT((run->s.regs.sregs.cr4 & X86_CR4_PAE) &&
- (run->s.regs.sregs.efer & EFER_LME),
- "vCPU should be in long mode, CR4.PAE=%d, EFER.LME=%d",
- !!(run->s.regs.sregs.cr4 & X86_CR4_PAE),
- !!(run->s.regs.sregs.efer & EFER_LME));
-
- TEST_ASSERT_EQ(pthread_create(&thread, NULL, racer, (void *)run), 0);
-
- for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
- /*
- * Reload known good state if the vCPU triple faults, e.g. due
- * to the unhandled #GPs being injected. VMX preserves state
- * on shutdown, but SVM synthesizes an INIT as the VMCB state
- * is architecturally undefined on triple fault.
- */
- if (!__vcpu_run(vcpu) && run->exit_reason == KVM_EXIT_SHUTDOWN)
- vcpu_load_state(vcpu, state);
-
- if (racer == race_sregs_cr4) {
- tr = (struct kvm_translation) { .linear_address = 0 };
- __vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr);
- }
- }
-
- TEST_ASSERT_EQ(pthread_cancel(thread), 0);
- TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
-
- kvm_x86_state_cleanup(state);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, read_invalid, guest_code)
-{
- struct kvm_run *run = vcpu->run;
- int rv;
-
- /* Request reading invalid register set from VCPU. */
- run->kvm_valid_regs = INVALID_SYNC_FIELD;
- rv = _vcpu_run(vcpu);
- TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
- rv);
- run->kvm_valid_regs = 0;
-
- run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
- rv = _vcpu_run(vcpu);
- TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
- rv);
- run->kvm_valid_regs = 0;
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, set_invalid, guest_code)
-{
- struct kvm_run *run = vcpu->run;
- int rv;
-
- /* Request setting invalid register set into VCPU. */
- run->kvm_dirty_regs = INVALID_SYNC_FIELD;
- rv = _vcpu_run(vcpu);
- TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
- rv);
- run->kvm_dirty_regs = 0;
-
- run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
- rv = _vcpu_run(vcpu);
- TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
- rv);
- run->kvm_dirty_regs = 0;
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, req_and_verify_all_valid, guest_code)
-{
- struct kvm_run *run = vcpu->run;
- struct kvm_vcpu_events events;
- struct kvm_sregs sregs;
- struct kvm_regs regs;
-
- /* Request and verify all valid register sets. */
- /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
- run->kvm_valid_regs = TEST_SYNC_FIELDS;
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-	vcpu_regs_get(vcpu, &regs);
-	compare_regs(&regs, &run->s.regs.regs);
-
- vcpu_sregs_get(vcpu, &sregs);
- compare_sregs(&sregs, &run->s.regs.sregs);
-
- vcpu_events_get(vcpu, &events);
- compare_vcpu_events(&events, &run->s.regs.events);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, set_and_verify_various, guest_code)
-{
- struct kvm_run *run = vcpu->run;
- struct kvm_vcpu_events events;
- struct kvm_sregs sregs;
- struct kvm_regs regs;
-
- /* Run once to get register set */
- run->kvm_valid_regs = TEST_SYNC_FIELDS;
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- /* Set and verify various register values. */
- run->s.regs.regs.rbx = 0xBAD1DEA;
- run->s.regs.sregs.apic_base = 1 << 11;
- /* TODO run->s.regs.events.XYZ = ABC; */
-
- run->kvm_valid_regs = TEST_SYNC_FIELDS;
- run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- TEST_ASSERT(run->s.regs.regs.rbx == 0xBAD1DEA + 1,
- "rbx sync regs value incorrect 0x%llx.",
- run->s.regs.regs.rbx);
- TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11,
- "apic_base sync regs value incorrect 0x%llx.",
- run->s.regs.sregs.apic_base);
-
-	vcpu_regs_get(vcpu, &regs);
-	compare_regs(&regs, &run->s.regs.regs);
-
- vcpu_sregs_get(vcpu, &sregs);
- compare_sregs(&sregs, &run->s.regs.sregs);
-
- vcpu_events_get(vcpu, &events);
- compare_vcpu_events(&events, &run->s.regs.events);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_dirty_regs_bits, guest_code)
-{
- struct kvm_run *run = vcpu->run;
-
- /* Clear kvm_dirty_regs bits, verify new s.regs values are
- * overwritten with existing guest values.
- */
- run->kvm_valid_regs = TEST_SYNC_FIELDS;
- run->kvm_dirty_regs = 0;
- run->s.regs.regs.rbx = 0xDEADBEEF;
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- TEST_ASSERT(run->s.regs.regs.rbx != 0xDEADBEEF,
- "rbx sync regs value incorrect 0x%llx.",
- run->s.regs.regs.rbx);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_and_dirty_regs, guest_code)
-{
- struct kvm_run *run = vcpu->run;
- struct kvm_regs regs;
-
- /* Run once to get register set */
- run->kvm_valid_regs = TEST_SYNC_FIELDS;
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-	/* Clear kvm_valid_regs bits and kvm_dirty_regs bits.
- * Verify s.regs values are not overwritten with existing guest values
- * and that guest values are not overwritten with kvm_sync_regs values.
- */
- run->kvm_valid_regs = 0;
- run->kvm_dirty_regs = 0;
- run->s.regs.regs.rbx = 0xAAAA;
-	vcpu_regs_get(vcpu, &regs);
-	regs.rbx = 0xBAC0;
-	vcpu_regs_set(vcpu, &regs);
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA,
- "rbx sync regs value incorrect 0x%llx.",
- run->s.regs.regs.rbx);
-	vcpu_regs_get(vcpu, &regs);
- TEST_ASSERT(regs.rbx == 0xBAC0 + 1,
- "rbx guest value incorrect 0x%llx.",
- regs.rbx);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_regs_bits, guest_code)
-{
- struct kvm_run *run = vcpu->run;
- struct kvm_regs regs;
-
- /* Run once to get register set */
- run->kvm_valid_regs = TEST_SYNC_FIELDS;
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten
- * with existing guest values but that guest values are overwritten
- * with kvm_sync_regs values.
- */
- run->kvm_valid_regs = 0;
- run->kvm_dirty_regs = TEST_SYNC_FIELDS;
- run->s.regs.regs.rbx = 0xBBBB;
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB,
- "rbx sync regs value incorrect 0x%llx.",
- run->s.regs.regs.rbx);
-	vcpu_regs_get(vcpu, &regs);
- TEST_ASSERT(regs.rbx == 0xBBBB + 1,
- "rbx guest value incorrect 0x%llx.",
- regs.rbx);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, race_cr4, guest_code)
-{
- race_sync_regs(vcpu, race_sregs_cr4);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, race_exc, guest_code)
-{
- race_sync_regs(vcpu, race_events_exc);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, race_inj_pen, guest_code)
-{
- race_sync_regs(vcpu, race_events_inj_pen);
-}
-
-int main(int argc, char *argv[])
-{
- int cap;
-
- cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
- TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS);
- TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD));
-
- return test_harness_run(argc, argv);
-}
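For readers unfamiliar with the capability itself: the deleted test drives KVM_CAP_SYNC_REGS through selftest helpers, but the underlying protocol is just the kvm_valid_regs/kvm_dirty_regs fields of the mmap'ed kvm_run structure around KVM_RUN. A rough sketch, assuming hypothetical vcpu_fd and mmap_size values obtained via the usual KVM_CREATE_VCPU and KVM_GET_VCPU_MMAP_SIZE calls, with error handling omitted:

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

static void sync_regs_roundtrip(int vcpu_fd, size_t mmap_size)
{
	struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
				   MAP_SHARED, vcpu_fd, 0);

	/* Ask KVM to mirror the GPRs into run->s.regs on the next exit. */
	run->kvm_valid_regs = KVM_SYNC_X86_REGS;
	ioctl(vcpu_fd, KVM_RUN, 0);
	/* run->s.regs.regs is now valid without a separate KVM_GET_REGS. */

	/* Push a modified value back into the vCPU on the next entry. */
	run->s.regs.regs.rbx = 0xBAD1DEA;
	run->kvm_dirty_regs = KVM_SYNC_X86_REGS;
	ioctl(vcpu_fd, KVM_RUN, 0);
}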
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-#include "svm_util.h"
-
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "kselftest.h"
-
-#define ARBITRARY_IO_PORT 0x2000
-
-/* The virtual machine object. */
-static struct kvm_vm *vm;
-
-static void l2_guest_code(void)
-{
- asm volatile("inb %%dx, %%al"
- : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
-}
-
-#define L2_GUEST_STACK_SIZE 64
-unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
-void l1_guest_code_vmx(struct vmx_pages *vmx)
-{
-
- GUEST_ASSERT(vmx->vmcs_gpa);
- GUEST_ASSERT(prepare_for_vmx_operation(vmx));
- GUEST_ASSERT(load_vmcs(vmx));
-
- prepare_vmcs(vmx, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- GUEST_ASSERT(!vmlaunch());
-	/* L2 should triple fault after a triple fault event is injected. */
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
- GUEST_DONE();
-}
-
-void l1_guest_code_svm(struct svm_test_data *svm)
-{
- struct vmcb *vmcb = svm->vmcb;
-
- generic_svm_setup(svm, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-	/* Don't intercept shutdown to test the case where SVM allows it. */
- vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
-
- run_guest(vmcb, svm->vmcb_gpa);
-
- /* should not reach here, L1 should crash */
- GUEST_ASSERT(0);
-}
-
-int main(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_run *run;
- struct kvm_vcpu_events events;
- struct ucall uc;
-
- bool has_vmx = kvm_cpu_has(X86_FEATURE_VMX);
- bool has_svm = kvm_cpu_has(X86_FEATURE_SVM);
-
- TEST_REQUIRE(has_vmx || has_svm);
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT));
-
-
- if (has_vmx) {
- vm_vaddr_t vmx_pages_gva;
-
- vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx);
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
- } else {
- vm_vaddr_t svm_gva;
-
- vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm);
- vcpu_alloc_svm(vm, &svm_gva);
- vcpu_args_set(vcpu, 1, svm_gva);
- }
-
- vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1);
- run = vcpu->run;
- vcpu_run(vcpu);
-
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
- "Expected IN from port %d from L2, got port %d",
- ARBITRARY_IO_PORT, run->io.port);
- vcpu_events_get(vcpu, &events);
- events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
- events.triple_fault.pending = true;
- vcpu_events_set(vcpu, &events);
- run->immediate_exit = true;
- vcpu_run_complete_io(vcpu);
-
- vcpu_events_get(vcpu, &events);
- TEST_ASSERT(events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT,
- "Triple fault event invalid");
- TEST_ASSERT(events.triple_fault.pending,
- "No triple fault pending");
- vcpu_run(vcpu);
-
-
- if (has_svm) {
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
- } else {
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_DONE:
- break;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- default:
- TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
- }
- }
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Tests for MSR_IA32_TSC and MSR_IA32_TSC_ADJUST.
- *
- * Copyright (C) 2020, Red Hat, Inc.
- */
-#include <stdio.h>
-#include <string.h>
-#include "kvm_util.h"
-#include "processor.h"
-
-#define UNITY (1ull << 30)
-#define HOST_ADJUST (UNITY * 64)
-#define GUEST_STEP (UNITY * 4)
-#define ROUND(x) ((x + UNITY / 2) & -UNITY)
-#define rounded_rdmsr(x) ROUND(rdmsr(x))
-#define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vcpu, x))
-
-static void guest_code(void)
-{
- u64 val = 0;
-
- GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
- GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
- /* Guest: writes to MSR_IA32_TSC affect both MSRs. */
- val = 1ull * GUEST_STEP;
- wrmsr(MSR_IA32_TSC, val);
- GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
- GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
- /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */
- GUEST_SYNC(2);
- val = 2ull * GUEST_STEP;
- wrmsr(MSR_IA32_TSC_ADJUST, val);
- GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
- GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
- /* Host: setting the TSC offset. */
- GUEST_SYNC(3);
- GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
- GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
- /*
- * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
- * host-side offset and affect both MSRs.
- */
- GUEST_SYNC(4);
- val = 3ull * GUEST_STEP;
- wrmsr(MSR_IA32_TSC_ADJUST, val);
- GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
- GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
- /*
- * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
- * offset is now visible in MSR_IA32_TSC_ADJUST.
- */
- GUEST_SYNC(5);
- val = 4ull * GUEST_STEP;
- wrmsr(MSR_IA32_TSC, val);
- GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
- GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
-
- GUEST_DONE();
-}
-
-static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
-{
- struct ucall uc;
-
- vcpu_run(vcpu);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- if (!strcmp((const char *)uc.args[0], "hello") &&
- uc.args[1] == stage + 1)
- ksft_test_result_pass("stage %d passed\n", stage + 1);
- else
- ksft_test_result_fail(
- "stage %d: Unexpected register values vmexit, got %lx",
- stage + 1, (ulong)uc.args[1]);
- return;
- case UCALL_DONE:
- ksft_test_result_pass("stage %d passed\n", stage + 1);
- return;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- default:
- TEST_ASSERT(false, "Unexpected exit: %s",
- exit_reason_str(vcpu->run->exit_reason));
- }
-}
-
-int main(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- uint64_t val;
-
- ksft_print_header();
- ksft_set_plan(5);
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
- val = 0;
- TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
- TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
- /* Guest: writes to MSR_IA32_TSC affect both MSRs. */
- run_vcpu(vcpu, 1);
- val = 1ull * GUEST_STEP;
- TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
- TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
- /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */
- run_vcpu(vcpu, 2);
- val = 2ull * GUEST_STEP;
- TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
- TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
- /*
- * Host: writes to MSR_IA32_TSC set the host-side offset
- * and therefore do not change MSR_IA32_TSC_ADJUST.
- */
- vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val);
- TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
- TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
- run_vcpu(vcpu, 3);
-
- /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC. */
- vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456);
- TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
- TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456);
-
- /* Restore previous value. */
- vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val);
- TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
- TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
- /*
- * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
- * host-side offset and affect both MSRs.
- */
- run_vcpu(vcpu, 4);
- val = 3ull * GUEST_STEP;
- TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
- TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
- /*
- * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
- * offset is now visible in MSR_IA32_TSC_ADJUST.
- */
- run_vcpu(vcpu, 5);
- val = 4ull * GUEST_STEP;
- TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
- TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
-
- kvm_vm_free(vm);
-
- ksft_finished(); /* Print results and exit() accordingly */
-}
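One detail worth spelling out: ROUND() above adds UNITY/2 and masks with -UNITY, i.e. it rounds a TSC/TSC_ADJUST reading to the nearest multiple of UNITY (2^30), which is what lets the assertions use exact equality even though the TSC keeps ticking between steps. A tiny compile-time check of that behaviour, using the same macros with the argument parenthesized:

#define UNITY (1ull << 30)
#define ROUND(x) (((x) + UNITY / 2) & -UNITY)

/* Drift below half a unit is discarded ... */
_Static_assert(ROUND(UNITY + 0x100000) == UNITY, "rounds down");
/* ... and drift past half a unit rounds up to the next multiple. */
_Static_assert(ROUND(UNITY + UNITY / 2 + 1) == 2 * UNITY, "rounds up");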
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright © 2021 Amazon.com, Inc. or its affiliates.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-#include <stdint.h>
-#include <time.h>
-#include <sched.h>
-#include <signal.h>
-#include <pthread.h>
-
-#define NR_TEST_VCPUS 20
-
-static struct kvm_vm *vm;
-pthread_spinlock_t create_lock;
-
-#define TEST_TSC_KHZ 2345678UL
-#define TEST_TSC_OFFSET 200000000
-
-uint64_t tsc_sync;
-static void guest_code(void)
-{
- uint64_t start_tsc, local_tsc, tmp;
-
- start_tsc = rdtsc();
- do {
- tmp = READ_ONCE(tsc_sync);
- local_tsc = rdtsc();
- WRITE_ONCE(tsc_sync, local_tsc);
- if (unlikely(local_tsc < tmp))
- GUEST_SYNC_ARGS(0, local_tsc, tmp, 0, 0);
-
- } while (local_tsc - start_tsc < 5000 * TEST_TSC_KHZ);
-
- GUEST_DONE();
-}
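A minimal sketch of the arithmetic behind the loop bound above (the helper below is hypothetical and not part of the test): TEST_TSC_KHZ is a rate in kHz, so one second of guest time is TEST_TSC_KHZ * 1000 TSC cycles, which makes 5000 * TEST_TSC_KHZ roughly five seconds of spinning.

	/* Hypothetical helper, illustration only. */
	static inline uint64_t tsc_khz_to_cycles(uint64_t tsc_khz, uint64_t secs)
	{
		/* 2345678 kHz * 1000 * 5 s == 11728390000 cycles == 5000 * TEST_TSC_KHZ */
		return tsc_khz * 1000 * secs;
	}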
-
-
-static void *run_vcpu(void *_cpu_nr)
-{
- unsigned long vcpu_id = (unsigned long)_cpu_nr;
- unsigned long failures = 0;
- static bool first_cpu_done;
- struct kvm_vcpu *vcpu;
-
- /* The kernel is fine, but vm_vcpu_add() needs locking */
- pthread_spin_lock(&create_lock);
-
- vcpu = vm_vcpu_add(vm, vcpu_id, guest_code);
-
- if (!first_cpu_done) {
- first_cpu_done = true;
- vcpu_set_msr(vcpu, MSR_IA32_TSC, TEST_TSC_OFFSET);
- }
-
- pthread_spin_unlock(&create_lock);
-
- for (;;) {
- struct ucall uc;
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_DONE:
- goto out;
-
- case UCALL_SYNC:
- printf("Guest %d sync %lx %lx %ld\n", vcpu->id,
- uc.args[2], uc.args[3], uc.args[2] - uc.args[3]);
- failures++;
- break;
-
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
- }
- out:
- return (void *)failures;
-}
-
-int main(int argc, char *argv[])
-{
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_TSC_CONTROL));
-
- vm = vm_create(NR_TEST_VCPUS);
- vm_ioctl(vm, KVM_SET_TSC_KHZ, (void *) TEST_TSC_KHZ);
-
- pthread_spin_init(&create_lock, PTHREAD_PROCESS_PRIVATE);
- pthread_t cpu_threads[NR_TEST_VCPUS];
- unsigned long cpu;
- for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++)
- pthread_create(&cpu_threads[cpu], NULL, run_vcpu, (void *)cpu);
-
- unsigned long failures = 0;
- for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++) {
- void *this_cpu_failures;
- pthread_join(cpu_threads[cpu], &this_cpu_failures);
- failures += (unsigned long)this_cpu_failures;
- }
-
- TEST_ASSERT(!failures, "TSC sync failed");
- pthread_spin_destroy(&create_lock);
- kvm_vm_free(vm);
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ucna_injection_test
- *
- * Copyright (C) 2022, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Test that user space can inject UnCorrectable No Action required (UCNA)
- * memory errors to the guest.
- *
- * The test starts one vCPU with MCG_CMCI_P enabled. It verifies that
- * proper UCNA errors can be injected into a vCPU with MCG_CMCI_P and the
- * corresponding per-bank control register (MCI_CTL2) bit enabled.
- * The test also checks that the UCNA errors get recorded in the
- * Machine Check bank registers regardless of whether the error signaling
- * interrupts are delivered to the guest.
- *
- */
-#include <pthread.h>
-#include <inttypes.h>
-#include <string.h>
-#include <time.h>
-
-#include "kvm_util.h"
-#include "mce.h"
-#include "processor.h"
-#include "test_util.h"
-#include "apic.h"
-
-#define SYNC_FIRST_UCNA 9
-#define SYNC_SECOND_UCNA 10
-#define SYNC_GP 11
-#define FIRST_UCNA_ADDR 0xdeadbeef
-#define SECOND_UCNA_ADDR 0xcafeb0ba
-
-/*
- * Vector for the CMCI interrupt.
- * Value is arbitrary. Any value in 0x20-0xFF should work:
- * https://wiki.osdev.org/Interrupt_Vector_Table
- */
-#define CMCI_VECTOR 0xa9
-
-#define UCNA_BANK 0x7 // IMC0 bank
-
-#define MCI_CTL2_RESERVED_BIT BIT_ULL(29)
-
-static uint64_t supported_mcg_caps;
-
-/*
- * Record states about the injected UCNA.
- * Variables starting with the 'i_' prefix are recorded in the interrupt
- * handler. Variables without the 'i_' prefix are recorded in the guest's
- * main execution thread.
- */
-static volatile uint64_t i_ucna_rcvd;
-static volatile uint64_t i_ucna_addr;
-static volatile uint64_t ucna_addr;
-static volatile uint64_t ucna_addr2;
-
-struct thread_params {
- struct kvm_vcpu *vcpu;
- uint64_t *p_i_ucna_rcvd;
- uint64_t *p_i_ucna_addr;
- uint64_t *p_ucna_addr;
- uint64_t *p_ucna_addr2;
-};
-
-static void verify_apic_base_addr(void)
-{
- uint64_t msr = rdmsr(MSR_IA32_APICBASE);
- uint64_t base = GET_APIC_BASE(msr);
-
- GUEST_ASSERT(base == APIC_DEFAULT_GPA);
-}
-
-static void ucna_injection_guest_code(void)
-{
- uint64_t ctl2;
- verify_apic_base_addr();
- xapic_enable();
-
-	/* Sets up the interrupt vector and enables per-bank CMCI signaling. */
- xapic_write_reg(APIC_LVTCMCI, CMCI_VECTOR | APIC_DM_FIXED);
- ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
- wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
-
- /* Enables interrupt in guest. */
- asm volatile("sti");
-
- /* Let user space inject the first UCNA */
- GUEST_SYNC(SYNC_FIRST_UCNA);
-
- ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
-
- /* Disables the per-bank CMCI signaling. */
- ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
- wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 & ~MCI_CTL2_CMCI_EN);
-
- /* Let the user space inject the second UCNA */
- GUEST_SYNC(SYNC_SECOND_UCNA);
-
- ucna_addr2 = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
- GUEST_DONE();
-}
-
-static void cmci_disabled_guest_code(void)
-{
- uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
- wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
-
- GUEST_DONE();
-}
-
-static void cmci_enabled_guest_code(void)
-{
- uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
- wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_RESERVED_BIT);
-
- GUEST_DONE();
-}
-
-static void guest_cmci_handler(struct ex_regs *regs)
-{
- i_ucna_rcvd++;
- i_ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
- xapic_write_reg(APIC_EOI, 0);
-}
-
-static void guest_gp_handler(struct ex_regs *regs)
-{
- GUEST_SYNC(SYNC_GP);
-}
-
-static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- vcpu_run(vcpu);
-
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC,
- "Expect UCALL_SYNC");
- TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected.");
- printf("vCPU received GP in guest.\n");
-}
-
-static void inject_ucna(struct kvm_vcpu *vcpu, uint64_t addr) {
- /*
- * A UCNA error is indicated with VAL=1, UC=1, PCC=0, S=0 and AR=0 in
- * the IA32_MCi_STATUS register.
- * MSCOD=1 (BIT[16] - MscodDataRdErr).
- * MCACOD=0x0090 (Memory controller error format, channel 0)
- */
- uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
- MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090;
- struct kvm_x86_mce mce = {};
- mce.status = status;
- mce.mcg_status = 0;
- /*
- * MCM_ADDR_PHYS indicates the reported address is a physical address.
-	 * The lowest 6 bits are the recoverable address LSB, i.e., the injected
-	 * MCE is at 4KB granularity.
- */
- mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
- mce.addr = addr;
- mce.bank = UCNA_BANK;
-
- vcpu_ioctl(vcpu, KVM_X86_SET_MCE, &mce);
-}
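For readability, a sketch of how the 0x10090 literal OR'd into the status above decomposes, assuming the field layout described in the comment (the macro names are invented for illustration and are not part of the test):

	#define UCNA_MCACOD	0x0090ULL	/* memory controller error format, channel 0 */
	#define UCNA_MSCOD	BIT_ULL(16)	/* MscodDataRdErr */
	/* UCNA_MSCOD | UCNA_MCACOD == 0x10090 */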
-
-static void *run_ucna_injection(void *arg)
-{
- struct thread_params *params = (struct thread_params *)arg;
- struct ucall uc;
- int old;
- int r;
-
- r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
- TEST_ASSERT(r == 0,
- "pthread_setcanceltype failed with errno=%d",
- r);
-
- vcpu_run(params->vcpu);
-
- TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
- TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
- "Expect UCALL_SYNC");
- TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA.");
-
- printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR);
-
- inject_ucna(params->vcpu, FIRST_UCNA_ADDR);
- vcpu_run(params->vcpu);
-
- TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
- TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
- "Expect UCALL_SYNC");
- TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA.");
-
- printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR);
-
- inject_ucna(params->vcpu, SECOND_UCNA_ADDR);
- vcpu_run(params->vcpu);
-
- TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
- if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) {
- TEST_ASSERT(false, "vCPU assertion failure: %s.",
- (const char *)uc.args[0]);
- }
-
- return NULL;
-}
-
-static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *params)
-{
- struct kvm_vm *vm = vcpu->vm;
- params->vcpu = vcpu;
- params->p_i_ucna_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_rcvd);
- params->p_i_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_addr);
- params->p_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr);
- params->p_ucna_addr2 = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr2);
-
- run_ucna_injection(params);
-
-	TEST_ASSERT(*params->p_i_ucna_rcvd == 1, "Only the first UCNA should be signaled.");
-	TEST_ASSERT(*params->p_i_ucna_addr == FIRST_UCNA_ADDR,
-		    "Only the first UCNA's reported addr should be recorded via interrupt.");
-	TEST_ASSERT(*params->p_ucna_addr == FIRST_UCNA_ADDR,
-		    "The first injected UCNA should be exposed via registers.");
-	TEST_ASSERT(*params->p_ucna_addr2 == SECOND_UCNA_ADDR,
-		    "The second injected UCNA should be exposed via registers.");
-
- printf("Test successful.\n"
- "UCNA CMCI interrupts received: %ld\n"
- "Last UCNA address received via CMCI: %lx\n"
- "First UCNA address in vCPU thread: %lx\n"
- "Second UCNA address in vCPU thread: %lx\n",
- *params->p_i_ucna_rcvd, *params->p_i_ucna_addr,
- *params->p_ucna_addr, *params->p_ucna_addr2);
-}
-
-static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p)
-{
- uint64_t mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS;
- if (enable_cmci_p)
- mcg_caps |= MCG_CMCI_P;
-
- mcg_caps &= supported_mcg_caps | MCG_CAP_BANKS_MASK;
- vcpu_ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_caps);
-}
-
-static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, uint32_t vcpuid,
- bool enable_cmci_p, void *guest_code)
-{
- struct kvm_vcpu *vcpu = vm_vcpu_add(vm, vcpuid, guest_code);
- setup_mce_cap(vcpu, enable_cmci_p);
- return vcpu;
-}
-
-int main(int argc, char *argv[])
-{
- struct thread_params params;
- struct kvm_vm *vm;
- struct kvm_vcpu *ucna_vcpu;
- struct kvm_vcpu *cmcidis_vcpu;
- struct kvm_vcpu *cmci_vcpu;
-
- kvm_check_cap(KVM_CAP_MCE);
-
- vm = __vm_create(VM_SHAPE_DEFAULT, 3, 0);
-
- kvm_ioctl(vm->kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED,
- &supported_mcg_caps);
-
- if (!(supported_mcg_caps & MCG_CMCI_P)) {
- print_skip("MCG_CMCI_P is not supported");
- exit(KSFT_SKIP);
- }
-
- ucna_vcpu = create_vcpu_with_mce_cap(vm, 0, true, ucna_injection_guest_code);
- cmcidis_vcpu = create_vcpu_with_mce_cap(vm, 1, false, cmci_disabled_guest_code);
- cmci_vcpu = create_vcpu_with_mce_cap(vm, 2, true, cmci_enabled_guest_code);
-
- vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler);
- vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
-
- virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
-
-	test_ucna_injection(ucna_vcpu, &params);
- run_vcpu_expect_gp(cmcidis_vcpu);
- run_vcpu_expect_gp(cmci_vcpu);
-
- kvm_vm_free(vm);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-
-static void guest_ins_port80(uint8_t *buffer, unsigned int count)
-{
- unsigned long end;
-
- if (count == 2)
- end = (unsigned long)buffer + 1;
- else
- end = (unsigned long)buffer + 8192;
-
- asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory");
- GUEST_ASSERT_EQ(count, 0);
- GUEST_ASSERT_EQ((unsigned long)buffer, end);
-}
-
-static void guest_code(void)
-{
- uint8_t buffer[8192];
- int i;
-
- /*
- * Special case tests. main() will adjust RCX 2 => 1 and 3 => 8192 to
- * test that KVM doesn't explode when userspace modifies the "count" on
- * a userspace I/O exit. KVM isn't required to play nice with the I/O
-	 * itself as KVM doesn't support manipulating the count; it just needs
- * to not explode or overflow a buffer.
- */
- guest_ins_port80(buffer, 2);
- guest_ins_port80(buffer, 3);
-
- /* Verify KVM fills the buffer correctly when not stuffing RCX. */
- memset(buffer, 0, sizeof(buffer));
- guest_ins_port80(buffer, 8192);
- for (i = 0; i < 8192; i++)
- __GUEST_ASSERT(buffer[i] == 0xaa,
- "Expected '0xaa', got '0x%x' at buffer[%u]",
- buffer[i], i);
-
- GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vcpu *vcpu;
- struct kvm_regs regs;
- struct kvm_run *run;
- struct kvm_vm *vm;
- struct ucall uc;
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- run = vcpu->run;
-
-	memset(&regs, 0, sizeof(regs));
-
- while (1) {
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- if (get_ucall(vcpu, &uc))
- break;
-
- TEST_ASSERT(run->io.port == 0x80,
- "Expected I/O at port 0x80, got port 0x%x", run->io.port);
-
- /*
- * Modify the rep string count in RCX: 2 => 1 and 3 => 8192.
- * Note, this abuses KVM's batching of rep string I/O to avoid
- * getting stuck in an infinite loop. That behavior isn't in
- * scope from a testing perspective as it's not ABI in any way,
- * i.e. it really is abusing internal KVM knowledge.
- */
-		vcpu_regs_get(vcpu, &regs);
- if (regs.rcx == 2)
- regs.rcx = 1;
- if (regs.rcx == 3)
- regs.rcx = 8192;
- memset((void *)run + run->io.data_offset, 0xaa, 4096);
-		vcpu_regs_set(vcpu, &regs);
- }
-
- switch (uc.cmd) {
- case UCALL_DONE:
- break;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
-
- kvm_vm_free(vm);
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2020, Google LLC.
- *
- * Tests for exiting into userspace on registered MSRs
- */
-#include <sys/ioctl.h>
-
-#include "kvm_test_harness.h"
-#include "test_util.h"
-#include "kvm_util.h"
-#include "vmx.h"
-
-#define MSR_NON_EXISTENT 0x474f4f00
-
-static u64 deny_bits = 0;
-struct kvm_msr_filter filter_allow = {
- .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
- .ranges = {
- {
- .flags = KVM_MSR_FILTER_READ |
- KVM_MSR_FILTER_WRITE,
- .nmsrs = 1,
- /* Test an MSR the kernel knows about. */
- .base = MSR_IA32_XSS,
- .bitmap = (uint8_t*)&deny_bits,
- }, {
- .flags = KVM_MSR_FILTER_READ |
- KVM_MSR_FILTER_WRITE,
- .nmsrs = 1,
- /* Test an MSR the kernel doesn't know about. */
- .base = MSR_IA32_FLUSH_CMD,
- .bitmap = (uint8_t*)&deny_bits,
- }, {
- .flags = KVM_MSR_FILTER_READ |
- KVM_MSR_FILTER_WRITE,
- .nmsrs = 1,
- /* Test a fabricated MSR that no one knows about. */
- .base = MSR_NON_EXISTENT,
- .bitmap = (uint8_t*)&deny_bits,
- },
- },
-};
-
-struct kvm_msr_filter filter_fs = {
- .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
- .ranges = {
- {
- .flags = KVM_MSR_FILTER_READ,
- .nmsrs = 1,
- .base = MSR_FS_BASE,
- .bitmap = (uint8_t*)&deny_bits,
- },
- },
-};
-
-struct kvm_msr_filter filter_gs = {
- .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
- .ranges = {
- {
- .flags = KVM_MSR_FILTER_READ,
- .nmsrs = 1,
- .base = MSR_GS_BASE,
- .bitmap = (uint8_t*)&deny_bits,
- },
- },
-};
-
-static uint64_t msr_non_existent_data;
-static int guest_exception_count;
-static u32 msr_reads, msr_writes;
-
-static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_deadbeef[1] = { 0x1 };
-
-static void deny_msr(uint8_t *bitmap, u32 msr)
-{
- u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1);
-
- bitmap[idx / 8] &= ~(1 << (idx % 8));
-}
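In these filter bitmaps a set bit allows the MSR and a cleared bit denies it, which is why deny_msr() clears the bit after prepare_bitmaps() fills everything with 0xff. A hypothetical allow_msr() counterpart (not part of the test) would simply set the bit back:

	/* Hypothetical mirror of deny_msr(), illustration only. */
	static void allow_msr(uint8_t *bitmap, u32 msr)
	{
		u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1);

		bitmap[idx / 8] |= 1 << (idx % 8);
	}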
-
-static void prepare_bitmaps(void)
-{
- memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000));
- memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write));
- memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000));
- memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000));
- memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read));
-
- deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL);
- deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK);
- deny_msr(bitmap_c0000000_read, MSR_GS_BASE);
-}
-
-struct kvm_msr_filter filter_deny = {
- .flags = KVM_MSR_FILTER_DEFAULT_DENY,
- .ranges = {
- {
- .flags = KVM_MSR_FILTER_READ,
- .base = 0x00000000,
- .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
- .bitmap = bitmap_00000000,
- }, {
- .flags = KVM_MSR_FILTER_WRITE,
- .base = 0x00000000,
- .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
- .bitmap = bitmap_00000000_write,
- }, {
- .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE,
- .base = 0x40000000,
- .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
- .bitmap = bitmap_40000000,
- }, {
- .flags = KVM_MSR_FILTER_READ,
- .base = 0xc0000000,
- .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
- .bitmap = bitmap_c0000000_read,
- }, {
- .flags = KVM_MSR_FILTER_WRITE,
- .base = 0xc0000000,
- .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
- .bitmap = bitmap_c0000000,
- }, {
- .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ,
- .base = 0xdeadbeef,
- .nmsrs = 1,
- .bitmap = bitmap_deadbeef,
- },
- },
-};
-
-struct kvm_msr_filter no_filter_deny = {
- .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
-};
-
-/*
- * Note: Force test_rdmsr() to not be inlined to prevent the labels,
- * rdmsr_start and rdmsr_end, from being defined multiple times.
- */
-static noinline uint64_t test_rdmsr(uint32_t msr)
-{
- uint32_t a, d;
-
- guest_exception_count = 0;
-
- __asm__ __volatile__("rdmsr_start: rdmsr; rdmsr_end:" :
- "=a"(a), "=d"(d) : "c"(msr) : "memory");
-
- return a | ((uint64_t) d << 32);
-}
-
-/*
- * Note: Force test_wrmsr() to not be inlined to prevent the labels,
- * wrmsr_start and wrmsr_end, from being defined multiple times.
- */
-static noinline void test_wrmsr(uint32_t msr, uint64_t value)
-{
- uint32_t a = value;
- uint32_t d = value >> 32;
-
- guest_exception_count = 0;
-
- __asm__ __volatile__("wrmsr_start: wrmsr; wrmsr_end:" ::
- "a"(a), "d"(d), "c"(msr) : "memory");
-}
-
-extern char rdmsr_start, rdmsr_end;
-extern char wrmsr_start, wrmsr_end;
-
-/*
- * Note: Force test_em_rdmsr() to not be inlined to prevent the labels,
- * rdmsr_start and rdmsr_end, from being defined multiple times.
- */
-static noinline uint64_t test_em_rdmsr(uint32_t msr)
-{
- uint32_t a, d;
-
- guest_exception_count = 0;
-
- __asm__ __volatile__(KVM_FEP "em_rdmsr_start: rdmsr; em_rdmsr_end:" :
- "=a"(a), "=d"(d) : "c"(msr) : "memory");
-
- return a | ((uint64_t) d << 32);
-}
-
-/*
- * Note: Force test_em_wrmsr() to not be inlined to prevent the labels,
- * wrmsr_start and wrmsr_end, from being defined multiple times.
- */
-static noinline void test_em_wrmsr(uint32_t msr, uint64_t value)
-{
- uint32_t a = value;
- uint32_t d = value >> 32;
-
- guest_exception_count = 0;
-
- __asm__ __volatile__(KVM_FEP "em_wrmsr_start: wrmsr; em_wrmsr_end:" ::
- "a"(a), "d"(d), "c"(msr) : "memory");
-}
-
-extern char em_rdmsr_start, em_rdmsr_end;
-extern char em_wrmsr_start, em_wrmsr_end;
-
-static void guest_code_filter_allow(void)
-{
- uint64_t data;
-
- /*
- * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_XSS.
- *
- * A GP is thrown if anything other than 0 is written to
- * MSR_IA32_XSS.
- */
- data = test_rdmsr(MSR_IA32_XSS);
- GUEST_ASSERT(data == 0);
- GUEST_ASSERT(guest_exception_count == 0);
-
- test_wrmsr(MSR_IA32_XSS, 0);
- GUEST_ASSERT(guest_exception_count == 0);
-
- test_wrmsr(MSR_IA32_XSS, 1);
- GUEST_ASSERT(guest_exception_count == 1);
-
- /*
- * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_FLUSH_CMD.
- *
- * A GP is thrown if MSR_IA32_FLUSH_CMD is read
- * from or if a value other than 1 is written to it.
- */
- test_rdmsr(MSR_IA32_FLUSH_CMD);
- GUEST_ASSERT(guest_exception_count == 1);
-
- test_wrmsr(MSR_IA32_FLUSH_CMD, 0);
- GUEST_ASSERT(guest_exception_count == 1);
-
- test_wrmsr(MSR_IA32_FLUSH_CMD, 1);
- GUEST_ASSERT(guest_exception_count == 0);
-
- /*
- * Test userspace intercepting rdmsr / wrmsr for MSR_NON_EXISTENT.
- *
- * Test that a fabricated MSR can pass through the kernel
- * and be handled in userspace.
- */
- test_wrmsr(MSR_NON_EXISTENT, 2);
- GUEST_ASSERT(guest_exception_count == 0);
-
- data = test_rdmsr(MSR_NON_EXISTENT);
- GUEST_ASSERT(data == 2);
- GUEST_ASSERT(guest_exception_count == 0);
-
- if (is_forced_emulation_enabled) {
- /* Let userspace know we aren't done. */
- GUEST_SYNC(0);
-
- /*
- * Now run the same tests with the instruction emulator.
- */
- data = test_em_rdmsr(MSR_IA32_XSS);
- GUEST_ASSERT(data == 0);
- GUEST_ASSERT(guest_exception_count == 0);
- test_em_wrmsr(MSR_IA32_XSS, 0);
- GUEST_ASSERT(guest_exception_count == 0);
- test_em_wrmsr(MSR_IA32_XSS, 1);
- GUEST_ASSERT(guest_exception_count == 1);
-
- test_em_rdmsr(MSR_IA32_FLUSH_CMD);
- GUEST_ASSERT(guest_exception_count == 1);
- test_em_wrmsr(MSR_IA32_FLUSH_CMD, 0);
- GUEST_ASSERT(guest_exception_count == 1);
- test_em_wrmsr(MSR_IA32_FLUSH_CMD, 1);
- GUEST_ASSERT(guest_exception_count == 0);
-
- test_em_wrmsr(MSR_NON_EXISTENT, 2);
- GUEST_ASSERT(guest_exception_count == 0);
- data = test_em_rdmsr(MSR_NON_EXISTENT);
- GUEST_ASSERT(data == 2);
- GUEST_ASSERT(guest_exception_count == 0);
- }
-
- GUEST_DONE();
-}
-
-static void guest_msr_calls(bool trapped)
-{
- /* This goes into the in-kernel emulation */
- wrmsr(MSR_SYSCALL_MASK, 0);
-
- if (trapped) {
- /* This goes into user space emulation */
- GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK);
- GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE);
- } else {
- GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK);
- GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE);
- }
-
- /* If trapped == true, this goes into user space emulation */
- wrmsr(MSR_IA32_POWER_CTL, 0x1234);
-
- /* This goes into the in-kernel emulation */
- rdmsr(MSR_IA32_POWER_CTL);
-
- /* Invalid MSR, should always be handled by user space exit */
- GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef);
- wrmsr(0xdeadbeef, 0x1234);
-}
-
-static void guest_code_filter_deny(void)
-{
- guest_msr_calls(true);
-
- /*
- * Disable msr filtering, so that the kernel
- * handles everything in the next round
- */
- GUEST_SYNC(0);
-
- guest_msr_calls(false);
-
- GUEST_DONE();
-}
-
-static void guest_code_permission_bitmap(void)
-{
- uint64_t data;
-
- data = test_rdmsr(MSR_FS_BASE);
- GUEST_ASSERT(data == MSR_FS_BASE);
- data = test_rdmsr(MSR_GS_BASE);
- GUEST_ASSERT(data != MSR_GS_BASE);
-
- /* Let userspace know to switch the filter */
- GUEST_SYNC(0);
-
- data = test_rdmsr(MSR_FS_BASE);
- GUEST_ASSERT(data != MSR_FS_BASE);
- data = test_rdmsr(MSR_GS_BASE);
- GUEST_ASSERT(data == MSR_GS_BASE);
-
- GUEST_DONE();
-}
-
-static void __guest_gp_handler(struct ex_regs *regs,
- char *r_start, char *r_end,
- char *w_start, char *w_end)
-{
- if (regs->rip == (uintptr_t)r_start) {
- regs->rip = (uintptr_t)r_end;
- regs->rax = 0;
- regs->rdx = 0;
- } else if (regs->rip == (uintptr_t)w_start) {
- regs->rip = (uintptr_t)w_end;
- } else {
- GUEST_ASSERT(!"RIP is at an unknown location!");
- }
-
- ++guest_exception_count;
-}
-
-static void guest_gp_handler(struct ex_regs *regs)
-{
- __guest_gp_handler(regs, &rdmsr_start, &rdmsr_end,
- &wrmsr_start, &wrmsr_end);
-}
-
-static void guest_fep_gp_handler(struct ex_regs *regs)
-{
- __guest_gp_handler(regs, &em_rdmsr_start, &em_rdmsr_end,
- &em_wrmsr_start, &em_wrmsr_end);
-}
-
-static void check_for_guest_assert(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- if (vcpu->run->exit_reason == KVM_EXIT_IO &&
- get_ucall(vcpu, &uc) == UCALL_ABORT) {
- REPORT_GUEST_ASSERT(uc);
- }
-}
-
-static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
-{
- struct kvm_run *run = vcpu->run;
-
- check_for_guest_assert(vcpu);
-
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_RDMSR);
- TEST_ASSERT(run->msr.index == msr_index,
- "Unexpected msr (0x%04x), expected 0x%04x",
- run->msr.index, msr_index);
-
- switch (run->msr.index) {
- case MSR_IA32_XSS:
- run->msr.data = 0;
- break;
- case MSR_IA32_FLUSH_CMD:
- run->msr.error = 1;
- break;
- case MSR_NON_EXISTENT:
- run->msr.data = msr_non_existent_data;
- break;
- case MSR_FS_BASE:
- run->msr.data = MSR_FS_BASE;
- break;
- case MSR_GS_BASE:
- run->msr.data = MSR_GS_BASE;
- break;
- default:
- TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
- }
-}
-
-static void process_wrmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
-{
- struct kvm_run *run = vcpu->run;
-
- check_for_guest_assert(vcpu);
-
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_WRMSR);
- TEST_ASSERT(run->msr.index == msr_index,
- "Unexpected msr (0x%04x), expected 0x%04x",
- run->msr.index, msr_index);
-
- switch (run->msr.index) {
- case MSR_IA32_XSS:
- if (run->msr.data != 0)
- run->msr.error = 1;
- break;
- case MSR_IA32_FLUSH_CMD:
- if (run->msr.data != 1)
- run->msr.error = 1;
- break;
- case MSR_NON_EXISTENT:
- msr_non_existent_data = run->msr.data;
- break;
- default:
- TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
- }
-}
-
-static void process_ucall_done(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- check_for_guest_assert(vcpu);
-
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE,
- "Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
- uc.cmd, UCALL_DONE);
-}
-
-static uint64_t process_ucall(struct kvm_vcpu *vcpu)
-{
- struct ucall uc = {};
-
- check_for_guest_assert(vcpu);
-
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- break;
- case UCALL_ABORT:
- check_for_guest_assert(vcpu);
- break;
- case UCALL_DONE:
- process_ucall_done(vcpu);
- break;
- default:
- TEST_ASSERT(false, "Unexpected ucall");
- }
-
- return uc.cmd;
-}
-
-static void run_guest_then_process_rdmsr(struct kvm_vcpu *vcpu,
- uint32_t msr_index)
-{
- vcpu_run(vcpu);
- process_rdmsr(vcpu, msr_index);
-}
-
-static void run_guest_then_process_wrmsr(struct kvm_vcpu *vcpu,
- uint32_t msr_index)
-{
- vcpu_run(vcpu);
- process_wrmsr(vcpu, msr_index);
-}
-
-static uint64_t run_guest_then_process_ucall(struct kvm_vcpu *vcpu)
-{
- vcpu_run(vcpu);
- return process_ucall(vcpu);
-}
-
-static void run_guest_then_process_ucall_done(struct kvm_vcpu *vcpu)
-{
- vcpu_run(vcpu);
- process_ucall_done(vcpu);
-}
-
-KVM_ONE_VCPU_TEST_SUITE(user_msr);
-
-KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
-{
- struct kvm_vm *vm = vcpu->vm;
- uint64_t cmd;
- int rc;
-
- rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
- TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
- vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
-
- rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
- TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
-
- vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
-
- vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
-
- /* Process guest code userspace exits. */
- run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
- run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
- run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
-
- run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
- run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
- run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
-
- run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
- run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
-
- vcpu_run(vcpu);
- cmd = process_ucall(vcpu);
-
- if (is_forced_emulation_enabled) {
- TEST_ASSERT_EQ(cmd, UCALL_SYNC);
- vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
-
- /* Process emulated rdmsr and wrmsr instructions. */
- run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
- run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
- run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
-
- run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
- run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
- run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
-
- run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
- run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
-
- /* Confirm the guest completed without issues. */
- run_guest_then_process_ucall_done(vcpu);
- } else {
- TEST_ASSERT_EQ(cmd, UCALL_DONE);
-		printf("To run the instruction emulation tests, set the module parameter 'kvm.force_emulation_prefix=1'\n");
- }
-}
-
-static int handle_ucall(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_SYNC:
- vm_ioctl(vcpu->vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny);
- break;
- case UCALL_DONE:
- return 1;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
-
- return 0;
-}
-
-static void handle_rdmsr(struct kvm_run *run)
-{
- run->msr.data = run->msr.index;
- msr_reads++;
-
- if (run->msr.index == MSR_SYSCALL_MASK ||
- run->msr.index == MSR_GS_BASE) {
- TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
- "MSR read trap w/o access fault");
- }
-
- if (run->msr.index == 0xdeadbeef) {
- TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
- "MSR deadbeef read trap w/o inval fault");
- }
-}
-
-static void handle_wrmsr(struct kvm_run *run)
-{
- /* ignore */
- msr_writes++;
-
- if (run->msr.index == MSR_IA32_POWER_CTL) {
- TEST_ASSERT(run->msr.data == 0x1234,
- "MSR data for MSR_IA32_POWER_CTL incorrect");
- TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
- "MSR_IA32_POWER_CTL trap w/o access fault");
- }
-
- if (run->msr.index == 0xdeadbeef) {
- TEST_ASSERT(run->msr.data == 0x1234,
- "MSR data for deadbeef incorrect");
- TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
- "deadbeef trap w/o inval fault");
- }
-}
-
-KVM_ONE_VCPU_TEST(user_msr, msr_filter_deny, guest_code_filter_deny)
-{
- struct kvm_vm *vm = vcpu->vm;
- struct kvm_run *run = vcpu->run;
- int rc;
-
- rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
- TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
- vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_INVAL |
- KVM_MSR_EXIT_REASON_UNKNOWN |
- KVM_MSR_EXIT_REASON_FILTER);
-
- rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
- TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
-
- prepare_bitmaps();
- vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_deny);
-
- while (1) {
- vcpu_run(vcpu);
-
- switch (run->exit_reason) {
- case KVM_EXIT_X86_RDMSR:
- handle_rdmsr(run);
- break;
- case KVM_EXIT_X86_WRMSR:
- handle_wrmsr(run);
- break;
- case KVM_EXIT_IO:
- if (handle_ucall(vcpu))
- goto done;
- break;
- }
-
- }
-
-done:
- TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space");
- TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space");
-}
-
-KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap)
-{
- struct kvm_vm *vm = vcpu->vm;
- int rc;
-
- rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
- TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
- vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
-
- rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
- TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
-
- vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs);
- run_guest_then_process_rdmsr(vcpu, MSR_FS_BASE);
- TEST_ASSERT(run_guest_then_process_ucall(vcpu) == UCALL_SYNC,
- "Expected ucall state to be UCALL_SYNC.");
- vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
- run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE);
- run_guest_then_process_ucall_done(vcpu);
-}
-
-#define test_user_exit_msr_ioctl(vm, cmd, arg, flag, valid_mask) \
-({ \
- int r = __vm_ioctl(vm, cmd, arg); \
- \
- if (flag & valid_mask) \
- TEST_ASSERT(!r, __KVM_IOCTL_ERROR(#cmd, r)); \
- else \
- TEST_ASSERT(r == -1 && errno == EINVAL, \
- "Wanted EINVAL for %s with flag = 0x%llx, got rc: %i errno: %i (%s)", \
- #cmd, flag, r, errno, strerror(errno)); \
-})
-
-static void run_user_space_msr_flag_test(struct kvm_vm *vm)
-{
- struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_USER_SPACE_MSR };
- int nflags = sizeof(cap.args[0]) * BITS_PER_BYTE;
- int rc;
- int i;
-
- rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
- TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
-
- for (i = 0; i < nflags; i++) {
- cap.args[0] = BIT_ULL(i);
- test_user_exit_msr_ioctl(vm, KVM_ENABLE_CAP, &cap,
- BIT_ULL(i), KVM_MSR_EXIT_REASON_VALID_MASK);
- }
-}
-
-static void run_msr_filter_flag_test(struct kvm_vm *vm)
-{
- u64 deny_bits = 0;
- struct kvm_msr_filter filter = {
- .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
- .ranges = {
- {
- .flags = KVM_MSR_FILTER_READ,
- .nmsrs = 1,
- .base = 0,
- .bitmap = (uint8_t *)&deny_bits,
- },
- },
- };
- int nflags;
- int rc;
- int i;
-
- rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
- TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
-
- nflags = sizeof(filter.flags) * BITS_PER_BYTE;
- for (i = 0; i < nflags; i++) {
- filter.flags = BIT_ULL(i);
- test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter,
- BIT_ULL(i), KVM_MSR_FILTER_VALID_MASK);
- }
-
- filter.flags = KVM_MSR_FILTER_DEFAULT_ALLOW;
- nflags = sizeof(filter.ranges[0].flags) * BITS_PER_BYTE;
- for (i = 0; i < nflags; i++) {
- filter.ranges[0].flags = BIT_ULL(i);
- test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter,
- BIT_ULL(i), KVM_MSR_FILTER_RANGE_VALID_MASK);
- }
-}
-
-/* Test that attempts to write to the unused bits in a flag fails. */
-KVM_ONE_VCPU_TEST(user_msr, user_exit_msr_flags, NULL)
-{
- struct kvm_vm *vm = vcpu->vm;
-
- /* Test flags for KVM_CAP_X86_USER_SPACE_MSR. */
- run_user_space_msr_flag_test(vm);
-
- /* Test flags and range flags for KVM_X86_SET_MSR_FILTER. */
- run_msr_filter_flag_test(vm);
-}
-
-int main(int argc, char *argv[])
-{
- return test_harness_run(argc, argv);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vmx_apic_access_test
- *
- * Copyright (C) 2020, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * The first subtest simply checks to see that an L2 guest can be
- * launched with a valid APIC-access address that is backed by a
- * page of L1 physical memory.
- *
- * The second subtest sets the APIC-access address to a (valid) L1
- * physical address that is not backed by memory. KVM can't handle
- * this situation, so resuming L2 should result in a KVM exit for
- * internal error (emulation). This is not an architectural
- * requirement. It is just a shortcoming of KVM. The internal error
- * is unfortunate, but it's better than what used to happen!
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "kselftest.h"
-
-static void l2_guest_code(void)
-{
- /* Exit to L1 */
- __asm__ __volatile__("vmcall");
-}
-
-static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa)
-{
-#define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- uint32_t control;
-
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
- GUEST_ASSERT(load_vmcs(vmx_pages));
-
- /* Prepare the VMCS for L2 execution. */
- prepare_vmcs(vmx_pages, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
- control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
- control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
- vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
- control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
- control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
- vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
- vmwrite(APIC_ACCESS_ADDR, vmx_pages->apic_access_gpa);
-
- /* Try to launch L2 with the memory-backed APIC-access address. */
- GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
- vmwrite(APIC_ACCESS_ADDR, high_gpa);
-
- /* Try to resume L2 with the unbacked APIC-access address. */
- GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));
- GUEST_ASSERT(!vmresume());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
- GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
- unsigned long apic_access_addr = ~0ul;
- vm_vaddr_t vmx_pages_gva;
- unsigned long high_gpa;
- struct vmx_pages *vmx;
- bool done = false;
-
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
- vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-
- high_gpa = (vm->max_gfn - 1) << vm->page_shift;
-
- vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
- prepare_virtualize_apic_accesses(vmx, vm);
- vcpu_args_set(vcpu, 2, vmx_pages_gva, high_gpa);
-
- while (!done) {
- volatile struct kvm_run *run = vcpu->run;
- struct ucall uc;
-
- vcpu_run(vcpu);
- if (apic_access_addr == high_gpa) {
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
- TEST_ASSERT(run->internal.suberror ==
- KVM_INTERNAL_ERROR_EMULATION,
- "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u",
- run->internal.suberror);
- break;
- }
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- /* NOT REACHED */
- case UCALL_SYNC:
- apic_access_addr = uc.args[1];
- break;
- case UCALL_DONE:
- done = true;
- break;
- default:
- TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd);
- }
- }
- kvm_vm_free(vm);
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vmx_close_while_nested
- *
- * Copyright (C) 2019, Red Hat, Inc.
- *
- * Verify that nothing bad happens if a KVM user exits with open
- * file descriptors while executing a nested guest.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "kselftest.h"
-
-enum {
- PORT_L0_EXIT = 0x2000,
-};
-
-static void l2_guest_code(void)
-{
- /* Exit to L0 */
- asm volatile("inb %%dx, %%al"
- : : [port] "d" (PORT_L0_EXIT) : "rax");
-}
-
-static void l1_guest_code(struct vmx_pages *vmx_pages)
-{
-#define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
- GUEST_ASSERT(load_vmcs(vmx_pages));
-
- /* Prepare the VMCS for L2 execution. */
- prepare_vmcs(vmx_pages, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(0);
-}
-
-int main(int argc, char *argv[])
-{
- vm_vaddr_t vmx_pages_gva;
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
- vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-
- /* Allocate VMX pages and shared descriptors (vmx_pages). */
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
- for (;;) {
- volatile struct kvm_run *run = vcpu->run;
- struct ucall uc;
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- if (run->io.port == PORT_L0_EXIT)
- break;
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- /* NOT REACHED */
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
- }
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * KVM dirty page logging test
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <linux/bitmap.h>
-#include <linux/bitops.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-/* The memory slot index to track dirty pages */
-#define TEST_MEM_SLOT_INDEX 1
-#define TEST_MEM_PAGES 3
-
-/* L1 guest test virtual memory offset */
-#define GUEST_TEST_MEM 0xc0000000
-
-/* L2 guest test virtual memory offset */
-#define NESTED_TEST_MEM1 0xc0001000
-#define NESTED_TEST_MEM2 0xc0002000
-
-static void l2_guest_code(u64 *a, u64 *b)
-{
- READ_ONCE(*a);
- WRITE_ONCE(*a, 1);
- GUEST_SYNC(true);
- GUEST_SYNC(false);
-
- WRITE_ONCE(*b, 1);
- GUEST_SYNC(true);
- WRITE_ONCE(*b, 1);
- GUEST_SYNC(true);
- GUEST_SYNC(false);
-
- /* Exit to L1 and never come back. */
- vmcall();
-}
-
-static void l2_guest_code_ept_enabled(void)
-{
- l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2);
-}
-
-static void l2_guest_code_ept_disabled(void)
-{
- /* Access the same L1 GPAs as l2_guest_code_ept_enabled() */
- l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM);
-}
-
-void l1_guest_code(struct vmx_pages *vmx)
-{
-#define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- void *l2_rip;
-
- GUEST_ASSERT(vmx->vmcs_gpa);
- GUEST_ASSERT(prepare_for_vmx_operation(vmx));
- GUEST_ASSERT(load_vmcs(vmx));
-
- if (vmx->eptp_gpa)
- l2_rip = l2_guest_code_ept_enabled;
- else
- l2_rip = l2_guest_code_ept_disabled;
-
- prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- GUEST_SYNC(false);
- GUEST_ASSERT(!vmlaunch());
- GUEST_SYNC(false);
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
- GUEST_DONE();
-}
-
-static void test_vmx_dirty_log(bool enable_ept)
-{
- vm_vaddr_t vmx_pages_gva = 0;
- struct vmx_pages *vmx;
- unsigned long *bmap;
- uint64_t *host_test_mem;
-
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct ucall uc;
- bool done = false;
-
- pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled");
-
- /* Create VM */
- vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
- /* Add an extra memory slot for testing dirty logging */
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- GUEST_TEST_MEM,
- TEST_MEM_SLOT_INDEX,
- TEST_MEM_PAGES,
- KVM_MEM_LOG_DIRTY_PAGES);
-
- /*
- * Add an identity map for GVA range [0xc0000000, 0xc0002000). This
- * affects both L1 and L2. However...
- */
- virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES);
-
- /*
- * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
- * 0xc0000000.
- *
-	 * Note that prepare_eptp should be called only after L1's GPA map is
-	 * done, meaning after the last call to virt_map.
- *
- * When EPT is disabled, the L2 guest code will still access the same L1
- * GPAs as the EPT enabled case.
- */
- if (enable_ept) {
- prepare_eptp(vmx, vm, 0);
- nested_map_memslot(vmx, vm, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
- nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
- }
-
- bmap = bitmap_zalloc(TEST_MEM_PAGES);
- host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
-
- while (!done) {
- memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- /* NOT REACHED */
- case UCALL_SYNC:
- /*
- * The nested guest wrote at offset 0x1000 in the memslot, but the
- * dirty bitmap must be filled in according to L1 GPA, not L2.
- */
- kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
- if (uc.args[1]) {
- TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean");
- TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest");
- } else {
- TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest");
- }
-
- TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
- TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
- break;
- case UCALL_DONE:
- done = true;
- break;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
- }
-}
-
-int main(int argc, char *argv[])
-{
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
- test_vmx_dirty_log(/*enable_ept=*/false);
-
- if (kvm_cpu_has_ept())
- test_vmx_dirty_log(/*enable_ept=*/true);
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-#include <signal.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/time.h>
-
-#include "kselftest.h"
-
-static void guest_ud_handler(struct ex_regs *regs)
-{
- /* Loop on the ud2 until guest state is made invalid. */
-}
-
-static void guest_code(void)
-{
- asm volatile("ud2");
-}
-
-static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
-{
- struct kvm_run *run = vcpu->run;
-
- vcpu_run(vcpu);
-
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
- TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
- "Expected emulation failure, got %d",
- run->emulation_failure.suberror);
-}
-
-static void run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
-{
- /*
- * Always run twice to verify KVM handles the case where _KVM_ queues
- * an exception with invalid state and then exits to userspace, i.e.
- * that KVM doesn't explode if userspace ignores the initial error.
- */
- __run_vcpu_with_invalid_state(vcpu);
- __run_vcpu_with_invalid_state(vcpu);
-}
-
-static void set_timer(void)
-{
- struct itimerval timer;
-
- timer.it_value.tv_sec = 0;
- timer.it_value.tv_usec = 200;
- timer.it_interval = timer.it_value;
- TEST_ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
-}
-
-static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set)
-{
- static struct kvm_sregs sregs;
-
- if (!sregs.cr0)
- vcpu_sregs_get(vcpu, &sregs);
- sregs.tr.unusable = !!set;
- vcpu_sregs_set(vcpu, &sregs);
-}
-
-static void set_invalid_guest_state(struct kvm_vcpu *vcpu)
-{
- set_or_clear_invalid_guest_state(vcpu, true);
-}
-
-static void clear_invalid_guest_state(struct kvm_vcpu *vcpu)
-{
- set_or_clear_invalid_guest_state(vcpu, false);
-}
-
-static struct kvm_vcpu *get_set_sigalrm_vcpu(struct kvm_vcpu *__vcpu)
-{
- static struct kvm_vcpu *vcpu = NULL;
-
- if (__vcpu)
- vcpu = __vcpu;
- return vcpu;
-}
-
-static void sigalrm_handler(int sig)
-{
- struct kvm_vcpu *vcpu = get_set_sigalrm_vcpu(NULL);
- struct kvm_vcpu_events events;
-
- TEST_ASSERT(sig == SIGALRM, "Unexpected signal = %d", sig);
-
- vcpu_events_get(vcpu, &events);
-
- /*
-	 * If an exception is pending, attempt KVM_RUN with invalid guest state,
- * otherwise rearm the timer and keep doing so until the timer fires
- * between KVM queueing an exception and re-entering the guest.
- */
- if (events.exception.pending) {
- set_invalid_guest_state(vcpu);
- run_vcpu_with_invalid_state(vcpu);
- } else {
- set_timer();
- }
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- TEST_REQUIRE(host_cpu_is_intel);
- TEST_REQUIRE(!vm_is_unrestricted_guest(NULL));
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- get_set_sigalrm_vcpu(vcpu);
-
- vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
-
- /*
-	 * Stuff invalid guest state for L2 by making TR unusable. The next
- * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support
- * emulating invalid guest state for L2.
- */
- set_invalid_guest_state(vcpu);
- run_vcpu_with_invalid_state(vcpu);
-
- /*
- * Verify KVM also handles the case where userspace gains control while
- * an exception is pending and stuffs invalid state. Run with valid
- * guest state and a timer firing every 200us, and attempt to enter the
- * guest with invalid state when the handler interrupts KVM with an
- * exception pending.
- */
- clear_invalid_guest_state(vcpu);
- TEST_ASSERT(signal(SIGALRM, sigalrm_handler) != SIG_ERR,
- "Failed to register SIGALRM handler, errno = %d (%s)",
- errno, strerror(errno));
-
- set_timer();
- run_vcpu_with_invalid_state(vcpu);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "kselftest.h"
-
-#define ARBITRARY_IO_PORT 0x2000
-
-static struct kvm_vm *vm;
-
-static void l2_guest_code(void)
-{
- /*
- * Generate an exit to L0 userspace, i.e. main(), via I/O to an
- * arbitrary port.
- */
- asm volatile("inb %%dx, %%al"
- : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
-}
-
-static void l1_guest_code(struct vmx_pages *vmx_pages)
-{
-#define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
- GUEST_ASSERT(load_vmcs(vmx_pages));
-
- /* Prepare the VMCS for L2 execution. */
- prepare_vmcs(vmx_pages, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- /*
-	 * L2 must be run without unrestricted guest, so verify that the selftests
-	 * library hasn't enabled it. Because KVM selftests jump directly to
- * 64-bit mode, unrestricted guest support isn't required.
- */
- GUEST_ASSERT(!(vmreadz(CPU_BASED_VM_EXEC_CONTROL) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) ||
- !(vmreadz(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_UNRESTRICTED_GUEST));
-
- GUEST_ASSERT(!vmlaunch());
-
- /* L2 should triple fault after main() stuffs invalid guest state. */
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
- GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
- vm_vaddr_t vmx_pages_gva;
- struct kvm_sregs sregs;
- struct kvm_vcpu *vcpu;
- struct kvm_run *run;
- struct ucall uc;
-
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
- vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-
- /* Allocate VMX pages and shared descriptors (vmx_pages). */
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
- vcpu_run(vcpu);
-
- run = vcpu->run;
-
- /*
- * The first exit to L0 userspace should be an I/O access from L2.
- * Running L1 should launch L2 without triggering an exit to userspace.
- */
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
- "Expected IN from port %d from L2, got port %d",
- ARBITRARY_IO_PORT, run->io.port);
-
- /*
-	 * Stuff invalid guest state for L2 by making TR unusable. The next
- * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support
- * emulating invalid guest state for L2.
- */
- memset(&sregs, 0, sizeof(sregs));
- vcpu_sregs_get(vcpu, &sregs);
- sregs.tr.unusable = 1;
- vcpu_sregs_set(vcpu, &sregs);
-
- vcpu_run(vcpu);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_DONE:
- break;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- default:
- TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
- }
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * VMX control MSR test
- *
- * Copyright (C) 2022 Google LLC.
- *
- * Tests for KVM ownership of bits in the VMX entry/exit control MSRs. Checks
- * that KVM will set owned bits where appropriate, and will not if
- * KVM_X86_QUIRK_TWEAK_VMX_CTRL_MSRS is disabled.
- */
-#include <linux/bitmap.h>
-#include "kvm_util.h"
-#include "vmx.h"
-
-static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
- uint64_t mask)
-{
- uint64_t val = vcpu_get_msr(vcpu, msr_index);
- uint64_t bit;
-
- mask &= val;
-
- for_each_set_bit(bit, &mask, 64) {
- vcpu_set_msr(vcpu, msr_index, val & ~BIT_ULL(bit));
- vcpu_set_msr(vcpu, msr_index, val);
- }
-}
-
-static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
- uint64_t mask)
-{
- uint64_t val = vcpu_get_msr(vcpu, msr_index);
- uint64_t bit;
-
- mask = ~mask | val;
-
- for_each_clear_bit(bit, &mask, 64) {
- vcpu_set_msr(vcpu, msr_index, val | BIT_ULL(bit));
- vcpu_set_msr(vcpu, msr_index, val);
- }
-}
-
-static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index)
-{
- vmx_fixed0_msr_test(vcpu, msr_index, GENMASK_ULL(31, 0));
- vmx_fixed1_msr_test(vcpu, msr_index, GENMASK_ULL(63, 32));
-}
-
-static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu)
-{
- vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, 0);
- vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, -1ull);
-
- vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_BASIC,
- BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55));
-
- vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_MISC,
- BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) |
- BIT_ULL(15) | BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30));
-
- vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2);
- vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_EPT_VPID_CAP, -1ull);
- vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS);
- vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS);
- vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_EXIT_CTLS);
- vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS);
- vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_VMFUNC, -1ull);
-}
-
-static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu,
- uint64_t msr_bit,
- struct kvm_x86_cpu_feature feature)
-{
- uint64_t val;
-
- vcpu_clear_cpuid_feature(vcpu, feature);
-
- val = vcpu_get_msr(vcpu, MSR_IA32_FEAT_CTL);
- vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
- vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
- vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
- vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
- vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val);
-
- if (!kvm_cpu_has(feature))
- return;
-
- vcpu_set_cpuid_feature(vcpu, feature);
-}
-
-static void ia32_feature_control_msr_test(struct kvm_vcpu *vcpu)
-{
- uint64_t supported_bits = FEAT_CTL_LOCKED |
- FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
- FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX |
- FEAT_CTL_SGX_LC_ENABLED |
- FEAT_CTL_SGX_ENABLED |
- FEAT_CTL_LMCE_ENABLED;
- int bit, r;
-
- __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_SMX);
- __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_VMX);
- __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX, X86_FEATURE_VMX);
- __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX_LC);
- __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX);
- __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_ENABLED, X86_FEATURE_SGX);
- __ia32_feature_control_msr_test(vcpu, FEAT_CTL_LMCE_ENABLED, X86_FEATURE_MCE);
-
- for_each_clear_bit(bit, &supported_bits, 64) {
- r = _vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, BIT(bit));
- TEST_ASSERT(r == 0,
- "Setting reserved bit %d in IA32_FEATURE_CONTROL should fail", bit);
- }
-}
-
-int main(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
- /* No need to actually do KVM_RUN, thus no guest code. */
- vm = vm_create_with_one_vcpu(&vcpu, NULL);
-
- vmx_save_restore_msrs_test(vcpu);
- ia32_feature_control_msr_test(vcpu);
-
- kvm_vm_free(vm);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vmx_nested_tsc_scaling_test
- *
- * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
- *
- * This test case verifies that nested TSC scaling behaves as expected when
- * both L1 and L2 are scaled using different ratios. For this test we scale
- * L1 down and scale L2 up.
- */
-
-#include <time.h>
-
-#include "kvm_util.h"
-#include "vmx.h"
-#include "kselftest.h"
-
-/* L2 is scaled up (from L1's perspective) by this factor */
-#define L2_SCALE_FACTOR 4ULL
-
-#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)
-#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)
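The shift by 48 reflects the encoding of the VMX TSC multiplier as a fixed-point value with 48 fractional bits; the comment below is an editorial sketch, under that assumption, of the L2 TSC computation when both TSC offsetting and TSC scaling are enabled:

	/*
	 * Sketch, not part of the test:
	 *
	 *	l2_tsc = ((l1_tsc * TSC_MULTIPLIER_L2) >> 48) + TSC_OFFSET_L2;
	 *
	 * With L2_SCALE_FACTOR == 4, L2's TSC advances four times faster
	 * than L1's before TSC_OFFSET_L2 is added.
	 */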
-
-#define L2_GUEST_STACK_SIZE 64
-
-enum { USLEEP, UCHECK_L1, UCHECK_L2 };
-#define GUEST_SLEEP(sec) ucall(UCALL_SYNC, 2, USLEEP, sec)
-#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq)
-
-
-/*
- * This function checks whether the "actual" TSC frequency of a guest matches
- * its expected frequency. In order to account for delays in taking the TSC
- * measurements, a difference of 1% between the actual and the expected value
- * is tolerated.
- */
-static void compare_tsc_freq(uint64_t actual, uint64_t expected)
-{
- uint64_t tolerance, thresh_low, thresh_high;
-
- tolerance = expected / 100;
- thresh_low = expected - tolerance;
- thresh_high = expected + tolerance;
-
- TEST_ASSERT(thresh_low < actual,
- "TSC freq is expected to be between %"PRIu64" and %"PRIu64
- " but it actually is %"PRIu64,
- thresh_low, thresh_high, actual);
- TEST_ASSERT(thresh_high > actual,
- "TSC freq is expected to be between %"PRIu64" and %"PRIu64
- " but it actually is %"PRIu64,
- thresh_low, thresh_high, actual);
-}
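A worked example of the 1% window, with made-up numbers rather than anything measured by the test:

	/*
	 * expected    = 3000000000 (a 3 GHz guest)
	 * tolerance   = expected / 100 = 30000000
	 * thresh_low  = 2970000000, thresh_high = 3030000000
	 * compare_tsc_freq() passes for any actual strictly inside that range.
	 */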
-
-static void check_tsc_freq(int level)
-{
- uint64_t tsc_start, tsc_end, tsc_freq;
-
- /*
- * Reading the TSC twice with about a second's difference should give
- * us an approximation of the TSC frequency from the guest's
- * perspective. Now, this won't be completely accurate, but it should
- * be good enough for the purposes of this test.
- */
- tsc_start = rdmsr(MSR_IA32_TSC);
- GUEST_SLEEP(1);
- tsc_end = rdmsr(MSR_IA32_TSC);
-
- tsc_freq = tsc_end - tsc_start;
-
- GUEST_CHECK(level, tsc_freq);
-}
-
-static void l2_guest_code(void)
-{
- check_tsc_freq(UCHECK_L2);
-
- /* exit to L1 */
- __asm__ __volatile__("vmcall");
-}
-
-static void l1_guest_code(struct vmx_pages *vmx_pages)
-{
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- uint32_t control;
-
- /* check that L1's frequency looks alright before launching L2 */
- check_tsc_freq(UCHECK_L1);
-
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
- GUEST_ASSERT(load_vmcs(vmx_pages));
-
- /* prepare the VMCS for L2 execution */
- prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- /* enable TSC offsetting and TSC scaling for L2 */
- control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
- control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
- vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
-
- control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
- control |= SECONDARY_EXEC_TSC_SCALING;
- vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
-
- vmwrite(TSC_OFFSET, TSC_OFFSET_L2);
- vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2);
- vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32);
-
- /* launch L2 */
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
- /* check that L1's frequency still looks good */
- check_tsc_freq(UCHECK_L1);
-
- GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- vm_vaddr_t vmx_pages_gva;
-
- uint64_t tsc_start, tsc_end;
- uint64_t tsc_khz;
- uint64_t l1_scale_factor;
- uint64_t l0_tsc_freq = 0;
- uint64_t l1_tsc_freq = 0;
- uint64_t l2_tsc_freq = 0;
-
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
- TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
-
- /*
- * We set L1's scale factor to be a random number from 2 to 10.
- * Ideally we would do the same for L2's factor but that one is
- * referenced by both main() and l1_guest_code() and using a global
- * variable does not work.
- */
- srand(time(NULL));
- l1_scale_factor = (rand() % 9) + 2;
- printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor);
- printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR);
-
- tsc_start = rdtsc();
- sleep(1);
- tsc_end = rdtsc();
-
- l0_tsc_freq = tsc_end - tsc_start;
- printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
-
- vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
- tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
- TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
-
- /* scale down L1's TSC frequency */
- vcpu_ioctl(vcpu, KVM_SET_TSC_KHZ, (void *) (tsc_khz / l1_scale_factor));
-
- for (;;) {
- struct ucall uc;
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- case UCALL_SYNC:
- switch (uc.args[0]) {
- case USLEEP:
- sleep(uc.args[1]);
- break;
- case UCHECK_L1:
- l1_tsc_freq = uc.args[1];
- printf("L1's TSC frequency is around: %"PRIu64
- "\n", l1_tsc_freq);
-
- compare_tsc_freq(l1_tsc_freq,
- l0_tsc_freq / l1_scale_factor);
- break;
- case UCHECK_L2:
- l2_tsc_freq = uc.args[1];
- printf("L2's TSC frequency is around: %"PRIu64
- "\n", l2_tsc_freq);
-
- compare_tsc_freq(l2_tsc_freq,
- l1_tsc_freq * L2_SCALE_FACTOR);
- break;
- }
- break;
- case UCALL_DONE:
- goto done;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
- }
-
-done:
- kvm_vm_free(vm);
- return 0;
-}
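
For reference, the TSC_MULTIPLIER value programmed above is a fixed-point ratio with 48 fractional bits, which is why the integer scale factor is shifted left by 48. Below is a minimal standalone sketch of that scaling math (scale first, then apply the offset); guest_tsc() is a made-up helper name and __uint128_t support is assumed.

/* tsc_scale_sketch.c: illustrative only; guest_tsc() is a hypothetical helper. */
#include <stdint.h>
#include <stdio.h>

#define TSC_SCALE_FRAC_BITS 48

static uint64_t guest_tsc(uint64_t host_tsc, uint64_t multiplier, uint64_t offset)
{
	/* 128-bit intermediate so the multiply cannot overflow. */
	return (uint64_t)(((__uint128_t)host_tsc * multiplier) >> TSC_SCALE_FRAC_BITS) + offset;
}

int main(void)
{
	uint64_t mult = 4ULL << TSC_SCALE_FRAC_BITS;	/* same encoding as TSC_MULTIPLIER_L2 */

	/* A host TSC of 1,000,000 reads as 4,000,000 after scaling by 4. */
	printf("%llu\n", (unsigned long long)guest_tsc(1000000, mult, 0));
	return 0;
}
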
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Test for VMX-pmu perf capability msr
- *
- * Copyright (C) 2021 Intel Corporation
- *
- * Test the effect of various CPUID settings on the
- * MSR_IA32_PERF_CAPABILITIES MSR, verify that what we write with
- * KVM_SET_MSR is _not_ modified by the guest, check that it can be
- * retrieved with KVM_GET_MSR, and verify that invalid LBR formats
- * are rejected.
- */
-#include <sys/ioctl.h>
-
-#include <linux/bitmap.h>
-
-#include "kvm_test_harness.h"
-#include "kvm_util.h"
-#include "vmx.h"
-
-static union perf_capabilities {
- struct {
- u64 lbr_format:6;
- u64 pebs_trap:1;
- u64 pebs_arch_reg:1;
- u64 pebs_format:4;
- u64 smm_freeze:1;
- u64 full_width_write:1;
- u64 pebs_baseline:1;
- u64 perf_metrics:1;
- u64 pebs_output_pt_available:1;
- u64 anythread_deprecated:1;
- };
- u64 capabilities;
-} host_cap;
-
-/*
- * The LBR format and most PEBS features are immutable; all other features are
- * fungible (if supported by the host and KVM).
- */
-static const union perf_capabilities immutable_caps = {
- .lbr_format = -1,
- .pebs_trap = 1,
- .pebs_arch_reg = 1,
- .pebs_format = -1,
- .pebs_baseline = 1,
-};
-
-static const union perf_capabilities format_caps = {
- .lbr_format = -1,
- .pebs_format = -1,
-};
-
-static void guest_test_perf_capabilities_gp(uint64_t val)
-{
- uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val);
-
- __GUEST_ASSERT(vector == GP_VECTOR,
- "Expected #GP for value '0x%lx', got vector '0x%x'",
- val, vector);
-}
-
-static void guest_code(uint64_t current_val)
-{
- int i;
-
- guest_test_perf_capabilities_gp(current_val);
- guest_test_perf_capabilities_gp(0);
-
- for (i = 0; i < 64; i++)
- guest_test_perf_capabilities_gp(current_val ^ BIT_ULL(i));
-
- GUEST_DONE();
-}
-
-KVM_ONE_VCPU_TEST_SUITE(vmx_pmu_caps);
-
-/*
- * Verify that guest WRMSRs to PERF_CAPABILITIES #GP regardless of the value
- * written, that the guest always sees the userspace controlled value, and that
- * PERF_CAPABILITIES is immutable after KVM_RUN.
- */
-KVM_ONE_VCPU_TEST(vmx_pmu_caps, guest_wrmsr_perf_capabilities, guest_code)
-{
- struct ucall uc;
- int r, i;
-
- vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-
- vcpu_args_set(vcpu, 1, host_cap.capabilities);
- vcpu_run(vcpu);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_DONE:
- break;
- default:
- TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
- }
-
- TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES),
- host_cap.capabilities);
-
- vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-
- r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
- TEST_ASSERT(!r, "Post-KVM_RUN write '0' didn't fail");
-
- for (i = 0; i < 64; i++) {
- r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
- host_cap.capabilities ^ BIT_ULL(i));
-		TEST_ASSERT(!r, "Post-KVM_RUN write '0x%llx' didn't fail",
- host_cap.capabilities ^ BIT_ULL(i));
- }
-}
-
-/*
- * Verify KVM allows writing PERF_CAPABILITIES with all KVM-supported features
- * enabled, as well as '0' (to disable all features).
- */
-KVM_ONE_VCPU_TEST(vmx_pmu_caps, basic_perf_capabilities, guest_code)
-{
- vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
- vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-}
-
-KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code)
-{
- const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities;
- int bit;
-
- for_each_set_bit(bit, &fungible_caps, 64) {
- vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(bit));
- vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
- host_cap.capabilities & ~BIT_ULL(bit));
- }
- vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-}
-
-/*
- * Verify KVM rejects attempts to set unsupported and/or immutable features in
- * PERF_CAPABILITIES. Note, LBR format and PEBS format need to be validated
- * separately as they are multi-bit values, e.g. toggling or setting a single
- * bit can generate a false positive without dedicated safeguards.
- */
-KVM_ONE_VCPU_TEST(vmx_pmu_caps, immutable_perf_capabilities, guest_code)
-{
- const uint64_t reserved_caps = (~host_cap.capabilities |
- immutable_caps.capabilities) &
- ~format_caps.capabilities;
- union perf_capabilities val = host_cap;
- int r, bit;
-
- for_each_set_bit(bit, &reserved_caps, 64) {
- r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
- host_cap.capabilities ^ BIT_ULL(bit));
- TEST_ASSERT(!r, "%s immutable feature 0x%llx (bit %d) didn't fail",
- host_cap.capabilities & BIT_ULL(bit) ? "Setting" : "Clearing",
- BIT_ULL(bit), bit);
- }
-
- /*
- * KVM only supports the host's native LBR format, as well as '0' (to
- * disable LBR support). Verify KVM rejects all other LBR formats.
- */
- for (val.lbr_format = 1; val.lbr_format; val.lbr_format++) {
- if (val.lbr_format == host_cap.lbr_format)
- continue;
-
- r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
- TEST_ASSERT(!r, "Bad LBR FMT = 0x%x didn't fail, host = 0x%x",
- val.lbr_format, host_cap.lbr_format);
- }
-
- /* Ditto for the PEBS format. */
- for (val.pebs_format = 1; val.pebs_format; val.pebs_format++) {
- if (val.pebs_format == host_cap.pebs_format)
- continue;
-
- r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
- TEST_ASSERT(!r, "Bad PEBS FMT = 0x%x didn't fail, host = 0x%x",
- val.pebs_format, host_cap.pebs_format);
- }
-}
-
-/*
- * Test that LBR MSRs are writable when LBRs are enabled, and then verify that
- * disabling the vPMU via CPUID also disables LBR support. Set bits 2:0 of
- * LBR_TOS as those bits are writable across all uarch implementations (arch
- * LBRs will need to poke a different MSR).
- */
-KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code)
-{
- int r;
-
- if (!host_cap.lbr_format)
- return;
-
- vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
- vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
-
- vcpu_clear_cpuid_entry(vcpu, X86_PROPERTY_PMU_VERSION.function);
-
- r = _vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
- TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU");
-}
-
-KVM_ONE_VCPU_TEST(vmx_pmu_caps, perf_capabilities_unsupported, guest_code)
-{
- uint64_t val;
- int i, r;
-
- vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
- val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES);
- TEST_ASSERT_EQ(val, host_cap.capabilities);
-
- vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_PDCM);
-
- val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES);
- TEST_ASSERT_EQ(val, 0);
-
- vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
-
- for (i = 0; i < 64; i++) {
- r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(i));
- TEST_ASSERT(!r, "Setting PERF_CAPABILITIES bit %d (= 0x%llx) should fail without PDCM",
- i, BIT_ULL(i));
- }
-}
-
-int main(int argc, char *argv[])
-{
- TEST_REQUIRE(kvm_is_pmu_enabled());
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
-
- TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
- TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
-
- host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
-
- TEST_ASSERT(host_cap.full_width_write,
- "Full-width writes should always be supported");
-
- return test_harness_run(argc, argv);
-}
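
For context, PERF_CAPABILITIES (MSR 0x345) can also be inspected from the host through the msr driver; the sketch below decodes the same LBR/PEBS format fields as the perf_capabilities union above. It assumes root privileges and a loaded msr module, and is illustrative rather than part of the test.

/* perf_caps_sketch.c: read MSR_IA32_PERF_CAPABILITIES via /dev/cpu/0/msr. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define MSR_IA32_PERF_CAPABILITIES 0x345

int main(void)
{
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0 || pread(fd, &val, sizeof(val), MSR_IA32_PERF_CAPABILITIES) != sizeof(val)) {
		perror("rdmsr");
		return 1;
	}

	/* Field offsets match the perf_capabilities union in the test above. */
	printf("LBR format:       %llu\n", (unsigned long long)(val & 0x3f));
	printf("PEBS format:      %llu\n", (unsigned long long)((val >> 8) & 0xf));
	printf("Full-width write: %llu\n", (unsigned long long)((val >> 13) & 1));

	close(fd);
	return 0;
}
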
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * VMX-preemption timer test
- *
- * Copyright (C) 2020, Google, LLC.
- *
- * Test to ensure that VM-Enter after migration doesn't
- * incorrectly restart the timer with the full timer
- * value instead of the partially decayed timer value
- *
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#define PREEMPTION_TIMER_VALUE 100000000ull
-#define PREEMPTION_TIMER_VALUE_THRESHOLD1 80000000ull
-
-u32 vmx_pt_rate;
-bool l2_save_restore_done;
-static u64 l2_vmx_pt_start;
-volatile u64 l2_vmx_pt_finish;
-
-union vmx_basic basic;
-union vmx_ctrl_msr ctrl_pin_rev;
-union vmx_ctrl_msr ctrl_exit_rev;
-
-void l2_guest_code(void)
-{
- u64 vmx_pt_delta;
-
- vmcall();
- l2_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
-
- /*
- * Wait until the 1st threshold has passed
- */
- do {
- l2_vmx_pt_finish = rdtsc();
- vmx_pt_delta = (l2_vmx_pt_finish - l2_vmx_pt_start) >>
- vmx_pt_rate;
- } while (vmx_pt_delta < PREEMPTION_TIMER_VALUE_THRESHOLD1);
-
- /*
- * Force L2 through Save and Restore cycle
- */
- GUEST_SYNC(1);
-
- l2_save_restore_done = 1;
-
- /*
- * Now wait for the preemption timer to fire and
- * exit to L1
- */
- while ((l2_vmx_pt_finish = rdtsc()))
- ;
-}
-
-void l1_guest_code(struct vmx_pages *vmx_pages)
-{
-#define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- u64 l1_vmx_pt_start;
- u64 l1_vmx_pt_finish;
- u64 l1_tsc_deadline, l2_tsc_deadline;
-
- GUEST_ASSERT(vmx_pages->vmcs_gpa);
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
- GUEST_ASSERT(load_vmcs(vmx_pages));
- GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
-
- prepare_vmcs(vmx_pages, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- /*
- * Check for Preemption timer support
- */
- basic.val = rdmsr(MSR_IA32_VMX_BASIC);
- ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS
- : MSR_IA32_VMX_PINBASED_CTLS);
- ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT_CTLS
- : MSR_IA32_VMX_EXIT_CTLS);
-
- if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) ||
- !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
- return;
-
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
- vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));
-
- /*
- * Turn on PIN control and resume the guest
- */
- GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL,
- vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
- PIN_BASED_VMX_PREEMPTION_TIMER));
-
- GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE,
- PREEMPTION_TIMER_VALUE));
-
- vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;
-
- l2_save_restore_done = 0;
-
- l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
-
- GUEST_ASSERT(!vmresume());
-
- l1_vmx_pt_finish = rdtsc();
-
- /*
- * Ensure exit from L2 happens after L2 goes through
- * save and restore
- */
- GUEST_ASSERT(l2_save_restore_done);
-
- /*
- * Ensure the exit from L2 is due to preemption timer expiry
- */
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER);
-
- l1_tsc_deadline = l1_vmx_pt_start +
- (PREEMPTION_TIMER_VALUE << vmx_pt_rate);
-
- l2_tsc_deadline = l2_vmx_pt_start +
- (PREEMPTION_TIMER_VALUE << vmx_pt_rate);
-
- /*
- * Sync with the host and pass the l1|l2 pt_expiry_finish times and
- * tsc deadlines so that host can verify they are as expected
- */
- GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline,
- l2_vmx_pt_finish, l2_tsc_deadline);
-}
-
-void guest_code(struct vmx_pages *vmx_pages)
-{
- if (vmx_pages)
- l1_guest_code(vmx_pages);
-
- GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
- vm_vaddr_t vmx_pages_gva = 0;
-
- struct kvm_regs regs1, regs2;
- struct kvm_vm *vm;
- struct kvm_vcpu *vcpu;
- struct kvm_x86_state *state;
- struct ucall uc;
- int stage;
-
- /*
- * AMD currently does not implement any VMX features, so for now we
- * just early out.
- */
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
-
- /* Create VM */
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-	vcpu_regs_get(vcpu, &regs1);
-
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
- for (stage = 1;; stage++) {
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- /* NOT REACHED */
- case UCALL_SYNC:
- break;
- case UCALL_DONE:
- goto done;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
-
- /* UCALL_SYNC is handled here. */
- TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
- uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
- stage, (ulong)uc.args[1]);
- /*
-		 * If this is stage 2, verify that the VMX preemption timer expiry
- * is as expected.
- * From L1's perspective verify Preemption timer hasn't
- * expired too early.
- * From L2's perspective verify Preemption timer hasn't
- * expired too late.
- */
- if (stage == 2) {
-
- pr_info("Stage %d: L1 PT expiry TSC (%lu) , L1 TSC deadline (%lu)\n",
- stage, uc.args[2], uc.args[3]);
-
- pr_info("Stage %d: L2 PT expiry TSC (%lu) , L2 TSC deadline (%lu)\n",
- stage, uc.args[4], uc.args[5]);
-
- TEST_ASSERT(uc.args[2] >= uc.args[3],
- "Stage %d: L1 PT expiry TSC (%lu) < L1 TSC deadline (%lu)",
- stage, uc.args[2], uc.args[3]);
-
- TEST_ASSERT(uc.args[4] < uc.args[5],
- "Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)",
- stage, uc.args[4], uc.args[5]);
- }
-
- state = vcpu_save_state(vcpu);
-		memset(&regs1, 0, sizeof(regs1));
-		vcpu_regs_get(vcpu, &regs1);
-
- kvm_vm_release(vm);
-
- /* Restore state in a new VM. */
- vcpu = vm_recreate_with_one_vcpu(vm);
- vcpu_load_state(vcpu, state);
- kvm_x86_state_cleanup(state);
-
-		memset(&regs2, 0, sizeof(regs2));
-		vcpu_regs_get(vcpu, &regs2);
-		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
- "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
- (ulong) regs2.rdi, (ulong) regs2.rsi);
- }
-
-done:
- kvm_vm_free(vm);
-}
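
The deadline arithmetic used above generalizes to a one-liner: the preemption timer decrements once every 2^rate TSC ticks, where rate is MSR_IA32_VMX_MISC[4:0], so a timer value converts to TSC ticks by shifting left. A hypothetical helper capturing the same relationship the test computes inline:

#include <stdint.h>

/* rate = MSR_IA32_VMX_MISC & 0x1f, as read in l1_guest_code() above. */
static inline uint64_t pt_deadline_tsc(uint64_t start_tsc, uint64_t timer_value,
				       unsigned int rate)
{
	/* The timer ticks once per 2^rate TSC increments, so the deadline in
	 * TSC units is the start plus the timer value shifted by rate. */
	return start_tsc + (timer_value << rate);
}
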
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vmx_set_nested_state_test
- *
- * Copyright (C) 2019, Google LLC.
- *
- * This test verifies the integrity of calling the ioctl KVM_SET_NESTED_STATE.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#include <errno.h>
-#include <linux/kvm.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <unistd.h>
-
-/*
- * Mirror of VMCS12_REVISION in arch/x86/kvm/vmx/vmcs12.h. If that value
- * changes this should be updated.
- */
-#define VMCS12_REVISION 0x11e57ed0
-
-bool have_evmcs;
-
-void test_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state *state)
-{
- vcpu_nested_state_set(vcpu, state);
-}
-
-void test_nested_state_expect_errno(struct kvm_vcpu *vcpu,
- struct kvm_nested_state *state,
- int expected_errno)
-{
- int rv;
-
- rv = __vcpu_nested_state_set(vcpu, state);
- TEST_ASSERT(rv == -1 && errno == expected_errno,
- "Expected %s (%d) from vcpu_nested_state_set but got rv: %i errno: %s (%d)",
- strerror(expected_errno), expected_errno, rv, strerror(errno),
- errno);
-}
-
-void test_nested_state_expect_einval(struct kvm_vcpu *vcpu,
- struct kvm_nested_state *state)
-{
- test_nested_state_expect_errno(vcpu, state, EINVAL);
-}
-
-void test_nested_state_expect_efault(struct kvm_vcpu *vcpu,
- struct kvm_nested_state *state)
-{
- test_nested_state_expect_errno(vcpu, state, EFAULT);
-}
-
-void set_revision_id_for_vmcs12(struct kvm_nested_state *state,
- u32 vmcs12_revision)
-{
- /* Set revision_id in vmcs12 to vmcs12_revision. */
- memcpy(&state->data, &vmcs12_revision, sizeof(u32));
-}
-
-void set_default_state(struct kvm_nested_state *state)
-{
- memset(state, 0, sizeof(*state));
- state->flags = KVM_STATE_NESTED_RUN_PENDING |
- KVM_STATE_NESTED_GUEST_MODE;
- state->format = 0;
- state->size = sizeof(*state);
-}
-
-void set_default_vmx_state(struct kvm_nested_state *state, int size)
-{
- memset(state, 0, size);
- if (have_evmcs)
- state->flags = KVM_STATE_NESTED_EVMCS;
- state->format = 0;
- state->size = size;
- state->hdr.vmx.vmxon_pa = 0x1000;
- state->hdr.vmx.vmcs12_pa = 0x2000;
- state->hdr.vmx.smm.flags = 0;
- set_revision_id_for_vmcs12(state, VMCS12_REVISION);
-}
-
-void test_vmx_nested_state(struct kvm_vcpu *vcpu)
-{
- /* Add a page for VMCS12. */
- const int state_sz = sizeof(struct kvm_nested_state) + getpagesize();
- struct kvm_nested_state *state =
- (struct kvm_nested_state *)malloc(state_sz);
-
- /* The format must be set to 0. 0 for VMX, 1 for SVM. */
- set_default_vmx_state(state, state_sz);
- state->format = 1;
- test_nested_state_expect_einval(vcpu, state);
-
- /*
- * We cannot virtualize anything if the guest does not have VMX
- * enabled.
- */
- set_default_vmx_state(state, state_sz);
- test_nested_state_expect_einval(vcpu, state);
-
- /*
- * We cannot virtualize anything if the guest does not have VMX
- * enabled. We expect KVM_SET_NESTED_STATE to return 0 if vmxon_pa
- * is set to -1ull, but the flags must be zero.
- */
- set_default_vmx_state(state, state_sz);
- state->hdr.vmx.vmxon_pa = -1ull;
- test_nested_state_expect_einval(vcpu, state);
-
- state->hdr.vmx.vmcs12_pa = -1ull;
- state->flags = KVM_STATE_NESTED_EVMCS;
- test_nested_state_expect_einval(vcpu, state);
-
- state->flags = 0;
- test_nested_state(vcpu, state);
-
- /* Enable VMX in the guest CPUID. */
- vcpu_set_cpuid_feature(vcpu, X86_FEATURE_VMX);
-
- /*
- * Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without
- * setting the nested state. When the eVMCS flag is not set, the
- * expected return value is '0'.
- */
- set_default_vmx_state(state, state_sz);
- state->flags = 0;
- state->hdr.vmx.vmxon_pa = -1ull;
- state->hdr.vmx.vmcs12_pa = -1ull;
- test_nested_state(vcpu, state);
-
- /*
- * When eVMCS is supported, the eVMCS flag can only be set if the
- * enlightened VMCS capability has been enabled.
- */
- if (have_evmcs) {
- state->flags = KVM_STATE_NESTED_EVMCS;
- test_nested_state_expect_einval(vcpu, state);
- vcpu_enable_evmcs(vcpu);
- test_nested_state(vcpu, state);
- }
-
- /* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */
- state->hdr.vmx.smm.flags = 1;
- test_nested_state_expect_einval(vcpu, state);
-
- /* Invalid flags are rejected. */
- set_default_vmx_state(state, state_sz);
- state->hdr.vmx.flags = ~0;
- test_nested_state_expect_einval(vcpu, state);
-
- /* It is invalid to have vmxon_pa == -1ull and vmcs_pa != -1ull. */
- set_default_vmx_state(state, state_sz);
- state->hdr.vmx.vmxon_pa = -1ull;
- state->flags = 0;
- test_nested_state_expect_einval(vcpu, state);
-
- /* It is invalid to have vmxon_pa set to a non-page aligned address. */
- set_default_vmx_state(state, state_sz);
- state->hdr.vmx.vmxon_pa = 1;
- test_nested_state_expect_einval(vcpu, state);
-
- /*
- * It is invalid to have KVM_STATE_NESTED_SMM_GUEST_MODE and
- * KVM_STATE_NESTED_GUEST_MODE set together.
- */
- set_default_vmx_state(state, state_sz);
- state->flags = KVM_STATE_NESTED_GUEST_MODE |
- KVM_STATE_NESTED_RUN_PENDING;
- state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
- test_nested_state_expect_einval(vcpu, state);
-
- /*
- * It is invalid to have any of the SMM flags set besides:
- * KVM_STATE_NESTED_SMM_GUEST_MODE
- * KVM_STATE_NESTED_SMM_VMXON
- */
- set_default_vmx_state(state, state_sz);
- state->hdr.vmx.smm.flags = ~(KVM_STATE_NESTED_SMM_GUEST_MODE |
- KVM_STATE_NESTED_SMM_VMXON);
- test_nested_state_expect_einval(vcpu, state);
-
- /* Outside SMM, SMM flags must be zero. */
- set_default_vmx_state(state, state_sz);
- state->flags = 0;
- state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
- test_nested_state_expect_einval(vcpu, state);
-
- /*
- * Size must be large enough to fit kvm_nested_state and vmcs12
- * if VMCS12 physical address is set
- */
- set_default_vmx_state(state, state_sz);
- state->size = sizeof(*state);
- state->flags = 0;
- test_nested_state_expect_einval(vcpu, state);
-
- set_default_vmx_state(state, state_sz);
- state->size = sizeof(*state);
- state->flags = 0;
- state->hdr.vmx.vmcs12_pa = -1;
- test_nested_state(vcpu, state);
-
- /*
- * KVM_SET_NESTED_STATE succeeds with invalid VMCS
- * contents but L2 not running.
- */
- set_default_vmx_state(state, state_sz);
- state->flags = 0;
- test_nested_state(vcpu, state);
-
- /* Invalid flags are rejected, even if no VMCS loaded. */
- set_default_vmx_state(state, state_sz);
- state->size = sizeof(*state);
- state->flags = 0;
- state->hdr.vmx.vmcs12_pa = -1;
- state->hdr.vmx.flags = ~0;
- test_nested_state_expect_einval(vcpu, state);
-
- /* vmxon_pa cannot be the same address as vmcs_pa. */
- set_default_vmx_state(state, state_sz);
- state->hdr.vmx.vmxon_pa = 0;
- state->hdr.vmx.vmcs12_pa = 0;
- test_nested_state_expect_einval(vcpu, state);
-
- /*
- * Test that if we leave nesting the state reflects that when we get
- * it again.
- */
- set_default_vmx_state(state, state_sz);
- state->hdr.vmx.vmxon_pa = -1ull;
- state->hdr.vmx.vmcs12_pa = -1ull;
- state->flags = 0;
- test_nested_state(vcpu, state);
- vcpu_nested_state_get(vcpu, state);
- TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
- "Size must be between %ld and %d. The size returned was %d.",
- sizeof(*state), state_sz, state->size);
- TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull.");
- TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull.");
-
- free(state);
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vm *vm;
- struct kvm_nested_state state;
- struct kvm_vcpu *vcpu;
-
- have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
-
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
-
- /*
- * AMD currently does not implement set_nested_state, so for now we
- * just early out.
- */
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
- vm = vm_create_with_one_vcpu(&vcpu, NULL);
-
- /*
- * First run tests with VMX disabled to check error handling.
- */
- vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
-
-	/* Passing a NULL kvm_nested_state causes an EFAULT. */
- test_nested_state_expect_efault(vcpu, NULL);
-
- /* 'size' cannot be smaller than sizeof(kvm_nested_state). */
- set_default_state(&state);
- state.size = 0;
- test_nested_state_expect_einval(vcpu, &state);
-
- /*
- * Setting the flags 0xf fails the flags check. The only flags that
- * can be used are:
- * KVM_STATE_NESTED_GUEST_MODE
- * KVM_STATE_NESTED_RUN_PENDING
- * KVM_STATE_NESTED_EVMCS
- */
- set_default_state(&state);
- state.flags = 0xf;
- test_nested_state_expect_einval(vcpu, &state);
-
- /*
- * If KVM_STATE_NESTED_RUN_PENDING is set then
- * KVM_STATE_NESTED_GUEST_MODE has to be set as well.
- */
- set_default_state(&state);
- state.flags = KVM_STATE_NESTED_RUN_PENDING;
- test_nested_state_expect_einval(vcpu, &state);
-
- test_vmx_nested_state(vcpu);
-
- kvm_vm_free(vm);
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vmx_tsc_adjust_test
- *
- * Copyright (C) 2018, Google LLC.
- *
- * IA32_TSC_ADJUST test
- *
- * According to the SDM, "if an execution of WRMSR to the
- * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC,
- * the logical processor also adds (or subtracts) value X from the
- * IA32_TSC_ADJUST MSR."
- *
- * Note that when L1 doesn't intercept writes to IA32_TSC, a
- * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC
- * value.
- *
- * This test verifies that this unusual case is handled correctly.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "kselftest.h"
-
-#ifndef MSR_IA32_TSC_ADJUST
-#define MSR_IA32_TSC_ADJUST 0x3b
-#endif
-
-#define TSC_ADJUST_VALUE (1ll << 32)
-#define TSC_OFFSET_VALUE -(1ll << 48)
-
-enum {
- PORT_ABORT = 0x1000,
- PORT_REPORT,
- PORT_DONE,
-};
-
-enum {
- VMXON_PAGE = 0,
- VMCS_PAGE,
- MSR_BITMAP_PAGE,
-
- NUM_VMX_PAGES,
-};
-
-/* The virtual machine object. */
-static struct kvm_vm *vm;
-
-static void check_ia32_tsc_adjust(int64_t max)
-{
- int64_t adjust;
-
- adjust = rdmsr(MSR_IA32_TSC_ADJUST);
- GUEST_SYNC(adjust);
- GUEST_ASSERT(adjust <= max);
-}
-
-static void l2_guest_code(void)
-{
- uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE;
-
- wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE);
- check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
-
- /* Exit to L1 */
- __asm__ __volatile__("vmcall");
-}
-
-static void l1_guest_code(struct vmx_pages *vmx_pages)
-{
-#define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- uint32_t control;
- uintptr_t save_cr3;
-
- GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE);
- wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
- check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
-
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
- GUEST_ASSERT(load_vmcs(vmx_pages));
-
- /* Prepare the VMCS for L2 execution. */
- prepare_vmcs(vmx_pages, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
- control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
- control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
- vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
- vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
-
- /* Jump into L2. First, test failure to load guest CR3. */
- save_cr3 = vmreadz(GUEST_CR3);
- vmwrite(GUEST_CR3, -1ull);
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
- (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
- check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
- vmwrite(GUEST_CR3, save_cr3);
-
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
- check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
-
- GUEST_DONE();
-}
-
-static void report(int64_t val)
-{
- pr_info("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
- val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
-}
-
-int main(int argc, char *argv[])
-{
- vm_vaddr_t vmx_pages_gva;
- struct kvm_vcpu *vcpu;
-
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
- vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code);
-
- /* Allocate VMX pages and shared descriptors (vmx_pages). */
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
- for (;;) {
- struct ucall uc;
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- /* NOT REACHED */
- case UCALL_SYNC:
- report(uc.args[1]);
- break;
- case UCALL_DONE:
- goto done;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
- }
-
-done:
- kvm_vm_free(vm);
- return 0;
-}
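
A toy model of the IA32_TSC_ADJUST rule this test relies on: any delta that a WRMSR applies to the TSC is mirrored into TSC_ADJUST. The struct and function names below are made up for illustration, and the model ignores the fact that a real TSC keeps counting between operations.

#include <stdint.h>

struct tsc_model {
	int64_t tsc;
	int64_t tsc_adjust;
};

/* Model of WRMSR(IA32_TSC, new_tsc) per the SDM quote in the header comment. */
static void model_wrmsr_tsc(struct tsc_model *m, int64_t new_tsc)
{
	int64_t delta = new_tsc - m->tsc;

	m->tsc = new_tsc;
	m->tsc_adjust += delta;
}
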
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * xapic_ipi_test
- *
- * Copyright (C) 2020, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
- * another vCPU that is halted when KVM's backing page for the APIC access
- * address has been moved by mm.
- *
- * The test starts two vCPUs: one that sends IPIs and one that continually
- * executes HLT. The sender checks that the halter has woken from the HLT and
- * has reentered HLT before sending the next IPI. While the vCPUs are running,
- * the host continually calls migrate_pages to move all of the process' pages
- * amongst the available numa nodes on the machine.
- *
- * Migration is a command line option. When used on non-NUMA machines, the
- * test will exit with an error. The test is still useful on non-NUMA
- * machines for testing IPIs.
- */
-#include <getopt.h>
-#include <pthread.h>
-#include <inttypes.h>
-#include <string.h>
-#include <time.h>
-
-#include "kvm_util.h"
-#include "numaif.h"
-#include "processor.h"
-#include "test_util.h"
-#include "vmx.h"
-
-/* Default running time for the test */
-#define DEFAULT_RUN_SECS 3
-
-/* Default delay between migrate_pages calls (microseconds) */
-#define DEFAULT_DELAY_USECS 500000
-
-/*
- * Vector for IPI from sender vCPU to halting vCPU.
- * Value is arbitrary and was chosen for the alternating bit pattern. Any
- * value should work.
- */
-#define IPI_VECTOR 0xa5
-
-/*
- * Incremented in the IPI handler. Provides evidence to the sender that the IPI
- * arrived at the destination
- */
-static volatile uint64_t ipis_rcvd;
-
-/* Data struct shared between host main thread and vCPUs */
-struct test_data_page {
- uint32_t halter_apic_id;
- volatile uint64_t hlt_count;
- volatile uint64_t wake_count;
- uint64_t ipis_sent;
- uint64_t migrations_attempted;
- uint64_t migrations_completed;
- uint32_t icr;
- uint32_t icr2;
- uint32_t halter_tpr;
- uint32_t halter_ppr;
-
- /*
- * Record local version register as a cross-check that APIC access
- * worked. Value should match what KVM reports (APIC_VERSION in
- * arch/x86/kvm/lapic.c). If test is failing, check that values match
- * to determine whether APIC access exits are working.
- */
- uint32_t halter_lvr;
-};
-
-struct thread_params {
- struct test_data_page *data;
- struct kvm_vcpu *vcpu;
- uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
-};
-
-void verify_apic_base_addr(void)
-{
- uint64_t msr = rdmsr(MSR_IA32_APICBASE);
- uint64_t base = GET_APIC_BASE(msr);
-
- GUEST_ASSERT(base == APIC_DEFAULT_GPA);
-}
-
-static void halter_guest_code(struct test_data_page *data)
-{
- verify_apic_base_addr();
- xapic_enable();
-
- data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
- data->halter_lvr = xapic_read_reg(APIC_LVR);
-
- /*
- * Loop forever HLTing and recording halts & wakes. Disable interrupts
- * each time around to minimize window between signaling the pending
-	 * each time around to minimize the window between signaling the pending
- * first run as this vCPU executes first and the host waits for it to
- * signal going into first halt before starting the sender vCPU. Record
- * TPR and PPR for diagnostic purposes in case the test fails.
- */
- for (;;) {
- data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
- data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
- data->hlt_count++;
- asm volatile("sti; hlt; cli");
- data->wake_count++;
- }
-}
-
-/*
- * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
- * enable diagnosing errant writes to the APIC access address backing page in
- * case of test failure.
- */
-static void guest_ipi_handler(struct ex_regs *regs)
-{
- ipis_rcvd++;
- xapic_write_reg(APIC_EOI, 77);
-}
-
-static void sender_guest_code(struct test_data_page *data)
-{
- uint64_t last_wake_count;
- uint64_t last_hlt_count;
- uint64_t last_ipis_rcvd_count;
- uint32_t icr_val;
- uint32_t icr2_val;
- uint64_t tsc_start;
-
- verify_apic_base_addr();
- xapic_enable();
-
- /*
- * Init interrupt command register for sending IPIs
- *
- * Delivery mode=fixed, per SDM:
- * "Delivers the interrupt specified in the vector field to the target
- * processor."
- *
- * Destination mode=physical i.e. specify target by its local APIC
- * ID. This vCPU assumes that the halter vCPU has already started and
- * set data->halter_apic_id.
- */
- icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
- icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
- data->icr = icr_val;
- data->icr2 = icr2_val;
-
- last_wake_count = data->wake_count;
- last_hlt_count = data->hlt_count;
- last_ipis_rcvd_count = ipis_rcvd;
- for (;;) {
- /*
- * Send IPI to halter vCPU.
- * First IPI can be sent unconditionally because halter vCPU
- * starts earlier.
- */
- xapic_write_reg(APIC_ICR2, icr2_val);
- xapic_write_reg(APIC_ICR, icr_val);
- data->ipis_sent++;
-
- /*
- * Wait up to ~1 sec for halter to indicate that it has:
- * 1. Received the IPI
- * 2. Woken up from the halt
- * 3. Gone back into halt
-		 * Current CPUs typically run at 2.x GHz, which is ~2
- * billion ticks per second.
- */
- tsc_start = rdtsc();
- while (rdtsc() - tsc_start < 2000000000) {
- if ((ipis_rcvd != last_ipis_rcvd_count) &&
- (data->wake_count != last_wake_count) &&
- (data->hlt_count != last_hlt_count))
- break;
- }
-
- GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
- (data->wake_count != last_wake_count) &&
- (data->hlt_count != last_hlt_count));
-
- last_wake_count = data->wake_count;
- last_hlt_count = data->hlt_count;
- last_ipis_rcvd_count = ipis_rcvd;
- }
-}
-
-static void *vcpu_thread(void *arg)
-{
- struct thread_params *params = (struct thread_params *)arg;
- struct kvm_vcpu *vcpu = params->vcpu;
- struct ucall uc;
- int old;
- int r;
-
- r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
- TEST_ASSERT(r == 0,
- "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
- vcpu->id, r);
-
- fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
- vcpu_run(vcpu);
-
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- if (get_ucall(vcpu, &uc) == UCALL_ABORT) {
- TEST_ASSERT(false,
- "vCPU %u exited with error: %s.\n"
- "Sending vCPU sent %lu IPIs to halting vCPU\n"
- "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
- "Halter TPR=%#x PPR=%#x LVR=%#x\n"
- "Migrations attempted: %lu\n"
- "Migrations completed: %lu",
- vcpu->id, (const char *)uc.args[0],
- params->data->ipis_sent, params->data->hlt_count,
- params->data->wake_count,
- *params->pipis_rcvd, params->data->halter_tpr,
- params->data->halter_ppr, params->data->halter_lvr,
- params->data->migrations_attempted,
- params->data->migrations_completed);
- }
-
- return NULL;
-}
-
-static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
-{
- void *retval;
- int r;
-
- r = pthread_cancel(thread);
- TEST_ASSERT(r == 0,
- "pthread_cancel on vcpu_id=%d failed with errno=%d",
- vcpu->id, r);
-
- r = pthread_join(thread, &retval);
- TEST_ASSERT(r == 0,
- "pthread_join on vcpu_id=%d failed with errno=%d",
- vcpu->id, r);
- TEST_ASSERT(retval == PTHREAD_CANCELED,
- "expected retval=%p, got %p", PTHREAD_CANCELED,
- retval);
-}
-
-void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
- uint64_t *pipis_rcvd)
-{
- long pages_not_moved;
- unsigned long nodemask = 0;
- unsigned long nodemasks[sizeof(nodemask) * 8];
- int nodes = 0;
- time_t start_time, last_update, now;
- time_t interval_secs = 1;
- int i, r;
- int from, to;
- unsigned long bit;
- uint64_t hlt_count;
- uint64_t wake_count;
- uint64_t ipis_sent;
-
- fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
- delay_usecs);
-
- /* Get set of first 64 numa nodes available */
- r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
- 0, MPOL_F_MEMS_ALLOWED);
- TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
-
- fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
- "(each 1-bit indicates node is present): %#lx\n",
- sizeof(nodemask) * 8, nodemask);
-
-	/* Init array of masks containing a single bit in each, one for each
- * available node. migrate_pages called below requires specifying nodes
- * as bit masks.
- */
- for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
- if (nodemask & bit) {
- nodemasks[nodes] = nodemask & bit;
- nodes++;
- }
- }
-
- TEST_ASSERT(nodes > 1,
- "Did not find at least 2 numa nodes. Can't do migration");
-
- fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);
-
- from = 0;
- to = 1;
- start_time = time(NULL);
- last_update = start_time;
-
- ipis_sent = data->ipis_sent;
- hlt_count = data->hlt_count;
- wake_count = data->wake_count;
-
- while ((int)(time(NULL) - start_time) < run_secs) {
- data->migrations_attempted++;
-
- /*
- * migrate_pages with PID=0 will migrate all pages of this
- * process between the nodes specified as bitmasks. The page
- * backing the APIC access address belongs to this process
- * because it is allocated by KVM in the context of the
- * KVM_CREATE_VCPU ioctl. If that assumption ever changes this
- * test may break or give a false positive signal.
- */
- pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]),
- &nodemasks[from],
- &nodemasks[to]);
- if (pages_not_moved < 0)
- fprintf(stderr,
- "migrate_pages failed, errno=%d\n", errno);
- else if (pages_not_moved > 0)
- fprintf(stderr,
- "migrate_pages could not move %ld pages\n",
- pages_not_moved);
- else
- data->migrations_completed++;
-
- from = to;
- to++;
- if (to == nodes)
- to = 0;
-
- now = time(NULL);
- if (((now - start_time) % interval_secs == 0) &&
- (now != last_update)) {
- last_update = now;
- fprintf(stderr,
- "%lu seconds: Migrations attempted=%lu completed=%lu, "
- "IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
- now - start_time, data->migrations_attempted,
- data->migrations_completed,
- data->ipis_sent, *pipis_rcvd,
- data->hlt_count, data->wake_count);
-
- TEST_ASSERT(ipis_sent != data->ipis_sent &&
- hlt_count != data->hlt_count &&
- wake_count != data->wake_count,
- "IPI, HLT and wake count have not increased "
- "in the last %lu seconds. "
- "HLTer is likely hung.", interval_secs);
-
- ipis_sent = data->ipis_sent;
- hlt_count = data->hlt_count;
- wake_count = data->wake_count;
- }
- usleep(delay_usecs);
- }
-}
-
-void get_cmdline_args(int argc, char *argv[], int *run_secs,
- bool *migrate, int *delay_usecs)
-{
- for (;;) {
- int opt = getopt(argc, argv, "s:d:m");
-
- if (opt == -1)
- break;
- switch (opt) {
- case 's':
- *run_secs = parse_size(optarg);
- break;
- case 'm':
- *migrate = true;
- break;
- case 'd':
- *delay_usecs = parse_size(optarg);
- break;
- default:
- TEST_ASSERT(false,
- "Usage: -s <runtime seconds>. Default is %d seconds.\n"
- "-m adds calls to migrate_pages while vCPUs are running."
- " Default is no migrations.\n"
- "-d <delay microseconds> - delay between migrate_pages() calls."
- " Default is %d microseconds.",
- DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
- }
- }
-}
-
-int main(int argc, char *argv[])
-{
- int r;
- int wait_secs;
- const int max_halter_wait = 10;
- int run_secs = 0;
- int delay_usecs = 0;
- struct test_data_page *data;
- vm_vaddr_t test_data_page_vaddr;
- bool migrate = false;
- pthread_t threads[2];
- struct thread_params params[2];
- struct kvm_vm *vm;
- uint64_t *pipis_rcvd;
-
- get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
- if (run_secs <= 0)
- run_secs = DEFAULT_RUN_SECS;
- if (delay_usecs <= 0)
- delay_usecs = DEFAULT_DELAY_USECS;
-
-	vm = vm_create_with_one_vcpu(&params[0].vcpu, halter_guest_code);
-
- vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
-
- virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
-
- params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
-
- test_data_page_vaddr = vm_vaddr_alloc_page(vm);
- data = addr_gva2hva(vm, test_data_page_vaddr);
- memset(data, 0, sizeof(*data));
- params[0].data = data;
- params[1].data = data;
-
- vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr);
- vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr);
-
- pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
- params[0].pipis_rcvd = pipis_rcvd;
- params[1].pipis_rcvd = pipis_rcvd;
-
- /* Start halter vCPU thread and wait for it to execute first HLT. */
-	r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
- TEST_ASSERT(r == 0,
- "pthread_create halter failed errno=%d", errno);
- fprintf(stderr, "Halter vCPU thread started\n");
-
- wait_secs = 0;
- while ((wait_secs < max_halter_wait) && !data->hlt_count) {
- sleep(1);
- wait_secs++;
- }
-
- TEST_ASSERT(data->hlt_count,
- "Halter vCPU did not execute first HLT within %d seconds",
- max_halter_wait);
-
- fprintf(stderr,
- "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
- data->halter_apic_id, wait_secs);
-
-	r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
- TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno);
-
- fprintf(stderr,
- "IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n",
- run_secs);
-
- if (!migrate)
- sleep(run_secs);
- else
- do_migrations(data, run_secs, delay_usecs, pipis_rcvd);
-
- /*
- * Cancel threads and wait for them to stop.
- */
- cancel_join_vcpu_thread(threads[0], params[0].vcpu);
- cancel_join_vcpu_thread(threads[1], params[1].vcpu);
-
- fprintf(stderr,
- "Test successful after running for %d seconds.\n"
- "Sending vCPU sent %lu IPIs to halting vCPU\n"
- "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
- "Halter APIC ID=%#x\n"
- "Sender ICR value=%#x ICR2 value=%#x\n"
- "Halter TPR=%#x PPR=%#x LVR=%#x\n"
- "Migrations attempted: %lu\n"
- "Migrations completed: %lu\n",
- run_secs, data->ipis_sent,
- data->hlt_count, data->wake_count, *pipis_rcvd,
- data->halter_apic_id,
- data->icr, data->icr2,
- data->halter_tpr, data->halter_ppr, data->halter_lvr,
- data->migrations_attempted, data->migrations_completed);
-
- kvm_vm_free(vm);
-
- return 0;
-}
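
For reference, the ICR/ICR2 programming done by sender_guest_code() boils down to two MMIO writes. The sketch below is a standalone illustration that assumes the xAPIC register page is already mapped at apic_base and uses the architectural register offsets 0x300/0x310; the function name is hypothetical.

#include <stdint.h>

#define XAPIC_ICR_OFF	0x300
#define XAPIC_ICR2_OFF	0x310

static void xapic_send_fixed_phys_ipi(volatile uint8_t *apic_base,
				      uint8_t dest_apic_id, uint8_t vector)
{
	/* The destination APIC ID lives in ICR2[31:24]; program it first,
	 * because the write to the low ICR dword is what sends the IPI. */
	*(volatile uint32_t *)(apic_base + XAPIC_ICR2_OFF) = (uint32_t)dest_apic_id << 24;
	*(volatile uint32_t *)(apic_base + XAPIC_ICR_OFF) = vector;	/* fixed, physical */
}
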
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "apic.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "test_util.h"
-
-struct xapic_vcpu {
- struct kvm_vcpu *vcpu;
- bool is_x2apic;
- bool has_xavic_errata;
-};
-
-static void xapic_guest_code(void)
-{
- asm volatile("cli");
-
- xapic_enable();
-
- while (1) {
- uint64_t val = (u64)xapic_read_reg(APIC_IRR) |
- (u64)xapic_read_reg(APIC_IRR + 0x10) << 32;
-
- xapic_write_reg(APIC_ICR2, val >> 32);
- xapic_write_reg(APIC_ICR, val);
- GUEST_SYNC(val);
- }
-}
-
-#define X2APIC_RSVD_BITS_MASK (GENMASK_ULL(31, 20) | \
- GENMASK_ULL(17, 16) | \
- GENMASK_ULL(13, 13))
-
-static void x2apic_guest_code(void)
-{
- asm volatile("cli");
-
- x2apic_enable();
-
- do {
- uint64_t val = x2apic_read_reg(APIC_IRR) |
- x2apic_read_reg(APIC_IRR + 0x10) << 32;
-
- if (val & X2APIC_RSVD_BITS_MASK) {
- x2apic_write_reg_fault(APIC_ICR, val);
- } else {
- x2apic_write_reg(APIC_ICR, val);
- GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ICR), val);
- }
- GUEST_SYNC(val);
- } while (1);
-}
-
-static void ____test_icr(struct xapic_vcpu *x, uint64_t val)
-{
- struct kvm_vcpu *vcpu = x->vcpu;
- struct kvm_lapic_state xapic;
- struct ucall uc;
- uint64_t icr;
-
- /*
- * Tell the guest what ICR value to write. Use the IRR to pass info,
- * all bits are valid and should not be modified by KVM (ignoring the
- * fact that vectors 0-15 are technically illegal).
- */
- vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
- *((u32 *)&xapic.regs[APIC_IRR]) = val;
- *((u32 *)&xapic.regs[APIC_IRR + 0x10]) = val >> 32;
- vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic);
-
- vcpu_run(vcpu);
- TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
- TEST_ASSERT_EQ(uc.args[1], val);
-
- vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
- icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) |
- (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32;
- if (!x->is_x2apic) {
- if (!x->has_xavic_errata)
- val &= (-1u | (0xffull << (32 + 24)));
- } else if (val & X2APIC_RSVD_BITS_MASK) {
- return;
- }
-
- if (x->has_xavic_errata)
- TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
- else
- TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
-}
-
-static void __test_icr(struct xapic_vcpu *x, uint64_t val)
-{
- /*
- * The BUSY bit is reserved on both AMD and Intel, but only AMD treats
-	 * it as _must_ be zero. Intel simply ignores the bit. Don't test
- * the BUSY bit for x2APIC, as there is no single correct behavior.
- */
- if (!x->is_x2apic)
- ____test_icr(x, val | APIC_ICR_BUSY);
-
- ____test_icr(x, val & ~(u64)APIC_ICR_BUSY);
-}
-
-static void test_icr(struct xapic_vcpu *x)
-{
- struct kvm_vcpu *vcpu = x->vcpu;
- uint64_t icr, i, j;
-
- icr = APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_FIXED;
- for (i = 0; i <= 0xff; i++)
- __test_icr(x, icr | i);
-
- icr = APIC_INT_ASSERT | APIC_DM_FIXED;
- for (i = 0; i <= 0xff; i++)
- __test_icr(x, icr | i);
-
- /*
- * Send all flavors of IPIs to non-existent vCPUs. TODO: use number of
- * vCPUs, not vcpu.id + 1. Arbitrarily use vector 0xff.
- */
- icr = APIC_INT_ASSERT | 0xff;
- for (i = 0; i < 0xff; i++) {
- if (i == vcpu->id)
- continue;
- for (j = 0; j < 8; j++)
- __test_icr(x, i << (32 + 24) | icr | (j << 8));
- }
-
- /* And again with a shorthand destination for all types of IPIs. */
- icr = APIC_DEST_ALLBUT | APIC_INT_ASSERT;
- for (i = 0; i < 8; i++)
- __test_icr(x, icr | (i << 8));
-
-	/* And a few garbage values, just to make sure it's an IRQ (blocked). */
- __test_icr(x, 0xa5a5a5a5a5a5a5a5 & ~APIC_DM_FIXED_MASK);
- __test_icr(x, 0x5a5a5a5a5a5a5a5a & ~APIC_DM_FIXED_MASK);
- __test_icr(x, -1ull & ~APIC_DM_FIXED_MASK);
-}
-
-static void __test_apic_id(struct kvm_vcpu *vcpu, uint64_t apic_base)
-{
- uint32_t apic_id, expected;
- struct kvm_lapic_state xapic;
-
- vcpu_set_msr(vcpu, MSR_IA32_APICBASE, apic_base);
-
- vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
-
- expected = apic_base & X2APIC_ENABLE ? vcpu->id : vcpu->id << 24;
- apic_id = *((u32 *)&xapic.regs[APIC_ID]);
-
- TEST_ASSERT(apic_id == expected,
- "APIC_ID not set back to %s format; wanted = %x, got = %x",
- (apic_base & X2APIC_ENABLE) ? "x2APIC" : "xAPIC",
- expected, apic_id);
-}
-
-/*
- * Verify that KVM switches the APIC_ID between xAPIC and x2APIC when userspace
- * stuffs MSR_IA32_APICBASE. Setting the APIC_ID when x2APIC is enabled and
- * when the APIC transitions from DISABLED to ENABLED is architectural behavior
- * (on Intel), whereas the x2APIC => xAPIC transition behavior is KVM ABI, since
- * attempting to transition from x2APIC to xAPIC without disabling the APIC is
- * architecturally disallowed.
- */
-static void test_apic_id(void)
-{
- const uint32_t NR_VCPUS = 3;
- struct kvm_vcpu *vcpus[NR_VCPUS];
- uint64_t apic_base;
- struct kvm_vm *vm;
- int i;
-
- vm = vm_create_with_vcpus(NR_VCPUS, NULL, vcpus);
- vm_enable_cap(vm, KVM_CAP_X2APIC_API, KVM_X2APIC_API_USE_32BIT_IDS);
-
- for (i = 0; i < NR_VCPUS; i++) {
- apic_base = vcpu_get_msr(vcpus[i], MSR_IA32_APICBASE);
-
- TEST_ASSERT(apic_base & MSR_IA32_APICBASE_ENABLE,
- "APIC not in ENABLED state at vCPU RESET");
- TEST_ASSERT(!(apic_base & X2APIC_ENABLE),
- "APIC not in xAPIC mode at vCPU RESET");
-
- __test_apic_id(vcpus[i], apic_base);
- __test_apic_id(vcpus[i], apic_base | X2APIC_ENABLE);
- __test_apic_id(vcpus[i], apic_base);
- }
-
- kvm_vm_free(vm);
-}
-
-static void test_x2apic_id(void)
-{
- struct kvm_lapic_state lapic = {};
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- int i;
-
- vm = vm_create_with_one_vcpu(&vcpu, NULL);
- vcpu_set_msr(vcpu, MSR_IA32_APICBASE, MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
-
- /*
- * Try stuffing a modified x2APIC ID, KVM should ignore the value and
- * always return the vCPU's default/readonly x2APIC ID.
- */
- for (i = 0; i <= 0xff; i++) {
- *(u32 *)(lapic.regs + APIC_ID) = i << 24;
- *(u32 *)(lapic.regs + APIC_SPIV) = APIC_SPIV_APIC_ENABLED;
- vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
-
- vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic);
- TEST_ASSERT(*((u32 *)&lapic.regs[APIC_ID]) == vcpu->id << 24,
- "x2APIC ID should be fully readonly");
- }
-
- kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
- struct xapic_vcpu x = {
- .vcpu = NULL,
- .is_x2apic = true,
- };
- struct kvm_vm *vm;
-
- vm = vm_create_with_one_vcpu(&x.vcpu, x2apic_guest_code);
- test_icr(&x);
- kvm_vm_free(vm);
-
- /*
- * Use a second VM for the xAPIC test so that x2APIC can be hidden from
- * the guest in order to test AVIC. KVM disallows changing CPUID after
- * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC.
- */
- vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code);
- x.is_x2apic = false;
-
- /*
- * AMD's AVIC implementation is buggy (fails to clear the ICR BUSY bit),
- * and also diverges from KVM with respect to ICR2[23:0] (KVM and Intel
-	 * drop writes, AMD does not). Account for the errata when checking
- * that KVM reads back what was written.
- */
- x.has_xavic_errata = host_cpu_is_amd &&
- get_kvm_amd_param_bool("avic");
-
- vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC);
-
- virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
- test_icr(&x);
- kvm_vm_free(vm);
-
- test_apic_id();
- test_x2apic_id();
-}
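
The expected-value computation in __test_apic_id() reflects the two register layouts: in xAPIC mode the ID occupies APIC_ID[31:24], while in x2APIC mode the full 32-bit ID is reported directly. A hypothetical helper expressing the same expectation:

#include <stdbool.h>
#include <stdint.h>

/* Value expected in the APIC_ID register for a given vCPU id and mode. */
static inline uint32_t expected_apic_id_reg(uint32_t id, bool x2apic)
{
	return x2apic ? id : id << 24;
}
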
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * XCR0 cpuid test
- *
- * Copyright (C) 2022, Google LLC.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-
-/*
- * Assert that architectural dependency rules are satisfied, e.g. that AVX is
- * supported if and only if SSE is supported.
- */
-#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies) \
-do { \
- uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \
- \
- __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) || \
- __supported == ((xfeatures) | (dependencies)), \
- "supported = 0x%lx, xfeatures = 0x%llx, dependencies = 0x%llx", \
- __supported, (xfeatures), (dependencies)); \
-} while (0)
-
-/*
- * Assert that KVM reports a sane, usable as-is XCR0. Architecturally, a CPU
- * isn't strictly required to _support_ all XFeatures related to a feature, but
- * at the same time XSETBV will #GP if bundled XFeatures aren't enabled and
- * disabled coherently. E.g. a CPU can technically enumerate support for
- * XTILE_CFG but not XTILE_DATA, but attempting to enable XTILE_CFG without
- * XTILE_DATA will #GP.
- */
-#define ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, xfeatures) \
-do { \
- uint64_t __supported = (supported_xcr0) & (xfeatures); \
- \
- __GUEST_ASSERT(!__supported || __supported == (xfeatures), \
- "supported = 0x%lx, xfeatures = 0x%llx", \
- __supported, (xfeatures)); \
-} while (0)
-
-static void guest_code(void)
-{
- uint64_t initial_xcr0;
- uint64_t supported_xcr0;
- int i, vector;
-
- set_cr4(get_cr4() | X86_CR4_OSXSAVE);
-
- initial_xcr0 = xgetbv(0);
- supported_xcr0 = this_cpu_supported_xcr0();
-
- GUEST_ASSERT(initial_xcr0 == supported_xcr0);
-
- /* Check AVX */
- ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0,
- XFEATURE_MASK_YMM,
- XFEATURE_MASK_SSE);
-
- /* Check MPX */
- ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
- XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
-
- /* Check AVX-512 */
- ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0,
- XFEATURE_MASK_AVX512,
- XFEATURE_MASK_SSE | XFEATURE_MASK_YMM);
- ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
- XFEATURE_MASK_AVX512);
-
- /* Check AMX */
- ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
- XFEATURE_MASK_XTILE);
-
- vector = xsetbv_safe(0, XFEATURE_MASK_FP);
- __GUEST_ASSERT(!vector,
- "Expected success on XSETBV(FP), got vector '0x%x'",
- vector);
-
- vector = xsetbv_safe(0, supported_xcr0);
- __GUEST_ASSERT(!vector,
- "Expected success on XSETBV(0x%lx), got vector '0x%x'",
- supported_xcr0, vector);
-
- for (i = 0; i < 64; i++) {
- if (supported_xcr0 & BIT_ULL(i))
- continue;
-
- vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i));
- __GUEST_ASSERT(vector == GP_VECTOR,
- "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'",
- BIT_ULL(i), supported_xcr0, vector);
- }
-
- GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vcpu *vcpu;
- struct kvm_run *run;
- struct kvm_vm *vm;
- struct ucall uc;
-
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- run = vcpu->run;
-
- while (1) {
- vcpu_run(vcpu);
-
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_DONE:
- goto done;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
- }
-
-done:
- kvm_vm_free(vm);
- return 0;
-}
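
The guest code above runs XSETBV from ring 0 inside the VM because XSETBV is privileged; reading XCR0 from ordinary host user space is still possible via XGETBV. A small sketch, assuming GCC or Clang built with -mxsave:

/* xcr0_sketch.c: build with -mxsave. */
#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t xcr0 = _xgetbv(0);

	/* Bit 0 = x87, bit 1 = SSE, bit 2 = AVX (YMM), bits 4:3 = MPX,
	 * bits 7:5 = AVX-512, bits 18:17 = AMX (XTILE_CFG/XTILE_DATA). */
	printf("XCR0 = %#llx (AVX %s, AVX-512 %s)\n",
	       (unsigned long long)xcr0,
	       (xcr0 & (1ULL << 2)) ? "enabled" : "disabled",
	       ((xcr0 & 0xe0) == 0xe0) ? "enabled" : "disabled");
	return 0;
}
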
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright © 2021 Amazon.com, Inc. or its affiliates.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-#include <stdint.h>
-#include <time.h>
-#include <sched.h>
-#include <signal.h>
-#include <pthread.h>
-
-#include <sys/eventfd.h>
-
-#define SHINFO_REGION_GVA 0xc0000000ULL
-#define SHINFO_REGION_GPA 0xc0000000ULL
-#define SHINFO_REGION_SLOT 10
-
-#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (3 * PAGE_SIZE))
-#define DUMMY_REGION_SLOT 11
-
-#define DUMMY_REGION_GPA_2 (SHINFO_REGION_GPA + (4 * PAGE_SIZE))
-#define DUMMY_REGION_SLOT_2 12
-
-#define SHINFO_ADDR (SHINFO_REGION_GPA)
-#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40)
-#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
-#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15)
-
-#define SHINFO_VADDR (SHINFO_REGION_GVA)
-#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40)
-#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15)
-
-#define EVTCHN_VECTOR 0x10
-
-#define EVTCHN_TEST1 15
-#define EVTCHN_TEST2 66
-#define EVTCHN_TIMER 13
-
-enum {
- TEST_INJECT_VECTOR = 0,
- TEST_RUNSTATE_runnable,
- TEST_RUNSTATE_blocked,
- TEST_RUNSTATE_offline,
- TEST_RUNSTATE_ADJUST,
- TEST_RUNSTATE_DATA,
- TEST_STEAL_TIME,
- TEST_EVTCHN_MASKED,
- TEST_EVTCHN_UNMASKED,
- TEST_EVTCHN_SLOWPATH,
- TEST_EVTCHN_SEND_IOCTL,
- TEST_EVTCHN_HCALL,
- TEST_EVTCHN_HCALL_SLOWPATH,
- TEST_EVTCHN_HCALL_EVENTFD,
- TEST_TIMER_SETUP,
- TEST_TIMER_WAIT,
- TEST_TIMER_RESTORE,
- TEST_POLL_READY,
- TEST_POLL_TIMEOUT,
- TEST_POLL_MASKED,
- TEST_POLL_WAKE,
- SET_VCPU_INFO,
- TEST_TIMER_PAST,
- TEST_LOCKING_SEND_RACE,
- TEST_LOCKING_POLL_RACE,
- TEST_LOCKING_POLL_TIMEOUT,
- TEST_DONE,
-
- TEST_GUEST_SAW_IRQ,
-};
-
-#define XEN_HYPERCALL_MSR 0x40000000
-
-#define MIN_STEAL_TIME 50000
-
-#define SHINFO_RACE_TIMEOUT 2 /* seconds */
-
-#define __HYPERVISOR_set_timer_op 15
-#define __HYPERVISOR_sched_op 29
-#define __HYPERVISOR_event_channel_op 32
-
-#define SCHEDOP_poll 3
-
-#define EVTCHNOP_send 4
-
-#define EVTCHNSTAT_interdomain 2
-
-struct evtchn_send {
- u32 port;
-};
-
-struct sched_poll {
- u32 *ports;
- unsigned int nr_ports;
- u64 timeout;
-};
-
-struct pvclock_vcpu_time_info {
- u32 version;
- u32 pad0;
- u64 tsc_timestamp;
- u64 system_time;
- u32 tsc_to_system_mul;
- s8 tsc_shift;
- u8 flags;
- u8 pad[2];
-} __attribute__((__packed__)); /* 32 bytes */
-
-struct pvclock_wall_clock {
- u32 version;
- u32 sec;
- u32 nsec;
-} __attribute__((__packed__));
-
-struct vcpu_runstate_info {
- uint32_t state;
- uint64_t state_entry_time;
- uint64_t time[5]; /* Extra field for overrun check */
-};
-
-struct compat_vcpu_runstate_info {
- uint32_t state;
- uint64_t state_entry_time;
- uint64_t time[5];
-} __attribute__((__packed__));
-
-struct arch_vcpu_info {
- unsigned long cr2;
- unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
-};
-
-struct vcpu_info {
- uint8_t evtchn_upcall_pending;
- uint8_t evtchn_upcall_mask;
- unsigned long evtchn_pending_sel;
- struct arch_vcpu_info arch;
- struct pvclock_vcpu_time_info time;
-}; /* 64 bytes (x86) */
-
-struct shared_info {
- struct vcpu_info vcpu_info[32];
- unsigned long evtchn_pending[64];
- unsigned long evtchn_mask[64];
- struct pvclock_wall_clock wc;
- uint32_t wc_sec_hi;
- /* arch_shared_info here */
-};
-
-#define RUNSTATE_running 0
-#define RUNSTATE_runnable 1
-#define RUNSTATE_blocked 2
-#define RUNSTATE_offline 3
-
-static const char *runstate_names[] = {
- "running",
- "runnable",
- "blocked",
- "offline"
-};
-
-struct {
- struct kvm_irq_routing info;
- struct kvm_irq_routing_entry entries[2];
-} irq_routes;
-
-static volatile bool guest_saw_irq;
-
-static void evtchn_handler(struct ex_regs *regs)
-{
- struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
-
- vcpu_arch_put_guest(vi->evtchn_upcall_pending, 0);
- vcpu_arch_put_guest(vi->evtchn_pending_sel, 0);
- guest_saw_irq = true;
-
- GUEST_SYNC(TEST_GUEST_SAW_IRQ);
-}
-
-static void guest_wait_for_irq(void)
-{
- while (!guest_saw_irq)
- __asm__ __volatile__ ("rep nop" : : : "memory");
- guest_saw_irq = false;
-}
-
-static void guest_code(void)
-{
- struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
- int i;
-
- __asm__ __volatile__(
- "sti\n"
- "nop\n"
- );
-
- /* Trigger an interrupt injection */
- GUEST_SYNC(TEST_INJECT_VECTOR);
-
- guest_wait_for_irq();
-
- /* Test having the host set runstates manually */
- GUEST_SYNC(TEST_RUNSTATE_runnable);
- GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
- GUEST_ASSERT(rs->state == 0);
-
- GUEST_SYNC(TEST_RUNSTATE_blocked);
- GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
- GUEST_ASSERT(rs->state == 0);
-
- GUEST_SYNC(TEST_RUNSTATE_offline);
- GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
- GUEST_ASSERT(rs->state == 0);
-
- /* Test runstate time adjust */
- GUEST_SYNC(TEST_RUNSTATE_ADJUST);
- GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
- GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);
-
- /* Test runstate time set */
- GUEST_SYNC(TEST_RUNSTATE_DATA);
- GUEST_ASSERT(rs->state_entry_time >= 0x8000);
- GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
- GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
- GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);
-
- /* sched_yield() should result in some 'runnable' time */
- GUEST_SYNC(TEST_STEAL_TIME);
- GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
-
- /* Attempt to deliver a *masked* interrupt */
- GUEST_SYNC(TEST_EVTCHN_MASKED);
-
- /* Wait until we see the bit set */
- struct shared_info *si = (void *)SHINFO_VADDR;
- while (!si->evtchn_pending[0])
- __asm__ __volatile__ ("rep nop" : : : "memory");
-
- /* Now deliver an *unmasked* interrupt */
- GUEST_SYNC(TEST_EVTCHN_UNMASKED);
-
- guest_wait_for_irq();
-
- /* Change memslots and deliver an interrupt */
- GUEST_SYNC(TEST_EVTCHN_SLOWPATH);
-
- guest_wait_for_irq();
-
- /* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */
- GUEST_SYNC(TEST_EVTCHN_SEND_IOCTL);
-
- guest_wait_for_irq();
-
- GUEST_SYNC(TEST_EVTCHN_HCALL);
-
- /* Our turn. Deliver event channel (to ourselves) with
- * EVTCHNOP_send hypercall. */
- struct evtchn_send s = { .port = 127 };
- xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
-
- guest_wait_for_irq();
-
- GUEST_SYNC(TEST_EVTCHN_HCALL_SLOWPATH);
-
- /*
- * Same again, but this time the host has messed with memslots so it
- * should take the slow path in kvm_xen_set_evtchn().
- */
- xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
-
- guest_wait_for_irq();
-
- GUEST_SYNC(TEST_EVTCHN_HCALL_EVENTFD);
-
- /* Deliver "outbound" event channel to an eventfd which
- * happens to be one of our own irqfds. */
- s.port = 197;
- xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
-
- guest_wait_for_irq();
-
- GUEST_SYNC(TEST_TIMER_SETUP);
-
- /* Set a timer 100ms in the future. */
- xen_hypercall(__HYPERVISOR_set_timer_op,
- rs->state_entry_time + 100000000, NULL);
-
- GUEST_SYNC(TEST_TIMER_WAIT);
-
- /* Now wait for the timer */
- guest_wait_for_irq();
-
- GUEST_SYNC(TEST_TIMER_RESTORE);
-
- /* The host has 'restored' the timer. Just wait for it. */
- guest_wait_for_irq();
-
- GUEST_SYNC(TEST_POLL_READY);
-
- /* Poll for an event channel port which is already set */
- u32 ports[1] = { EVTCHN_TIMER };
- struct sched_poll p = {
- .ports = ports,
- .nr_ports = 1,
- .timeout = 0,
- };
-
- xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
-
- GUEST_SYNC(TEST_POLL_TIMEOUT);
-
- /* Poll for an unset port and wait for the timeout. */
- p.timeout = 100000000;
- xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
-
- GUEST_SYNC(TEST_POLL_MASKED);
-
- /* A timer will wake the masked port we're waiting on, while we poll */
- p.timeout = 0;
- xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
-
- GUEST_SYNC(TEST_POLL_WAKE);
-
- /* Set the vcpu_info to point at exactly the place it already is to
- * make sure the attribute is functional. */
- GUEST_SYNC(SET_VCPU_INFO);
-
- /* A timer will wake an *unmasked* port which should wake us with an
- * actual interrupt, while we're polling on a different port. */
- ports[0]++;
- p.timeout = 0;
- xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
-
- guest_wait_for_irq();
-
- GUEST_SYNC(TEST_TIMER_PAST);
-
- /* Timer should have fired already */
- guest_wait_for_irq();
-
- GUEST_SYNC(TEST_LOCKING_SEND_RACE);
- /* Racing host ioctls */
-
- guest_wait_for_irq();
-
- GUEST_SYNC(TEST_LOCKING_POLL_RACE);
- /* Racing vmcall against host ioctl */
-
- ports[0] = 0;
-
- p = (struct sched_poll) {
- .ports = ports,
- .nr_ports = 1,
- .timeout = 0
- };
-
-wait_for_timer:
- /*
- * Poll for a timer wake event while the worker thread is mucking with
- * the shared info. KVM XEN drops timer IRQs if the shared info is
- * invalid when the timer expires. Arbitrarily poll 100 times before
- * giving up and asking the VMM to re-arm the timer. 100 polls should
- * consume enough time to beat on KVM without taking too long if the
- * timer IRQ is dropped due to an invalid event channel.
- */
- for (i = 0; i < 100 && !guest_saw_irq; i++)
- __xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
-
- /*
- * Re-send the timer IRQ if it was (likely) dropped due to the timer
- * expiring while the event channel was invalid.
- */
- if (!guest_saw_irq) {
- GUEST_SYNC(TEST_LOCKING_POLL_TIMEOUT);
- goto wait_for_timer;
- }
- guest_saw_irq = false;
-
- GUEST_SYNC(TEST_DONE);
-}
-
-static struct shared_info *shinfo;
-static struct vcpu_info *vinfo;
-static struct kvm_vcpu *vcpu;
-
-static void handle_alrm(int sig)
-{
- if (vinfo)
- printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending);
- vcpu_dump(stdout, vcpu, 0);
- TEST_FAIL("IRQ delivery timed out");
-}
-
-static void *juggle_shinfo_state(void *arg)
-{
- struct kvm_vm *vm = (struct kvm_vm *)arg;
-
- struct kvm_xen_hvm_attr cache_activate_gfn = {
- .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
- .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE
- };
-
- struct kvm_xen_hvm_attr cache_deactivate_gfn = {
- .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
- .u.shared_info.gfn = KVM_XEN_INVALID_GFN
- };
-
- struct kvm_xen_hvm_attr cache_activate_hva = {
- .type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA,
- .u.shared_info.hva = (unsigned long)shinfo
- };
-
- struct kvm_xen_hvm_attr cache_deactivate_hva = {
- .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
- .u.shared_info.hva = 0
- };
-
- int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
-
- for (;;) {
- __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_gfn);
- pthread_testcancel();
- __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_gfn);
-
- if (xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA) {
- __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_hva);
- pthread_testcancel();
- __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_hva);
- }
- }
-
- return NULL;
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_xen_hvm_attr evt_reset;
- struct kvm_vm *vm;
- pthread_t thread;
- bool verbose;
- int ret;
-
- verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
- !strncmp(argv[1], "--verbose", 10));
-
- int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
- TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO);
-
- bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
- bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG);
- bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
- bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
- bool has_shinfo_hva = !!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA);
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
- /* Map a region for the shared_info page */
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0);
- virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3);
-
- shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
-
- int zero_fd = open("/dev/zero", O_RDONLY);
- TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
-
- struct kvm_xen_hvm_config hvmc = {
- .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
- .msr = XEN_HYPERCALL_MSR,
- };
-
- /* Let the kernel know that we *will* use it for sending all
- * event channels, which lets it intercept SCHEDOP_poll */
- if (do_evtchn_tests)
- hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
-
- vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
-
- struct kvm_xen_hvm_attr lm = {
- .type = KVM_XEN_ATTR_TYPE_LONG_MODE,
- .u.long_mode = 1,
- };
- vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
-
- if (do_runstate_flag) {
- struct kvm_xen_hvm_attr ruf = {
- .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG,
- .u.runstate_update_flag = 1,
- };
- vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf);
-
- ruf.u.runstate_update_flag = 0;
- vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf);
- TEST_ASSERT(ruf.u.runstate_update_flag == 1,
- "Failed to read back RUNSTATE_UPDATE_FLAG attr");
- }
-
- struct kvm_xen_hvm_attr ha = {};
-
- if (has_shinfo_hva) {
- ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA;
- ha.u.shared_info.hva = (unsigned long)shinfo;
- } else {
- ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO;
- ha.u.shared_info.gfn = SHINFO_ADDR / PAGE_SIZE;
- }
-
- vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
-
- /*
- * Test what happens when the HVA of the shinfo page is remapped after
- * the kernel has a reference to it. But make sure we copy the clock
- * info over since that's only set at setup time, and we test it later.
- */
- struct pvclock_wall_clock wc_copy = shinfo->wc;
- void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
- TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
- shinfo->wc = wc_copy;
-
- struct kvm_xen_vcpu_attr vi = {
- .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
- .u.gpa = VCPU_INFO_ADDR,
- };
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi);
-
- struct kvm_xen_vcpu_attr pvclock = {
- .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
- .u.gpa = PVTIME_ADDR,
- };
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock);
-
- struct kvm_xen_hvm_attr vec = {
- .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
- .u.vector = EVTCHN_VECTOR,
- };
- vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);
-
- vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);
-
- if (do_runstate_tests) {
- struct kvm_xen_vcpu_attr st = {
- .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
- .u.gpa = RUNSTATE_ADDR,
- };
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
- }
-
- int irq_fd[2] = { -1, -1 };
-
- if (do_eventfd_tests) {
- irq_fd[0] = eventfd(0, 0);
- irq_fd[1] = eventfd(0, 0);
-
- /* Unexpected, but not a KVM failure */
- if (irq_fd[0] == -1 || irq_fd[1] == -1)
- do_evtchn_tests = do_eventfd_tests = false;
- }
-
- if (do_eventfd_tests) {
- irq_routes.info.nr = 2;
-
- irq_routes.entries[0].gsi = 32;
- irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
- irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1;
- irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id;
- irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
-
- irq_routes.entries[1].gsi = 33;
- irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
- irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2;
- irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id;
- irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
-
- vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);
-
- struct kvm_irqfd ifd = { };
-
- ifd.fd = irq_fd[0];
- ifd.gsi = 32;
- vm_ioctl(vm, KVM_IRQFD, &ifd);
-
- ifd.fd = irq_fd[1];
- ifd.gsi = 33;
- vm_ioctl(vm, KVM_IRQFD, &ifd);
-
- struct sigaction sa = { };
- sa.sa_handler = handle_alrm;
- sigaction(SIGALRM, &sa, NULL);
- }
-
- struct kvm_xen_vcpu_attr tmr = {
- .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
- .u.timer.port = EVTCHN_TIMER,
- .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
- .u.timer.expires_ns = 0
- };
-
- if (do_evtchn_tests) {
- struct kvm_xen_hvm_attr inj = {
- .type = KVM_XEN_ATTR_TYPE_EVTCHN,
- .u.evtchn.send_port = 127,
- .u.evtchn.type = EVTCHNSTAT_interdomain,
- .u.evtchn.flags = 0,
- .u.evtchn.deliver.port.port = EVTCHN_TEST1,
- .u.evtchn.deliver.port.vcpu = vcpu->id + 1,
- .u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
- };
- vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
-
- /* Test migration to a different vCPU */
- inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE;
- inj.u.evtchn.deliver.port.vcpu = vcpu->id;
- vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
-
- inj.u.evtchn.send_port = 197;
- inj.u.evtchn.deliver.eventfd.port = 0;
- inj.u.evtchn.deliver.eventfd.fd = irq_fd[1];
- inj.u.evtchn.flags = 0;
- vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
-
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
- }
- vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
- vinfo->evtchn_upcall_pending = 0;
-
- struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
- rs->state = 0x5a;
-
- bool evtchn_irq_expected = false;
-
- for (;;) {
- struct ucall uc;
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- /* NOT REACHED */
- case UCALL_SYNC: {
- struct kvm_xen_vcpu_attr rst;
- long rundelay;
-
- if (do_runstate_tests)
- TEST_ASSERT(rs->state_entry_time == rs->time[0] +
- rs->time[1] + rs->time[2] + rs->time[3],
- "runstate times don't add up");
-
- switch (uc.args[1]) {
- case TEST_INJECT_VECTOR:
- if (verbose)
- printf("Delivering evtchn upcall\n");
- evtchn_irq_expected = true;
- vinfo->evtchn_upcall_pending = 1;
- break;
-
- case TEST_RUNSTATE_runnable...TEST_RUNSTATE_offline:
- TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
- if (!do_runstate_tests)
- goto done;
- if (verbose)
- printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
- rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
- rst.u.runstate.state = uc.args[1] + RUNSTATE_runnable -
- TEST_RUNSTATE_runnable;
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
- break;
-
- case TEST_RUNSTATE_ADJUST:
- if (verbose)
- printf("Testing RUNSTATE_ADJUST\n");
- rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
- memset(&rst.u, 0, sizeof(rst.u));
- rst.u.runstate.state = (uint64_t)-1;
- rst.u.runstate.time_blocked =
- 0x5a - rs->time[RUNSTATE_blocked];
- rst.u.runstate.time_offline =
- 0x6b6b - rs->time[RUNSTATE_offline];
- rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
- rst.u.runstate.time_offline;
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
- break;
-
- case TEST_RUNSTATE_DATA:
- if (verbose)
- printf("Testing RUNSTATE_DATA\n");
- rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
- memset(&rst.u, 0, sizeof(rst.u));
- rst.u.runstate.state = RUNSTATE_running;
- rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
- rst.u.runstate.time_blocked = 0x6b6b;
- rst.u.runstate.time_offline = 0x5a;
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
- break;
-
- case TEST_STEAL_TIME:
- if (verbose)
- printf("Testing steal time\n");
- /* Yield until scheduler delay exceeds target */
- rundelay = get_run_delay() + MIN_STEAL_TIME;
- do {
- sched_yield();
- } while (get_run_delay() < rundelay);
- break;
-
- case TEST_EVTCHN_MASKED:
- if (!do_eventfd_tests)
- goto done;
- if (verbose)
- printf("Testing masked event channel\n");
- shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1;
- eventfd_write(irq_fd[0], 1UL);
- alarm(1);
- break;
-
- case TEST_EVTCHN_UNMASKED:
- if (verbose)
- printf("Testing unmasked event channel\n");
- /* Unmask that, but deliver the other one */
- shinfo->evtchn_pending[0] = 0;
- shinfo->evtchn_mask[0] = 0;
- eventfd_write(irq_fd[1], 1UL);
- evtchn_irq_expected = true;
- alarm(1);
- break;
-
- case TEST_EVTCHN_SLOWPATH:
- TEST_ASSERT(!evtchn_irq_expected,
- "Expected event channel IRQ but it didn't happen");
- shinfo->evtchn_pending[1] = 0;
- if (verbose)
- printf("Testing event channel after memslot change\n");
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
- eventfd_write(irq_fd[0], 1UL);
- evtchn_irq_expected = true;
- alarm(1);
- break;
-
- case TEST_EVTCHN_SEND_IOCTL:
- TEST_ASSERT(!evtchn_irq_expected,
- "Expected event channel IRQ but it didn't happen");
- if (!do_evtchn_tests)
- goto done;
-
- shinfo->evtchn_pending[0] = 0;
- if (verbose)
- printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n");
-
- struct kvm_irq_routing_xen_evtchn e;
- e.port = EVTCHN_TEST2;
- e.vcpu = vcpu->id;
- e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
-
- vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e);
- evtchn_irq_expected = true;
- alarm(1);
- break;
-
- case TEST_EVTCHN_HCALL:
- TEST_ASSERT(!evtchn_irq_expected,
- "Expected event channel IRQ but it didn't happen");
- shinfo->evtchn_pending[1] = 0;
-
- if (verbose)
- printf("Testing guest EVTCHNOP_send direct to evtchn\n");
- evtchn_irq_expected = true;
- alarm(1);
- break;
-
- case TEST_EVTCHN_HCALL_SLOWPATH:
- TEST_ASSERT(!evtchn_irq_expected,
- "Expected event channel IRQ but it didn't happen");
- shinfo->evtchn_pending[0] = 0;
-
- if (verbose)
- printf("Testing guest EVTCHNOP_send direct to evtchn after memslot change\n");
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- DUMMY_REGION_GPA_2, DUMMY_REGION_SLOT_2, 1, 0);
- evtchn_irq_expected = true;
- alarm(1);
- break;
-
- case TEST_EVTCHN_HCALL_EVENTFD:
- TEST_ASSERT(!evtchn_irq_expected,
- "Expected event channel IRQ but it didn't happen");
- shinfo->evtchn_pending[0] = 0;
-
- if (verbose)
- printf("Testing guest EVTCHNOP_send to eventfd\n");
- evtchn_irq_expected = true;
- alarm(1);
- break;
-
- case TEST_TIMER_SETUP:
- TEST_ASSERT(!evtchn_irq_expected,
- "Expected event channel IRQ but it didn't happen");
- shinfo->evtchn_pending[1] = 0;
-
- if (verbose)
- printf("Testing guest oneshot timer\n");
- break;
-
- case TEST_TIMER_WAIT:
- memset(&tmr, 0, sizeof(tmr));
- tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
- TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER,
- "Timer port not returned");
- TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
- "Timer priority not returned");
- TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time,
- "Timer expiry not returned");
- evtchn_irq_expected = true;
- alarm(1);
- break;
-
- case TEST_TIMER_RESTORE:
- TEST_ASSERT(!evtchn_irq_expected,
- "Expected event channel IRQ but it didn't happen");
- shinfo->evtchn_pending[0] = 0;
-
- if (verbose)
- printf("Testing restored oneshot timer\n");
-
- tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
- evtchn_irq_expected = true;
- alarm(1);
- break;
-
- case TEST_POLL_READY:
- TEST_ASSERT(!evtchn_irq_expected,
- "Expected event channel IRQ but it didn't happen");
-
- if (verbose)
- printf("Testing SCHEDOP_poll with already pending event\n");
- shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER;
- alarm(1);
- break;
-
- case TEST_POLL_TIMEOUT:
- if (verbose)
- printf("Testing SCHEDOP_poll timeout\n");
- shinfo->evtchn_pending[0] = 0;
- alarm(1);
- break;
-
- case TEST_POLL_MASKED:
- if (verbose)
- printf("Testing SCHEDOP_poll wake on masked event\n");
-
- tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
- alarm(1);
- break;
-
- case TEST_POLL_WAKE:
- shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0;
- if (verbose)
- printf("Testing SCHEDOP_poll wake on unmasked event\n");
-
- evtchn_irq_expected = true;
- tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
-
- /* Read it back and check the pending time is reported correctly */
- tmr.u.timer.expires_ns = 0;
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
- TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000,
- "Timer not reported pending");
- alarm(1);
- break;
-
- case SET_VCPU_INFO:
- if (has_shinfo_hva) {
- struct kvm_xen_vcpu_attr vih = {
- .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA,
- .u.hva = (unsigned long)vinfo
- };
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vih);
- }
- break;
-
- case TEST_TIMER_PAST:
- TEST_ASSERT(!evtchn_irq_expected,
- "Expected event channel IRQ but it didn't happen");
- /* Read timer and check it is no longer pending */
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
- TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending");
-
- shinfo->evtchn_pending[0] = 0;
- if (verbose)
- printf("Testing timer in the past\n");
-
- evtchn_irq_expected = true;
- tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL;
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
- alarm(1);
- break;
-
- case TEST_LOCKING_SEND_RACE:
- TEST_ASSERT(!evtchn_irq_expected,
- "Expected event channel IRQ but it didn't happen");
- alarm(0);
-
- if (verbose)
- printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n");
-
- ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm);
- TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret));
-
- struct kvm_irq_routing_xen_evtchn uxe = {
- .port = 1,
- .vcpu = vcpu->id,
- .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL
- };
-
- evtchn_irq_expected = true;
- for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;)
- __vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe);
- break;
-
- case TEST_LOCKING_POLL_RACE:
- TEST_ASSERT(!evtchn_irq_expected,
- "Expected event channel IRQ but it didn't happen");
-
- if (verbose)
- printf("Testing shinfo lock corruption (SCHEDOP_poll)\n");
-
- shinfo->evtchn_pending[0] = 1;
-
- evtchn_irq_expected = true;
- tmr.u.timer.expires_ns = rs->state_entry_time +
- SHINFO_RACE_TIMEOUT * 1000000000ULL;
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
- break;
-
- case TEST_LOCKING_POLL_TIMEOUT:
- /*
- * Optional and possibly repeated sync point.
- * Injecting the timer IRQ may fail if the
- * shinfo is invalid when the timer expires.
- * If the timer has expired but the IRQ hasn't
- * been delivered, rearm the timer and retry.
- */
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
-
- /* Resume the guest if the timer is still pending. */
- if (tmr.u.timer.expires_ns)
- break;
-
- /* All done if the IRQ was delivered. */
- if (!evtchn_irq_expected)
- break;
-
- tmr.u.timer.expires_ns = rs->state_entry_time +
- SHINFO_RACE_TIMEOUT * 1000000000ULL;
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
- break;
- case TEST_DONE:
- TEST_ASSERT(!evtchn_irq_expected,
- "Expected event channel IRQ but it didn't happen");
-
- ret = pthread_cancel(thread);
- TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret));
-
- ret = pthread_join(thread, 0);
- TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret));
- goto done;
-
- case TEST_GUEST_SAW_IRQ:
- TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
- evtchn_irq_expected = false;
- break;
- }
- break;
- }
- case UCALL_DONE:
- goto done;
- default:
- TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
- }
- }
-
- done:
- evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN;
- evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET;
- vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset);
-
- alarm(0);
-
- /*
- * Just a *really* basic check that things are being put in the
- * right place. The actual calculations are much the same for
- * Xen as they are for the KVM variants, so no need to check.
- */
- struct pvclock_wall_clock *wc;
- struct pvclock_vcpu_time_info *ti, *ti2;
- struct kvm_clock_data kcdata;
- long long delta;
-
- wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
- ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
- ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
-
- if (verbose) {
- printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
- printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
- ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
- ti->tsc_shift, ti->flags);
- printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
- ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
- ti2->tsc_shift, ti2->flags);
- }
-
- TEST_ASSERT(wc->version && !(wc->version & 1),
- "Bad wallclock version %x", wc->version);
-
- vm_ioctl(vm, KVM_GET_CLOCK, &kcdata);
-
- if (kcdata.flags & KVM_CLOCK_REALTIME) {
- if (verbose) {
- printf("KVM_GET_CLOCK clock: %lld.%09lld\n",
- kcdata.clock / NSEC_PER_SEC, kcdata.clock % NSEC_PER_SEC);
- printf("KVM_GET_CLOCK realtime: %lld.%09lld\n",
- kcdata.realtime / NSEC_PER_SEC, kcdata.realtime % NSEC_PER_SEC);
- }
-
- delta = (wc->sec * NSEC_PER_SEC + wc->nsec) - (kcdata.realtime - kcdata.clock);
-
- /*
- * KVM_GET_CLOCK gives CLOCK_REALTIME which jumps on leap seconds updates but
- * unfortunately KVM doesn't currently offer a CLOCK_TAI alternative. Accept 1s
- * delta as testing clock accuracy is not the goal here. The test just needs to
- * check that the value in shinfo is somewhat sane.
- */
- TEST_ASSERT(llabs(delta) < NSEC_PER_SEC,
- "Guest's epoch from shinfo %d.%09d differs from KVM_GET_CLOCK %lld.%lld",
- wc->sec, wc->nsec, (kcdata.realtime - kcdata.clock) / NSEC_PER_SEC,
- (kcdata.realtime - kcdata.clock) % NSEC_PER_SEC);
- } else {
- pr_info("Missing KVM_CLOCK_REALTIME, skipping shinfo epoch sanity check\n");
- }
-
- TEST_ASSERT(ti->version && !(ti->version & 1),
- "Bad time_info version %x", ti->version);
- TEST_ASSERT(ti2->version && !(ti2->version & 1),
- "Bad time_info version %x", ti->version);
-
- if (do_runstate_tests) {
- /*
- * Fetch runstate and check sanity. Strictly speaking in the
- * general case we might not expect the numbers to be identical
- * but in this case we know we aren't running the vCPU any more.
- */
- struct kvm_xen_vcpu_attr rst = {
- .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
- };
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst);
-
- if (verbose) {
- printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
- rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
- rs->state, rs->state_entry_time);
- for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
- printf("State %s: %" PRIu64 " ns\n",
- runstate_names[i], rs->time[i]);
- }
- }
-
- /*
- * Exercise runstate info at all points across the page boundary, in
- * 32-bit and 64-bit mode. In particular, test the case where it is
- * configured in 32-bit mode and then switched to 64-bit mode while
- * active, which takes it onto the second page.
- */
- unsigned long runstate_addr;
- struct compat_vcpu_runstate_info *crs;
- for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4;
- runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) {
-
- rs = addr_gpa2hva(vm, runstate_addr);
- crs = (void *)rs;
-
- memset(rs, 0xa5, sizeof(*rs));
-
- /* Set to compatibility mode */
- lm.u.long_mode = 0;
- vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
-
- /* Set runstate to new address (kernel will write it) */
- struct kvm_xen_vcpu_attr st = {
- .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
- .u.gpa = runstate_addr,
- };
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
-
- if (verbose)
- printf("Compatibility runstate at %08lx\n", runstate_addr);
-
- TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch");
- TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time,
- "State entry time mismatch");
- TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running,
- "Running time mismatch");
- TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
- "Runnable time mismatch");
- TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
- "Blocked time mismatch");
- TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
- "Offline time mismatch");
- TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
- "Structure overrun");
- TEST_ASSERT(crs->state_entry_time == crs->time[0] +
- crs->time[1] + crs->time[2] + crs->time[3],
- "runstate times don't add up");
-
-
- /* Now switch to 64-bit mode */
- lm.u.long_mode = 1;
- vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
-
- memset(rs, 0xa5, sizeof(*rs));
-
- /* Don't change the address, just trigger a write */
- struct kvm_xen_vcpu_attr adj = {
- .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST,
- .u.runstate.state = (uint64_t)-1
- };
- vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj);
-
- if (verbose)
- printf("64-bit runstate at %08lx\n", runstate_addr);
-
- TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
- TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
- "State entry time mismatch");
- TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
- "Running time mismatch");
- TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
- "Runnable time mismatch");
- TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
- "Blocked time mismatch");
- TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
- "Offline time mismatch");
- TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
- "Structure overrun");
-
- TEST_ASSERT(rs->state_entry_time == rs->time[0] +
- rs->time[1] + rs->time[2] + rs->time[3],
- "runstate times don't add up");
- }
- }
-
- kvm_vm_free(vm);
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * xen_vmcall_test
- *
- * Copyright © 2020 Amazon.com, Inc. or its affiliates.
- *
- * Userspace hypercall testing
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "hyperv.h"
-
-#define HCALL_REGION_GPA 0xc0000000ULL
-#define HCALL_REGION_SLOT 10
-
-#define INPUTVALUE 17
-#define ARGVALUE(x) (0xdeadbeef5a5a0000UL + x)
-#define RETVALUE 0xcafef00dfbfbffffUL
-
-#define XEN_HYPERCALL_MSR 0x40000200
-#define HV_GUEST_OS_ID_MSR 0x40000000
-#define HV_HYPERCALL_MSR 0x40000001
-
-#define HVCALL_SIGNAL_EVENT 0x005d
-#define HV_STATUS_INVALID_ALIGNMENT 4
-
-static void guest_code(void)
-{
- unsigned long rax = INPUTVALUE;
- unsigned long rdi = ARGVALUE(1);
- unsigned long rsi = ARGVALUE(2);
- unsigned long rdx = ARGVALUE(3);
- unsigned long rcx;
- register unsigned long r10 __asm__("r10") = ARGVALUE(4);
- register unsigned long r8 __asm__("r8") = ARGVALUE(5);
- register unsigned long r9 __asm__("r9") = ARGVALUE(6);
-
- /* First a direct invocation of 'vmcall' */
- __asm__ __volatile__("vmcall" :
- "=a"(rax) :
- "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
- "r"(r10), "r"(r8), "r"(r9));
- GUEST_ASSERT(rax == RETVALUE);
-
- /* Fill in the Xen hypercall page */
- __asm__ __volatile__("wrmsr" : : "c" (XEN_HYPERCALL_MSR),
- "a" (HCALL_REGION_GPA & 0xffffffff),
- "d" (HCALL_REGION_GPA >> 32));
-
- /* Set Hyper-V Guest OS ID */
- __asm__ __volatile__("wrmsr" : : "c" (HV_GUEST_OS_ID_MSR),
- "a" (0x5a), "d" (0));
-
- /* Hyper-V hypercall page */
- u64 msrval = HCALL_REGION_GPA + PAGE_SIZE + 1;
- __asm__ __volatile__("wrmsr" : : "c" (HV_HYPERCALL_MSR),
- "a" (msrval & 0xffffffff),
- "d" (msrval >> 32));
-
- /* Invoke a Xen hypercall */
- __asm__ __volatile__("call *%1" : "=a"(rax) :
- "r"(HCALL_REGION_GPA + INPUTVALUE * 32),
- "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
- "r"(r10), "r"(r8), "r"(r9));
- GUEST_ASSERT(rax == RETVALUE);
-
- /* Invoke a Hyper-V hypercall */
- rax = 0;
- rcx = HVCALL_SIGNAL_EVENT; /* code */
- rdx = 0x5a5a5a5a; /* ingpa (badly aligned) */
- __asm__ __volatile__("call *%1" : "=a"(rax) :
- "r"(HCALL_REGION_GPA + PAGE_SIZE),
- "a"(rax), "c"(rcx), "d"(rdx),
- "r"(r8));
- GUEST_ASSERT(rax == HV_STATUS_INVALID_ALIGNMENT);
-
- GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int xen_caps;
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
- TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL);
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vcpu_set_hv_cpuid(vcpu);
-
- struct kvm_xen_hvm_config hvmc = {
- .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
- .msr = XEN_HYPERCALL_MSR,
- };
- vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
-
- /* Map a region for the hypercall pages */
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- HCALL_REGION_GPA, HCALL_REGION_SLOT, 2, 0);
- virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2);
-
- for (;;) {
- volatile struct kvm_run *run = vcpu->run;
- struct ucall uc;
-
- vcpu_run(vcpu);
-
- if (run->exit_reason == KVM_EXIT_XEN) {
- TEST_ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
- TEST_ASSERT_EQ(run->xen.u.hcall.cpl, 0);
- TEST_ASSERT_EQ(run->xen.u.hcall.longmode, 1);
- TEST_ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
- TEST_ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
- TEST_ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
- TEST_ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
- TEST_ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
- TEST_ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
- TEST_ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
- run->xen.u.hcall.result = RETVALUE;
- continue;
- }
-
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- /* NOT REACHED */
- case UCALL_SYNC:
- break;
- case UCALL_DONE:
- goto done;
- default:
- TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
- }
- }
-done:
- kvm_vm_free(vm);
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2019, Google LLC.
- *
- * Tests for the IA32_XSS MSR.
- */
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "vmx.h"
-
-#define MSR_BITS 64
-
-int main(int argc, char *argv[])
-{
- bool xss_in_msr_list;
- struct kvm_vm *vm;
- struct kvm_vcpu *vcpu;
- uint64_t xss_val;
- int i, r;
-
- /* Create VM */
- vm = vm_create_with_one_vcpu(&vcpu, NULL);
-
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVES));
-
- xss_val = vcpu_get_msr(vcpu, MSR_IA32_XSS);
- TEST_ASSERT(xss_val == 0,
- "MSR_IA32_XSS should be initialized to zero");
-
- vcpu_set_msr(vcpu, MSR_IA32_XSS, xss_val);
-
- /*
- * At present, KVM only supports a guest IA32_XSS value of 0. Verify
- * that trying to set the guest IA32_XSS to an unsupported value fails.
- * Also, in the future when a non-zero value succeeds check that
- * IA32_XSS is in the list of MSRs to save/restore.
- */
- xss_in_msr_list = kvm_msr_is_in_save_restore_list(MSR_IA32_XSS);
- for (i = 0; i < MSR_BITS; ++i) {
- r = _vcpu_set_msr(vcpu, MSR_IA32_XSS, 1ull << i);
-
- /*
- * Setting a list of MSRs returns the entry that "faulted", or
- * the last entry +1 if all MSRs were successfully written.
- */
- TEST_ASSERT(!r || r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r));
- TEST_ASSERT(r != 1 || xss_in_msr_list,
- "IA32_XSS was able to be set, but was not in save/restore list");
- }
-
- kvm_vm_free(vm);
-}