From: Paolo Bonzini Date: Fri, 25 Jun 2021 15:24:24 +0000 (-0400) Subject: Merge tag 'kvmarm-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmar... X-Git-Tag: block-5.14-2021-07-08~73^2 X-Git-Url: https://git.kernel.dk/?a=commitdiff_plain;h=b8917b4ae44d1b945f6fba3d8ee6777edb44633b;p=linux-2.6-block.git Merge tag 'kvmarm-5.14' of git://git./linux/kernel/git/kvmarm/kvmarm into HEAD KVM/arm64 updates for v5.14. - Add MTE support in guests, complete with tag save/restore interface - Reduce the impact of CMOs by moving them in the page-table code - Allow device block mappings at stage-2 - Reduce the footprint of the vmemmap in protected mode - Support the vGIC on dumb systems such as the Apple M1 - Add selftest infrastructure to support multiple configuration and apply that to PMU/non-PMU setups - Add selftests for the debug architecture - The usual crop of PMU fixes --- b8917b4ae44d1b945f6fba3d8ee6777edb44633b diff --cc Documentation/virt/kvm/api.rst index 3b6e3b1628b4,97661a97943f..b9ddce5638f5 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@@ -5039,224 -5034,43 +5039,260 @@@ see KVM_XEN_VCPU_SET_ATTR above The KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST type may not be used with the KVM_XEN_VCPU_GET_ATTR ioctl. + 4.130 KVM_ARM_MTE_COPY_TAGS + --------------------------- + + :Capability: KVM_CAP_ARM_MTE + :Architectures: arm64 + :Type: vm ioctl + :Parameters: struct kvm_arm_copy_mte_tags + :Returns: number of bytes copied, < 0 on error (-EINVAL for incorrect + arguments, -EFAULT if memory cannot be accessed). + + :: + + struct kvm_arm_copy_mte_tags { + __u64 guest_ipa; + __u64 length; + void __user *addr; + __u64 flags; + __u64 reserved[2]; + }; + + Copies Memory Tagging Extension (MTE) tags to/from guest tag memory. The + ``guest_ipa`` and ``length`` fields must be ``PAGE_SIZE`` aligned. The ``addr`` + field must point to a buffer which the tags will be copied to or from. + + ``flags`` specifies the direction of copy, either ``KVM_ARM_TAGS_TO_GUEST`` or + ``KVM_ARM_TAGS_FROM_GUEST``. + + The size of the buffer to store the tags is ``(length / 16)`` bytes + (granules in MTE are 16 bytes long). Each byte contains a single tag + value. This matches the format of ``PTRACE_PEEKMTETAGS`` and + ``PTRACE_POKEMTETAGS``. + + If an error occurs before any data is copied then a negative error code is + returned. If some tags have been copied before an error occurs then the number + of bytes successfully copied is returned. If the call completes successfully + then ``length`` is returned. +4.131 KVM_GET_SREGS2 +------------------ + +:Capability: KVM_CAP_SREGS2 +:Architectures: x86 +:Type: vcpu ioctl +:Parameters: struct kvm_sregs2 (out) +:Returns: 0 on success, -1 on error + +Reads special registers from the vcpu. +This ioctl (when supported) replaces the KVM_GET_SREGS. + +:: + +struct kvm_sregs2 { + /* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */ + struct kvm_segment cs, ds, es, fs, gs, ss; + struct kvm_segment tr, ldt; + struct kvm_dtable gdt, idt; + __u64 cr0, cr2, cr3, cr4, cr8; + __u64 efer; + __u64 apic_base; + __u64 flags; + __u64 pdptrs[4]; +}; + +flags values for ``kvm_sregs2``: + +``KVM_SREGS2_FLAGS_PDPTRS_VALID`` + + Indicates thats the struct contain valid PDPTR values. + + +4.132 KVM_SET_SREGS2 +------------------ + +:Capability: KVM_CAP_SREGS2 +:Architectures: x86 +:Type: vcpu ioctl +:Parameters: struct kvm_sregs2 (in) +:Returns: 0 on success, -1 on error + +Writes special registers into the vcpu. +See KVM_GET_SREGS2 for the data structures. +This ioctl (when supported) replaces the KVM_SET_SREGS. + +4.133 KVM_GET_STATS_FD +---------------------- + +:Capability: KVM_CAP_STATS_BINARY_FD +:Architectures: all +:Type: vm ioctl, vcpu ioctl +:Parameters: none +:Returns: statistics file descriptor on success, < 0 on error + +Errors: + + ====== ====================================================== + ENOMEM if the fd could not be created due to lack of memory + EMFILE if the number of opened files exceeds the limit + ====== ====================================================== + +The returned file descriptor can be used to read VM/vCPU statistics data in +binary format. The data in the file descriptor consists of four blocks +organized as follows: + ++-------------+ +| Header | ++-------------+ +| id string | ++-------------+ +| Descriptors | ++-------------+ +| Stats Data | ++-------------+ + +Apart from the header starting at offset 0, please be aware that it is +not guaranteed that the four blocks are adjacent or in the above order; +the offsets of the id, descriptors and data blocks are found in the +header. However, all four blocks are aligned to 64 bit offsets in the +file and they do not overlap. + +All blocks except the data block are immutable. Userspace can read them +only one time after retrieving the file descriptor, and then use ``pread`` or +``lseek`` to read the statistics repeatedly. + +All data is in system endianness. + +The format of the header is as follows:: + + struct kvm_stats_header { + __u32 flags; + __u32 name_size; + __u32 num_desc; + __u32 id_offset; + __u32 desc_offset; + __u32 data_offset; + }; + +The ``flags`` field is not used at the moment. It is always read as 0. + +The ``name_size`` field is the size (in byte) of the statistics name string +(including trailing '\0') which is contained in the "id string" block and +appended at the end of every descriptor. + +The ``num_desc`` field is the number of descriptors that are included in the +descriptor block. (The actual number of values in the data block may be +larger, since each descriptor may comprise more than one value). + +The ``id_offset`` field is the offset of the id string from the start of the +file indicated by the file descriptor. It is a multiple of 8. + +The ``desc_offset`` field is the offset of the Descriptors block from the start +of the file indicated by the file descriptor. It is a multiple of 8. + +The ``data_offset`` field is the offset of the Stats Data block from the start +of the file indicated by the file descriptor. It is a multiple of 8. + +The id string block contains a string which identifies the file descriptor on +which KVM_GET_STATS_FD was invoked. The size of the block, including the +trailing ``'\0'``, is indicated by the ``name_size`` field in the header. + +The descriptors block is only needed to be read once for the lifetime of the +file descriptor contains a sequence of ``struct kvm_stats_desc``, each followed +by a string of size ``name_size``. + + #define KVM_STATS_TYPE_SHIFT 0 + #define KVM_STATS_TYPE_MASK (0xF << KVM_STATS_TYPE_SHIFT) + #define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT) + #define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT) + #define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT) + + #define KVM_STATS_UNIT_SHIFT 4 + #define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT) + #define KVM_STATS_UNIT_NONE (0x0 << KVM_STATS_UNIT_SHIFT) + #define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT) + #define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) + #define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) + + #define KVM_STATS_BASE_SHIFT 8 + #define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT) + #define KVM_STATS_BASE_POW10 (0x0 << KVM_STATS_BASE_SHIFT) + #define KVM_STATS_BASE_POW2 (0x1 << KVM_STATS_BASE_SHIFT) + + struct kvm_stats_desc { + __u32 flags; + __s16 exponent; + __u16 size; + __u32 offset; + __u32 unused; + char name[]; + }; + +The ``flags`` field contains the type and unit of the statistics data described +by this descriptor. Its endianness is CPU native. +The following flags are supported: + +Bits 0-3 of ``flags`` encode the type: + * ``KVM_STATS_TYPE_CUMULATIVE`` + The statistics data is cumulative. The value of data can only be increased. + Most of the counters used in KVM are of this type. + The corresponding ``size`` field for this type is always 1. + All cumulative statistics data are read/write. + * ``KVM_STATS_TYPE_INSTANT`` + The statistics data is instantaneous. Its value can be increased or + decreased. This type is usually used as a measurement of some resources, + like the number of dirty pages, the number of large pages, etc. + All instant statistics are read only. + The corresponding ``size`` field for this type is always 1. + * ``KVM_STATS_TYPE_PEAK`` + The statistics data is peak. The value of data can only be increased, and + represents a peak value for a measurement, for example the maximum number + of items in a hash table bucket, the longest time waited and so on. + The corresponding ``size`` field for this type is always 1. + +Bits 4-7 of ``flags`` encode the unit: + * ``KVM_STATS_UNIT_NONE`` + There is no unit for the value of statistics data. This usually means that + the value is a simple counter of an event. + * ``KVM_STATS_UNIT_BYTES`` + It indicates that the statistics data is used to measure memory size, in the + unit of Byte, KiByte, MiByte, GiByte, etc. The unit of the data is + determined by the ``exponent`` field in the descriptor. + * ``KVM_STATS_UNIT_SECONDS`` + It indicates that the statistics data is used to measure time or latency. + * ``KVM_STATS_UNIT_CYCLES`` + It indicates that the statistics data is used to measure CPU clock cycles. + +Bits 8-11 of ``flags``, together with ``exponent``, encode the scale of the +unit: + * ``KVM_STATS_BASE_POW10`` + The scale is based on power of 10. It is used for measurement of time and + CPU clock cycles. For example, an exponent of -9 can be used with + ``KVM_STATS_UNIT_SECONDS`` to express that the unit is nanoseconds. + * ``KVM_STATS_BASE_POW2`` + The scale is based on power of 2. It is used for measurement of memory size. + For example, an exponent of 20 can be used with ``KVM_STATS_UNIT_BYTES`` to + express that the unit is MiB. + +The ``size`` field is the number of values of this statistics data. Its +value is usually 1 for most of simple statistics. 1 means it contains an +unsigned 64bit data. + +The ``offset`` field is the offset from the start of Data Block to the start of +the corresponding statistics data. + +The ``unused`` field is reserved for future support for other types of +statistics data, like log/linear histogram. Its value is always 0 for the types +defined above. + +The ``name`` field is the name string of the statistics data. The name string +starts at the end of ``struct kvm_stats_desc``. The maximum length including +the trailing ``'\0'``, is indicated by ``name_size`` in the header. + +The Stats Data block contains an array of 64-bit values in the same order +as the descriptors in Descriptors block. + 5. The kvm_run structure ======================== @@@ -6586,42 -6399,29 +6622,65 @@@ default See Documentation/x86/sgx/2.Kernel-internals.rst for more details. -7.26 KVM_CAP_ARM_MTE +7.26 KVM_CAP_PPC_RPT_INVALIDATE +------------------------------- + +:Capability: KVM_CAP_PPC_RPT_INVALIDATE +:Architectures: ppc +:Type: vm + +This capability indicates that the kernel is capable of handling +H_RPT_INVALIDATE hcall. + +In order to enable the use of H_RPT_INVALIDATE in the guest, +user space might have to advertise it for the guest. For example, +IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is +present in the "ibm,hypertas-functions" device-tree property. + +This capability is enabled for hypervisors on platforms like POWER9 +that support radix MMU. + +7.27 KVM_CAP_EXIT_ON_EMULATION_FAILURE +-------------------------------------- + +:Architectures: x86 +:Parameters: args[0] whether the feature should be enabled or not + +When this capability is enabled, an emulation failure will result in an exit +to userspace with KVM_INTERNAL_ERROR (except when the emulator was invoked +to handle a VMware backdoor instruction). Furthermore, KVM will now provide up +to 15 instruction bytes for any exit to userspace resulting from an emulation +failure. When these exits to userspace occur use the emulation_failure struct +instead of the internal struct. They both have the same layout, but the +emulation_failure struct matches the content better. It also explicitly +defines the 'flags' field which is used to describe the fields in the struct +that are valid (ie: if KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES is +set in the 'flags' field then both 'insn_size' and 'insn_bytes' have valid data +in them.) + ++7.28 KVM_CAP_ARM_MTE + -------------------- + + :Architectures: arm64 + :Parameters: none + + This capability indicates that KVM (and the hardware) supports exposing the + Memory Tagging Extensions (MTE) to the guest. It must also be enabled by the + VMM before creating any VCPUs to allow the guest access. Note that MTE is only + available to a guest running in AArch64 mode and enabling this capability will + cause attempts to create AArch32 VCPUs to fail. + + When enabled the guest is able to access tags associated with any memory given + to the guest. KVM will ensure that the tags are maintained during swap or + hibernation of the host; however the VMM needs to manually save/restore the + tags as appropriate if the VM is migrated. + + When this capability is enabled all memory in memslots must be mapped as + not-shareable (no MAP_SHARED), attempts to create a memslot with a + MAP_SHARED mmap will result in an -EINVAL return. + + When enabled the VMM may make use of the ``KVM_ARM_MTE_COPY_TAGS`` ioctl to + perform a bulk copy of tags to/from the guest. 8. Other capabilities. ====================== diff --cc include/uapi/linux/kvm.h index 68c9e6d8bbda,da1edd2b4046..d9e4aabcb31a --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@@ -1105,12 -1083,7 +1105,13 @@@ struct kvm_ppc_resize_hpt #define KVM_CAP_SGX_ATTRIBUTE 196 #define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 #define KVM_CAP_PTP_KVM 198 -#define KVM_CAP_ARM_MTE 199 +#define KVM_CAP_HYPERV_ENFORCE_CPUID 199 +#define KVM_CAP_SREGS2 200 +#define KVM_CAP_EXIT_HYPERCALL 201 +#define KVM_CAP_PPC_RPT_INVALIDATE 202 +#define KVM_CAP_BINARY_STATS_FD 203 +#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 ++#define KVM_CAP_ARM_MTE 205 #ifdef KVM_CAP_IRQ_ROUTING diff --cc tools/testing/selftests/kvm/Makefile index bc65c57ae40d,a61b016da0e3..b853be2ae3c6 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@@ -34,8 -34,8 +34,8 @@@ ifeq ($(ARCH),s390 endif LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c -LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S +LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S - LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c + LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test @@@ -81,10 -77,9 +81,10 @@@ TEST_GEN_PROGS_x86_64 += memslot_modifi TEST_GEN_PROGS_x86_64 += memslot_perf_test TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time +TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test + TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list - TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve TEST_GEN_PROGS_aarch64 += aarch64/vgic_init TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test diff --cc tools/testing/selftests/kvm/include/kvm_util.h index 45678a2566dd,ce49e22843d8..615ab254899d --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@@ -379,21 -376,20 +380,23 @@@ uint64_t get_ucall(struct kvm_vm *vm, u } while (0) #define GUEST_ASSERT(_condition) \ - __GUEST_ASSERT((_condition), 0, 0) + __GUEST_ASSERT(_condition, #_condition, 0, 0) #define GUEST_ASSERT_1(_condition, arg1) \ - __GUEST_ASSERT((_condition), 1, (arg1)) + __GUEST_ASSERT(_condition, #_condition, 1, (arg1)) #define GUEST_ASSERT_2(_condition, arg1, arg2) \ - __GUEST_ASSERT((_condition), 2, (arg1), (arg2)) + __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2)) #define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \ - __GUEST_ASSERT((_condition), 3, (arg1), (arg2), (arg3)) + __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3)) #define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \ - __GUEST_ASSERT((_condition), 4, (arg1), (arg2), (arg3), (arg4)) + __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4)) + + #define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b) +int vm_get_stats_fd(struct kvm_vm *vm); +int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid); + #endif /* SELFTEST_KVM_UTIL_H */ diff --cc tools/testing/selftests/kvm/include/x86_64/processor.h index 6d27a5435971,92a62c6999bc..242ae8e09a65 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@@ -55,11 -53,6 +55,9 @@@ #define CPUID_PKU (1ul << 3) #define CPUID_LA57 (1ul << 16) +/* CPUID.0x8000_0001.EDX */ +#define CPUID_GBPAGES (1ul << 26) + - #define UNEXPECTED_VECTOR_PORT 0xfff0u - /* General Registers in 64-Bit Mode */ struct gpr64_regs { u64 rax; @@@ -396,13 -389,9 +394,13 @@@ struct ex_regs void vm_init_descriptor_tables(struct kvm_vm *vm); void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); - void vm_handle_exception(struct kvm_vm *vm, int vector, + void vm_install_exception_handler(struct kvm_vm *vm, int vector, void (*handler)(struct ex_regs *)); +uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr); +void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr, + uint64_t pte); + /* * set_cpuid() - overwrites a matching cpuid entry with the provided value. * matches based on ent->function && ent->index. returns true diff --cc tools/testing/selftests/kvm/lib/aarch64/processor.c index ad465ca16237,48b55c93f858..9f49f6caafe5 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@@ -11,8 -12,11 +12,10 @@@ #include "../kvm_util_internal.h" #include "processor.h" -#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 #define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000 + static vm_vaddr_t exception_handlers; + static uint64_t page_align(struct kvm_vm *vm, uint64_t v) { return (v + vm->page_size) & ~(vm->page_size - 1); diff --cc tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c index 1846117ad584,ed27269a01bb..afbbc40df884 --- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c @@@ -421,9 -462,9 +421,9 @@@ int main(int argc, char *argv[] vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID); - vm_handle_exception(vm, IPI_VECTOR, guest_ipi_handler); + vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler); - virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA, 0); + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); vm_vcpu_add_default(vm, SENDER_VCPU_ID, sender_guest_code);