KVM: arm64: np-guest CMOs with PMD_SIZE fixmap
author Vincent Donnefort <vdonnefort@google.com>
Wed, 21 May 2025 12:48:34 +0000 (13:48 +0100)
committer Marc Zyngier <maz@kernel.org>
Wed, 21 May 2025 13:33:51 +0000 (14:33 +0100)
With the introduction of stage-2 huge mappings in the pKVM hypervisor,
guest page CMOs (cache maintenance operations) are needed for PMD_SIZE
ranges. The fixmap only supports PAGE_SIZE mappings, and iterating over
a huge page one page at a time is time-consuming (mostly due to the TLBI
in hyp_fixmap_unmap()), which is a problem for EL2 latency.

Introduce a shared PMD_SIZE fixmap (hyp_fixblock_map/hyp_fixblock_unmap)
to improve guest page CMOs when stage-2 huge mappings are installed.

On a Pixel6, the iterative solution resulted in a latency of ~700us,
while the PMD_SIZE fixmap reduces it to ~100us.

Because of the horrendous private range allocation that would be
necessary, this is disabled on systems with 64KiB pages.

Suggested-by: Quentin Perret <qperret@google.com>
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Link: https://lore.kernel.org/r/20250521124834.1070650-11-vdonnefort@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
arch/arm64/include/asm/kvm_pgtable.h
arch/arm64/kvm/hyp/include/nvhe/mm.h
arch/arm64/kvm/hyp/nvhe/mem_protect.c
arch/arm64/kvm/hyp/nvhe/mm.c
arch/arm64/kvm/hyp/nvhe/setup.c
arch/arm64/kvm/hyp/pgtable.c

index 1b43bcd2a679af29295ee07090216387003e9868..2888b5d037573621c4e126a42f5f21ff9e30b9bd 100644 (file)
@@ -59,6 +59,11 @@ typedef u64 kvm_pte_t;
 
 #define KVM_PHYS_INVALID               (-1ULL)
 
+#define KVM_PTE_TYPE                   BIT(1)
+#define KVM_PTE_TYPE_BLOCK             0
+#define KVM_PTE_TYPE_PAGE              1
+#define KVM_PTE_TYPE_TABLE             1
+
 #define KVM_PTE_LEAF_ATTR_LO           GENMASK(11, 2)
 
 #define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX        GENMASK(4, 2)
index 230e4f2527def39a1f5fe175b54b1ba637e613f0..6e83ce35c2f2e0d4bec8d329876569ba0bae9dec 100644 (file)
 extern struct kvm_pgtable pkvm_pgtable;
 extern hyp_spinlock_t pkvm_pgd_lock;
 
-int hyp_create_pcpu_fixmap(void);
+int hyp_create_fixmap(void);
 void *hyp_fixmap_map(phys_addr_t phys);
 void hyp_fixmap_unmap(void);
+void *hyp_fixblock_map(phys_addr_t phys, size_t *size);
+void hyp_fixblock_unmap(void);
 
 int hyp_create_idmap(u32 hyp_va_bits);
 int hyp_map_vectors(void);
index 1490820b9ebe36645e4e6ac7b7d53531cfe90103..e359cd24ecf0c36bb6a037c11528414bfbd7a0b3 100644 (file)
@@ -216,34 +216,42 @@ static void guest_s2_put_page(void *addr)
        hyp_put_page(&current_vm->pool, addr);
 }
 
-static void clean_dcache_guest_page(void *va, size_t size)
+static void __apply_guest_page(void *va, size_t size,
+                              void (*func)(void *addr, size_t size))
 {
        size += va - PTR_ALIGN_DOWN(va, PAGE_SIZE);
        va = PTR_ALIGN_DOWN(va, PAGE_SIZE);
        size = PAGE_ALIGN(size);
 
        while (size) {
-               __clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)),
-                                         PAGE_SIZE);
-               hyp_fixmap_unmap();
-               va += PAGE_SIZE;
-               size -= PAGE_SIZE;
+               size_t map_size = PAGE_SIZE;
+               void *map;
+
+               if (IS_ALIGNED((unsigned long)va, PMD_SIZE) && size >= PMD_SIZE)
+                       map = hyp_fixblock_map(__hyp_pa(va), &map_size);
+               else
+                       map = hyp_fixmap_map(__hyp_pa(va));
+
+               func(map, map_size);
+
+               if (map_size == PMD_SIZE)
+                       hyp_fixblock_unmap();
+               else
+                       hyp_fixmap_unmap();
+
+               size -= map_size;
+               va += map_size;
        }
 }
 
-static void invalidate_icache_guest_page(void *va, size_t size)
+static void clean_dcache_guest_page(void *va, size_t size)
 {
-       size += va - PTR_ALIGN_DOWN(va, PAGE_SIZE);
-       va = PTR_ALIGN_DOWN(va, PAGE_SIZE);
-       size = PAGE_ALIGN(size);
+       __apply_guest_page(va, size, __clean_dcache_guest_page);
+}
 
-       while (size) {
-               __invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)),
-                                              PAGE_SIZE);
-               hyp_fixmap_unmap();
-               va += PAGE_SIZE;
-               size -= PAGE_SIZE;
-       }
+static void invalidate_icache_guest_page(void *va, size_t size)
+{
+       __apply_guest_page(va, size, __invalidate_icache_guest_page);
 }
 
 int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
index f41c7440b34b42c5a3d0c62dae68a5df0bf58414..ae8391baebc30323a629aea0ad9407905c38f9bb 100644 (file)
@@ -229,9 +229,8 @@ int hyp_map_vectors(void)
        return 0;
 }
 
-void *hyp_fixmap_map(phys_addr_t phys)
+static void *fixmap_map_slot(struct hyp_fixmap_slot *slot, phys_addr_t phys)
 {
-       struct hyp_fixmap_slot *slot = this_cpu_ptr(&fixmap_slots);
        kvm_pte_t pte, *ptep = slot->ptep;
 
        pte = *ptep;
@@ -243,10 +242,21 @@ void *hyp_fixmap_map(phys_addr_t phys)
        return (void *)slot->addr;
 }
 
+void *hyp_fixmap_map(phys_addr_t phys)
+{
+       return fixmap_map_slot(this_cpu_ptr(&fixmap_slots), phys);
+}
+
 static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
 {
        kvm_pte_t *ptep = slot->ptep;
        u64 addr = slot->addr;
+       u32 level;
+
+       if (FIELD_GET(KVM_PTE_TYPE, *ptep) == KVM_PTE_TYPE_PAGE)
+               level = KVM_PGTABLE_LAST_LEVEL;
+       else
+               level = KVM_PGTABLE_LAST_LEVEL - 1; /* create_fixblock() guarantees PMD level */
 
        WRITE_ONCE(*ptep, *ptep & ~KVM_PTE_VALID);
 
@@ -260,7 +270,7 @@ static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
         * https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03
         */
        dsb(ishst);
-       __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), KVM_PGTABLE_LAST_LEVEL);
+       __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
        dsb(ish);
        isb();
 }
@@ -273,9 +283,9 @@ void hyp_fixmap_unmap(void)
 static int __create_fixmap_slot_cb(const struct kvm_pgtable_visit_ctx *ctx,
                                   enum kvm_pgtable_walk_flags visit)
 {
-       struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)ctx->arg);
+       struct hyp_fixmap_slot *slot = (struct hyp_fixmap_slot *)ctx->arg;
 
-       if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_LAST_LEVEL)
+       if (!kvm_pte_valid(ctx->old) || (ctx->end - ctx->start) != kvm_granule_size(ctx->level))
                return -EINVAL;
 
        slot->addr = ctx->addr;
@@ -296,13 +306,84 @@ static int create_fixmap_slot(u64 addr, u64 cpu)
        struct kvm_pgtable_walker walker = {
                .cb     = __create_fixmap_slot_cb,
                .flags  = KVM_PGTABLE_WALK_LEAF,
-               .arg = (void *)cpu,
+               .arg    = per_cpu_ptr(&fixmap_slots, cpu),
        };
 
        return kvm_pgtable_walk(&pkvm_pgtable, addr, PAGE_SIZE, &walker);
 }
 
-int hyp_create_pcpu_fixmap(void)
+#if PAGE_SHIFT < 16
+#define HAS_FIXBLOCK
+static struct hyp_fixmap_slot hyp_fixblock_slot;
+static DEFINE_HYP_SPINLOCK(hyp_fixblock_lock);
+#endif
+
+static int create_fixblock(void)
+{
+#ifdef HAS_FIXBLOCK
+       struct kvm_pgtable_walker walker = {
+               .cb     = __create_fixmap_slot_cb,
+               .flags  = KVM_PGTABLE_WALK_LEAF,
+               .arg    = &hyp_fixblock_slot,
+       };
+       unsigned long addr;
+       phys_addr_t phys;
+       int ret, i;
+
+       /* Find a RAM phys address, PMD aligned */
+       for (i = 0; i < hyp_memblock_nr; i++) {
+               phys = ALIGN(hyp_memory[i].base, PMD_SIZE);
+               if (phys + PMD_SIZE < (hyp_memory[i].base + hyp_memory[i].size))
+                       break;
+       }
+
+       if (i >= hyp_memblock_nr)
+               return -EINVAL;
+
+       hyp_spin_lock(&pkvm_pgd_lock);
+       addr = ALIGN(__io_map_base, PMD_SIZE);
+       ret = __pkvm_alloc_private_va_range(addr, PMD_SIZE);
+       if (ret)
+               goto unlock;
+
+       ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, PMD_SIZE, phys, PAGE_HYP);
+       if (ret)
+               goto unlock;
+
+       ret = kvm_pgtable_walk(&pkvm_pgtable, addr, PMD_SIZE, &walker);
+
+unlock:
+       hyp_spin_unlock(&pkvm_pgd_lock);
+
+       return ret;
+#else
+       return 0;
+#endif
+}
+
+void *hyp_fixblock_map(phys_addr_t phys, size_t *size)
+{
+#ifdef HAS_FIXBLOCK
+       *size = PMD_SIZE;
+       hyp_spin_lock(&hyp_fixblock_lock);
+       return fixmap_map_slot(&hyp_fixblock_slot, phys);
+#else
+       *size = PAGE_SIZE;
+       return hyp_fixmap_map(phys);
+#endif
+}
+
+void hyp_fixblock_unmap(void)
+{
+#ifdef HAS_FIXBLOCK
+       fixmap_clear_slot(&hyp_fixblock_slot);
+       hyp_spin_unlock(&hyp_fixblock_lock);
+#else
+       hyp_fixmap_unmap();
+#endif
+}
+
+int hyp_create_fixmap(void)
 {
        unsigned long addr, i;
        int ret;
@@ -322,7 +403,7 @@ int hyp_create_pcpu_fixmap(void)
                        return ret;
        }
 
-       return 0;
+       return create_fixblock();
 }
 
 int hyp_create_idmap(u32 hyp_va_bits)
index c19860fc818362e5cfd335d1677fa4780f696a5e..a48d3f5a5afba7c5e4dbf1a6466c59910f80ce04 100644 (file)
@@ -312,7 +312,7 @@ void __noreturn __pkvm_init_finalise(void)
        if (ret)
                goto out;
 
-       ret = hyp_create_pcpu_fixmap();
+       ret = hyp_create_fixmap();
        if (ret)
                goto out;
 
index df5cc74a7dd0dcbc7c0fa2c8b0ead203b0571eb4..c351b4abd5dbfbcbe738ba0f9efc41a1c99ecc05 100644 (file)
 #include <asm/kvm_pgtable.h>
 #include <asm/stage2_pgtable.h>
 
-
-#define KVM_PTE_TYPE                   BIT(1)
-#define KVM_PTE_TYPE_BLOCK             0
-#define KVM_PTE_TYPE_PAGE              1
-#define KVM_PTE_TYPE_TABLE             1
-
 struct kvm_pgtable_walk_data {
        struct kvm_pgtable_walker       *walker;