KVM: arm64: nv: Handle VNCR_EL2 invalidation from MMU notifiers
author		Marc Zyngier <maz@kernel.org>
		Wed, 14 May 2025 10:34:54 +0000 (11:34 +0100)
committer	Marc Zyngier <maz@kernel.org>
		Mon, 19 May 2025 07:01:19 +0000 (08:01 +0100)
During an invalidation triggered by an MMU notifier, we need to
make sure we can drop the *host* mapping that would have been
translated by the stage-2 mapping being invalidated.

For the moment, the invalidation is pretty brutal, as we nuke
the full IPA range, and therefore any VNCR_EL2 mapping.

At some point, we'll make this more lightweight, as the code is already
able to deal with something more targeted.
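
For context, a simplified sketch of how an MMU notifier invalidation
reaches this code on arm64; it mirrors the mainline kvm_unmap_gfn_range()
handler with details abridged, and is illustrative rather than part of
this patch:

/*
 * An mmu_notifier invalidation lands here through the generic KVM
 * notifier glue. The canonical stage-2 is unmapped first, then every
 * shadow stage-2 is torn down, and (with this patch) any VNCR_EL2
 * pseudo-TLB entry is dropped as well, clearing the host fixmap slot
 * that backs the guest's VNCR page.
 */
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
{
	if (!kvm->arch.mmu.pgt)
		return false;

	__unmap_stage2_range(&kvm->arch.mmu,
			     range->start << PAGE_SHIFT,
			     (range->end - range->start) << PAGE_SHIFT,
			     range->may_block);

	/* Tears down the shadow S2s and now also drops any VNCR_EL2 mapping */
	kvm_nested_s2_unmap(kvm, range->may_block);

	return false;
}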

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20250514103501.2225951-12-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
arch/arm64/kvm/nested.c

index e81a0fcfb1f3fa9db774e5439ef9bd030b73b378..825bc1b0d26f2cef35acb72a522ac2fcc1140f16 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -434,6 +434,30 @@ static unsigned int ttl_to_size(u8 ttl)
        return max_size;
 }
 
+static u8 pgshift_level_to_ttl(u16 shift, u8 level)
+{
+       u8 ttl;
+
+       switch(shift) {
+       case 12:
+               ttl = TLBI_TTL_TG_4K;
+               break;
+       case 14:
+               ttl = TLBI_TTL_TG_16K;
+               break;
+       case 16:
+               ttl = TLBI_TTL_TG_64K;
+               break;
+       default:
+               BUG();
+       }
+
+       ttl <<= 2;
+       ttl |= level & 3;
+
+       return ttl;
+}
+
 /*
  * Compute the equivalent of the TTL field by parsing the shadow PT.  The
  * granule size is extracted from the cached VTCR_EL2.TG0 while the level is
@@ -784,6 +808,53 @@ int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2)
        return kvm_inject_nested_sync(vcpu, esr_el2);
 }
 
+static void invalidate_vncr(struct vncr_tlb *vt)
+{
+       vt->valid = false;
+       if (vt->cpu != -1)
+               clear_fixmap(vncr_fixmap(vt->cpu));
+}
+
+static void kvm_invalidate_vncr_ipa(struct kvm *kvm, u64 start, u64 end)
+{
+       struct kvm_vcpu *vcpu;
+       unsigned long i;
+
+       lockdep_assert_held_write(&kvm->mmu_lock);
+
+       if (!kvm_has_feat(kvm, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY))
+               return;
+
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
+               u64 ipa_start, ipa_end, ipa_size;
+
+               /*
+                * Careful here: We end-up here from an MMU notifier,
+                * and this can race against a vcpu not being onlined
+                * yet, without the pseudo-TLB being allocated.
+                *
+                * Skip those, as they obviously don't participate in
+                * the invalidation at this stage.
+                */
+               if (!vt)
+                       continue;
+
+               if (!vt->valid)
+                       continue;
+
+               ipa_size = ttl_to_size(pgshift_level_to_ttl(vt->wi.pgshift,
+                                                           vt->wr.level));
+               ipa_start = vt->wr.pa & (ipa_size - 1);
+               ipa_end = ipa_start + ipa_size;
+
+               if (ipa_end <= start || ipa_start >= end)
+                       continue;
+
+               invalidate_vncr(vt);
+       }
+}
+
 void kvm_nested_s2_wp(struct kvm *kvm)
 {
        int i;
@@ -796,6 +867,8 @@ void kvm_nested_s2_wp(struct kvm *kvm)
                if (kvm_s2_mmu_valid(mmu))
                        kvm_stage2_wp_range(mmu, 0, kvm_phys_size(mmu));
        }
+
+       kvm_invalidate_vncr_ipa(kvm, 0, BIT(kvm->arch.mmu.pgt->ia_bits));
 }
 
 void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block)
@@ -810,6 +883,8 @@ void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block)
                if (kvm_s2_mmu_valid(mmu))
                        kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu), may_block);
        }
+
+       kvm_invalidate_vncr_ipa(kvm, 0, BIT(kvm->arch.mmu.pgt->ia_bits));
 }
 
 void kvm_nested_s2_flush(struct kvm *kvm)