KVM: arm64: nv: Add emulation of AT S12E{0,1}{R,W}
authorMarc Zyngier <maz@kernel.org>
Wed, 19 Jun 2024 07:44:52 +0000 (08:44 +0100)
committerMarc Zyngier <maz@kernel.org>
Fri, 30 Aug 2024 11:04:20 +0000 (12:04 +0100)
On the face of it, AT S12E{0,1}{R,W} is pretty simple. It is the
combination of AT S1E{0,1}{R,W}, followed by an extra S2 walk.

However, there is a great deal of complexity coming from combining
the S1 and S2 attributes to report something consistent in PAR_EL1.

This is an absolute mine field, and I have a splitting headache.

Signed-off-by: Marc Zyngier <maz@kernel.org>
arch/arm64/include/asm/kvm_asm.h
arch/arm64/kvm/at.c

index 6ec06229697667489a317a2e67864a2cb5c1dc18..b36a3b6cc01169d3e8554382da48e15cba7cbc3f 100644 (file)
@@ -238,6 +238,7 @@ extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding);
 extern void __kvm_timer_set_cntvoff(u64 cntvoff);
 extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
 extern void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
+extern void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
index 34736c1fe39871a2ef7e3c52950337d99f80be17..9865d29b31498459f2580ba7493f252f30942e5e 100644 (file)
@@ -71,6 +71,200 @@ static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
        return fail;
 }
 
+#define MEMATTR(ic, oc)                (MEMATTR_##oc << 4 | MEMATTR_##ic)
+#define MEMATTR_NC             0b0100
+#define MEMATTR_Wt             0b1000
+#define MEMATTR_Wb             0b1100
+#define MEMATTR_WbRaWa         0b1111
+
+#define MEMATTR_IS_DEVICE(m)   (((m) & GENMASK(7, 4)) == 0)
+
+static u8 s2_memattr_to_attr(u8 memattr)
+{
+       memattr &= 0b1111;
+
+       switch (memattr) {
+       case 0b0000:
+       case 0b0001:
+       case 0b0010:
+       case 0b0011:
+               return memattr << 2;
+       case 0b0100:
+               return MEMATTR(Wb, Wb);
+       case 0b0101:
+               return MEMATTR(NC, NC);
+       case 0b0110:
+               return MEMATTR(Wt, NC);
+       case 0b0111:
+               return MEMATTR(Wb, NC);
+       case 0b1000:
+               /* Reserved, assume NC */
+               return MEMATTR(NC, NC);
+       case 0b1001:
+               return MEMATTR(NC, Wt);
+       case 0b1010:
+               return MEMATTR(Wt, Wt);
+       case 0b1011:
+               return MEMATTR(Wb, Wt);
+       case 0b1100:
+               /* Reserved, assume NC */
+               return MEMATTR(NC, NC);
+       case 0b1101:
+               return MEMATTR(NC, Wb);
+       case 0b1110:
+               return MEMATTR(Wt, Wb);
+       case 0b1111:
+               return MEMATTR(Wb, Wb);
+       default:
+               unreachable();
+       }
+}
+
+static u8 combine_s1_s2_attr(u8 s1, u8 s2)
+{
+       bool transient;
+       u8 final = 0;
+
+       /* Upgrade transient s1 to non-transient to simplify things */
+       switch (s1) {
+       case 0b0001 ... 0b0011: /* Normal, Write-Through Transient */
+               transient = true;
+               s1 = MEMATTR_Wt | (s1 & GENMASK(1,0));
+               break;
+       case 0b0101 ... 0b0111: /* Normal, Write-Back Transient */
+               transient = true;
+               s1 = MEMATTR_Wb | (s1 & GENMASK(1,0));
+               break;
+       default:
+               transient = false;
+       }
+
+       /* S2CombineS1AttrHints() */
+       if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
+           (s2 & GENMASK(3, 2)) == MEMATTR_NC)
+               final = MEMATTR_NC;
+       else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
+                (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
+               final = MEMATTR_Wt;
+       else
+               final = MEMATTR_Wb;
+
+       if (final != MEMATTR_NC) {
+               /* Inherit RaWa hints form S1 */
+               if (transient) {
+                       switch (s1 & GENMASK(3, 2)) {
+                       case MEMATTR_Wt:
+                               final = 0;
+                               break;
+                       case MEMATTR_Wb:
+                               final = MEMATTR_NC;
+                               break;
+                       }
+               }
+
+               final |= s1 & GENMASK(1, 0);
+       }
+
+       return final;
+}
+
+#define ATTR_NSH       0b00
+#define ATTR_RSV       0b01
+#define ATTR_OSH       0b10
+#define ATTR_ISH       0b11
+
+static u8 compute_sh(u8 attr, u64 desc)
+{
+       u8 sh;
+
+       /* Any form of device, as well as NC has SH[1:0]=0b10 */
+       if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
+               return ATTR_OSH;
+
+       sh = FIELD_GET(PTE_SHARED, desc);
+       if (sh == ATTR_RSV)             /* Reserved, mapped to NSH */
+               sh = ATTR_NSH;
+
+       return sh;
+}
+
+static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
+                          struct kvm_s2_trans *tr)
+{
+       u8 s1_parattr, s2_memattr, final_attr;
+       u64 par;
+
+       /* If S2 has failed to translate, report the damage */
+       if (tr->esr) {
+               par = SYS_PAR_EL1_RES1;
+               par |= SYS_PAR_EL1_F;
+               par |= SYS_PAR_EL1_S;
+               par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
+               return par;
+       }
+
+       s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
+       s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);
+
+       if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
+               if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
+                       s2_memattr &= ~BIT(3);
+
+               /* Combination of R_VRJSW and R_RHWZM */
+               switch (s2_memattr) {
+               case 0b0101:
+                       if (MEMATTR_IS_DEVICE(s1_parattr))
+                               final_attr = s1_parattr;
+                       else
+                               final_attr = MEMATTR(NC, NC);
+                       break;
+               case 0b0110:
+               case 0b1110:
+                       final_attr = MEMATTR(WbRaWa, WbRaWa);
+                       break;
+               case 0b0111:
+               case 0b1111:
+                       /* Preserve S1 attribute */
+                       final_attr = s1_parattr;
+                       break;
+               case 0b0100:
+               case 0b1100:
+               case 0b1101:
+                       /* Reserved, do something non-silly */
+                       final_attr = s1_parattr;
+                       break;
+               default:
+                       /* MemAttr[2]=0, Device from S2 */
+                       final_attr = s2_memattr & GENMASK(1,0) << 2;
+               }
+       } else {
+               /* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
+               u8 s2_parattr = s2_memattr_to_attr(s2_memattr);
+
+               if (MEMATTR_IS_DEVICE(s1_parattr) ||
+                   MEMATTR_IS_DEVICE(s2_parattr)) {
+                       final_attr = min(s1_parattr, s2_parattr);
+               } else {
+                       /* At this stage, this is memory vs memory */
+                       final_attr  = combine_s1_s2_attr(s1_parattr & 0xf,
+                                                        s2_parattr & 0xf);
+                       final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
+                                                        s2_parattr >> 4) << 4;
+               }
+       }
+
+       if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
+           !MEMATTR_IS_DEVICE(final_attr))
+               final_attr = MEMATTR(NC, NC);
+
+       par  = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
+       par |= tr->output & GENMASK(47, 12);
+       par |= FIELD_PREP(SYS_PAR_EL1_SH,
+                         compute_sh(final_attr, tr->desc));
+
+       return par;
+}
+
 /*
  * Return the PAR_EL1 value as the result of a valid translation.
  *
@@ -215,3 +409,62 @@ void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
 
        vcpu_write_sys_reg(vcpu, par, PAR_EL1);
 }
+
+void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+{
+       struct kvm_s2_trans out = {};
+       u64 ipa, par;
+       bool write;
+       int ret;
+
+       /* Do the stage-1 translation */
+       switch (op) {
+       case OP_AT_S12E1R:
+               op = OP_AT_S1E1R;
+               write = false;
+               break;
+       case OP_AT_S12E1W:
+               op = OP_AT_S1E1W;
+               write = true;
+               break;
+       case OP_AT_S12E0R:
+               op = OP_AT_S1E0R;
+               write = false;
+               break;
+       case OP_AT_S12E0W:
+               op = OP_AT_S1E0W;
+               write = true;
+               break;
+       default:
+               WARN_ON_ONCE(1);
+               return;
+       }
+
+       __kvm_at_s1e01(vcpu, op, vaddr);
+       par = vcpu_read_sys_reg(vcpu, PAR_EL1);
+       if (par & SYS_PAR_EL1_F)
+               return;
+
+       /*
+        * If we only have a single stage of translation (E2H=0 or
+        * TGE=1), exit early. Same thing if {VM,DC}=={0,0}.
+        */
+       if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) ||
+           !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
+               return;
+
+       /* Do the stage-2 translation */
+       ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
+       out.esr = 0;
+       ret = kvm_walk_nested_s2(vcpu, ipa, &out);
+       if (ret < 0)
+               return;
+
+       /* Check the access permission */
+       if (!out.esr &&
+           ((!write && !out.readable) || (write && !out.writable)))
+               out.esr = ESR_ELx_FSC_PERM | (out.level & 0x3);
+
+       par = compute_par_s12(vcpu, par, &out);
+       vcpu_write_sys_reg(vcpu, par, PAR_EL1);
+}