KVM: arm64: nv: Fix tracking of shadow list registers
author Marc Zyngier <maz@kernel.org>
Sun, 15 Jun 2025 15:11:38 +0000 (16:11 +0100)
committer Marc Zyngier <maz@kernel.org>
Thu, 19 Jun 2025 08:58:20 +0000 (09:58 +0100)
Wei-Lin reports that the tracking of shadow list registers is
majorly broken when resync'ing the L2 state after a run, as
we confuse the guest's LR index with the host's, potentially
losing the interrupt state.
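
For example, if L2 populates only LRs 1 and 3, their shadow copies
occupy slots 0 and 1 of the compacted host array; indexing that
array with the guest's LR number on resync then reads back the
wrong (or empty) entries.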

While this could be fixed by adding yet another side index to
track it (Wei-Lin's fix), it may be better to refactor this
code to avoid having a side index altogether, limiting the
risk of introducing this class of bugs.

A key observation is that the shadow index is always the number
of bits set in the lr_map bitmap below the considered LR index.
With that, the parallel indexing scheme can be completely dropped.
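
To make the invariant concrete, here is a minimal userspace sketch
of the conversion (mirroring the new lr_map_idx_to_shadow_idx()
helper, with __builtin_popcount() standing in for the kernel's
hweight16(); the test values are illustrative):

  #include <assert.h>

  /* A guest LR's shadow slot is the number of in-use
   * guest LRs below it.
   */
  static int lr_map_idx_to_shadow_idx(unsigned int lr_map, int idx)
  {
          return __builtin_popcount(lr_map & ((1U << idx) - 1));
  }

  int main(void)
  {
          /* Guest LRs 1 and 3 carry state */
          unsigned int lr_map = (1U << 1) | (1U << 3);

          assert(lr_map_idx_to_shadow_idx(lr_map, 1) == 0);
          assert(lr_map_idx_to_shadow_idx(lr_map, 3) == 1);
          return 0;
  }

The same popcount also yields the next free shadow slot at creation
time, which is why used_lrs ends up as hweight16(lr_map).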

While doing this, introduce a couple of helpers that abstract
the index conversion and some of the LR repainting, making the
whole exercise much simpler.

Reported-by: Wei-Lin Chang <r09922117@csie.ntu.edu.tw>
Reviewed-by: Wei-Lin Chang <r09922117@csie.ntu.edu.tw>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20250614145721.2504524-1-r09922117@csie.ntu.edu.tw
Link: https://lore.kernel.org/r/86qzzkc5xa.wl-maz@kernel.org
arch/arm64/kvm/vgic/vgic-v3-nested.c

index d22a8ad7bcc518053643fd2515a0bcc808312d5f..a50fb7e6841f79b9852178494ada41924f92b255 100644
@@ -36,6 +36,11 @@ struct shadow_if {
 
 static DEFINE_PER_CPU(struct shadow_if, shadow_if);
 
+static int lr_map_idx_to_shadow_idx(struct shadow_if *shadow_if, int idx)
+{
+       return hweight16(shadow_if->lr_map & (BIT(idx) - 1));
+}
+
 /*
  * Nesting GICv3 support
  *
@@ -209,6 +214,29 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu)
        return reg;
 }
 
+static u64 translate_lr_pintid(struct kvm_vcpu *vcpu, u64 lr)
+{
+       struct vgic_irq *irq;
+
+       if (!(lr & ICH_LR_HW))
+               return lr;
+
+       /* We have the HW bit set, check for validity of pINTID */
+       irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
+       /* If there was no real mapping, nuke the HW bit */
+       if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI)
+               lr &= ~ICH_LR_HW;
+
+       /* Translate the virtual mapping to the real one, even if invalid */
+       if (irq) {
+               lr &= ~ICH_LR_PHYS_ID_MASK;
+               lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid);
+               vgic_put_irq(vcpu->kvm, irq);
+       }
+
+       return lr;
+}
+
 /*
  * For LRs which have HW bit set such as timer interrupts, we modify them to
  * have the host hardware interrupt number instead of the virtual one programmed
@@ -217,58 +245,37 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu)
 static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu,
                                     struct vgic_v3_cpu_if *s_cpu_if)
 {
-       unsigned long lr_map = 0;
-       int index = 0;
+       struct shadow_if *shadow_if;
+
+       shadow_if = container_of(s_cpu_if, struct shadow_if, cpuif);
+       shadow_if->lr_map = 0;
 
        for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
                u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
-               struct vgic_irq *irq;
 
                if (!(lr & ICH_LR_STATE))
-                       lr = 0;
-
-               if (!(lr & ICH_LR_HW))
-                       goto next;
-
-               /* We have the HW bit set, check for validity of pINTID */
-               irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
-               if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI ) {
-                       /* There was no real mapping, so nuke the HW bit */
-                       lr &= ~ICH_LR_HW;
-                       if (irq)
-                               vgic_put_irq(vcpu->kvm, irq);
-                       goto next;
-               }
-
-               /* Translate the virtual mapping to the real one */
-               lr &= ~ICH_LR_PHYS_ID_MASK;
-               lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid);
+                       continue;
 
-               vgic_put_irq(vcpu->kvm, irq);
+               lr = translate_lr_pintid(vcpu, lr);
 
-next:
-               s_cpu_if->vgic_lr[index] = lr;
-               if (lr) {
-                       lr_map |= BIT(i);
-                       index++;
-               }
+               s_cpu_if->vgic_lr[hweight16(shadow_if->lr_map)] = lr;
+               shadow_if->lr_map |= BIT(i);
        }
 
-       container_of(s_cpu_if, struct shadow_if, cpuif)->lr_map = lr_map;
-       s_cpu_if->used_lrs = index;
+       s_cpu_if->used_lrs = hweight16(shadow_if->lr_map);
 }
 
 void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
 {
        struct shadow_if *shadow_if = get_shadow_if();
-       int i, index = 0;
+       int i;
 
        for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
                u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
                struct vgic_irq *irq;
 
                if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE))
-                       goto next;
+                       continue;
 
                /*
                 * If we had a HW lr programmed by the guest hypervisor, we
@@ -277,15 +284,13 @@ void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
                 */
                irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
                if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */
-                       goto next;
+                       continue;
 
-               lr = __gic_v3_get_lr(index);
+               lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i));
                if (!(lr & ICH_LR_STATE))
                        irq->active = false;
 
                vgic_put_irq(vcpu->kvm, irq);
-       next:
-               index++;
        }
 }
 
@@ -368,13 +373,11 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
                val = __vcpu_sys_reg(vcpu, ICH_LRN(i));
 
                val &= ~ICH_LR_STATE;
-               val |= s_cpu_if->vgic_lr[i] & ICH_LR_STATE;
+               val |= s_cpu_if->vgic_lr[lr_map_idx_to_shadow_idx(shadow_if, i)] & ICH_LR_STATE;
 
                __vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val);
-               s_cpu_if->vgic_lr[i] = 0;
        }
 
-       shadow_if->lr_map = 0;
        vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0;
 }