2 * VGIC MMIO handling functions
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
14 #include <linux/bitops.h>
15 #include <linux/bsearch.h>
16 #include <linux/kvm.h>
17 #include <linux/kvm_host.h>
18 #include <kvm/iodev.h>
19 #include <kvm/arm_arch_timer.h>
20 #include <kvm/arm_vgic.h>
23 #include "vgic-mmio.h"
25 unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
26 gpa_t addr, unsigned int len)
31 unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
32 gpa_t addr, unsigned int len)
37 void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
38 unsigned int len, unsigned long val)
44 * Read accesses to both GICD_ICENABLER and GICD_ISENABLER return the value
45 * of the enabled bit, so there is only one function for both here.
47 unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
48 gpa_t addr, unsigned int len)
50 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
54 /* Loop over all IRQs affected by this read */
55 for (i = 0; i < len * 8; i++) {
56 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
61 vgic_put_irq(vcpu->kvm, irq);
67 void vgic_mmio_write_senable(struct kvm_vcpu *vcpu,
68 gpa_t addr, unsigned int len,
71 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
75 for_each_set_bit(i, &val, len * 8) {
76 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
78 spin_lock_irqsave(&irq->irq_lock, flags);
80 vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
82 vgic_put_irq(vcpu->kvm, irq);
86 void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
87 gpa_t addr, unsigned int len,
90 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
94 for_each_set_bit(i, &val, len * 8) {
95 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
97 spin_lock_irqsave(&irq->irq_lock, flags);
101 spin_unlock_irqrestore(&irq->irq_lock, flags);
102 vgic_put_irq(vcpu->kvm, irq);
106 unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
107 gpa_t addr, unsigned int len)
109 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
113 /* Loop over all IRQs affected by this read */
114 for (i = 0; i < len * 8; i++) {
115 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
117 if (irq_is_pending(irq))
120 vgic_put_irq(vcpu->kvm, irq);
127 * This function will return the VCPU that performed the MMIO access and
128 * trapped from within the VM, and will return NULL if this is a userspace
131 * We can disable preemption locally around accessing the per-CPU variable,
132 * and use the resolved vcpu pointer after enabling preemption again, because
133 * even if the current thread is migrated to another CPU, reading the per-CPU
134 * value later will give us the same value as we update the per-CPU variable
135 * in the preempt notifier handlers.
137 static struct kvm_vcpu *vgic_get_mmio_requester_vcpu(void)
139 struct kvm_vcpu *vcpu;
142 vcpu = kvm_arm_get_running_vcpu();
147 /* Must be called with irq->irq_lock held */
148 static void vgic_hw_irq_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
154 irq->pending_latch = true;
155 vgic_irq_set_phys_active(irq, true);
158 void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
159 gpa_t addr, unsigned int len,
162 bool is_uaccess = !vgic_get_mmio_requester_vcpu();
163 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
167 for_each_set_bit(i, &val, len * 8) {
168 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
170 spin_lock_irqsave(&irq->irq_lock, flags);
172 vgic_hw_irq_spending(vcpu, irq, is_uaccess);
174 irq->pending_latch = true;
175 vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
176 vgic_put_irq(vcpu->kvm, irq);
180 /* Must be called with irq->irq_lock held */
181 static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
187 irq->pending_latch = false;
190 * We don't want the guest to effectively mask the physical
191 * interrupt by doing a write to SPENDR followed by a write to
192 * CPENDR for HW interrupts, so we clear the active state on
193 * the physical side if the virtual interrupt is not active.
194 * This may lead to taking an additional interrupt on the
195 * host, but that should not be a problem as the worst that
196 * can happen is an additional vgic injection. We also clear
197 * the pending state to maintain proper semantics for edge HW
200 vgic_irq_set_phys_pending(irq, false);
202 vgic_irq_set_phys_active(irq, false);
205 void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
206 gpa_t addr, unsigned int len,
209 bool is_uaccess = !vgic_get_mmio_requester_vcpu();
210 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
214 for_each_set_bit(i, &val, len * 8) {
215 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
217 spin_lock_irqsave(&irq->irq_lock, flags);
220 vgic_hw_irq_cpending(vcpu, irq, is_uaccess);
222 irq->pending_latch = false;
224 spin_unlock_irqrestore(&irq->irq_lock, flags);
225 vgic_put_irq(vcpu->kvm, irq);
229 unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
230 gpa_t addr, unsigned int len)
232 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
236 /* Loop over all IRQs affected by this read */
237 for (i = 0; i < len * 8; i++) {
238 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
243 vgic_put_irq(vcpu->kvm, irq);
249 /* Must be called with irq->irq_lock held */
250 static void vgic_hw_irq_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
251 bool active, bool is_uaccess)
256 irq->active = active;
257 vgic_irq_set_phys_active(irq, active);
260 static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
264 struct kvm_vcpu *requester_vcpu = vgic_get_mmio_requester_vcpu();
266 spin_lock_irqsave(&irq->irq_lock, flags);
269 * If this virtual IRQ was written into a list register, we
270 * have to make sure the CPU that runs the VCPU thread has
271 * synced back the LR state to the struct vgic_irq.
273 * As long as the conditions below are true, we know the VCPU thread
274 * may be on its way back from the guest (we kicked the VCPU thread in
275 * vgic_change_active_prepare) and still has to sync back this IRQ,
276 * so we release and re-acquire the spin_lock to let the other thread
279 * When accessing VGIC state from user space, requester_vcpu is
280 * NULL, which is fine, because we guarantee that no VCPUs are running
281 * when accessing VGIC state from user space so irq->vcpu->cpu is
284 while (irq->vcpu && /* IRQ may have state in an LR somewhere */
285 irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */
286 irq->vcpu->cpu != -1) /* VCPU thread is running */
287 cond_resched_lock(&irq->irq_lock);
290 vgic_hw_irq_change_active(vcpu, irq, active, !requester_vcpu);
292 irq->active = active;
295 vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
297 spin_unlock_irqrestore(&irq->irq_lock, flags);
301 * If we are fiddling with an IRQ's active state, we have to make sure the IRQ
302 * is not queued on some running VCPU's LRs, because then the change to the
303 * active state can be overwritten when the VCPU's state is synced coming back
306 * For shared interrupts, we have to stop all the VCPUs because interrupts can
307 * be migrated while we don't hold the IRQ locks and we don't want to be
308 * chasing moving targets.
310 * For private interrupts we don't have to do anything because userspace
311 * accesses to the VGIC state already require all VCPUs to be stopped, and
312 * only the VCPU itself can modify its private interrupts active state, which
313 * guarantees that the VCPU is not running.
315 static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid)
317 if (intid > VGIC_NR_PRIVATE_IRQS)
318 kvm_arm_halt_guest(vcpu->kvm);
321 /* See vgic_change_active_prepare */
322 static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid)
324 if (intid > VGIC_NR_PRIVATE_IRQS)
325 kvm_arm_resume_guest(vcpu->kvm);
328 static void __vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
329 gpa_t addr, unsigned int len,
332 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
335 for_each_set_bit(i, &val, len * 8) {
336 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
337 vgic_mmio_change_active(vcpu, irq, false);
338 vgic_put_irq(vcpu->kvm, irq);
342 void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
343 gpa_t addr, unsigned int len,
346 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
348 mutex_lock(&vcpu->kvm->lock);
349 vgic_change_active_prepare(vcpu, intid);
351 __vgic_mmio_write_cactive(vcpu, addr, len, val);
353 vgic_change_active_finish(vcpu, intid);
354 mutex_unlock(&vcpu->kvm->lock);
357 void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
358 gpa_t addr, unsigned int len,
361 __vgic_mmio_write_cactive(vcpu, addr, len, val);
364 static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
365 gpa_t addr, unsigned int len,
368 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
371 for_each_set_bit(i, &val, len * 8) {
372 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
373 vgic_mmio_change_active(vcpu, irq, true);
374 vgic_put_irq(vcpu->kvm, irq);
378 void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
379 gpa_t addr, unsigned int len,
382 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
384 mutex_lock(&vcpu->kvm->lock);
385 vgic_change_active_prepare(vcpu, intid);
387 __vgic_mmio_write_sactive(vcpu, addr, len, val);
389 vgic_change_active_finish(vcpu, intid);
390 mutex_unlock(&vcpu->kvm->lock);
393 void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
394 gpa_t addr, unsigned int len,
397 __vgic_mmio_write_sactive(vcpu, addr, len, val);
400 unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
401 gpa_t addr, unsigned int len)
403 u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
407 for (i = 0; i < len; i++) {
408 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
410 val |= (u64)irq->priority << (i * 8);
412 vgic_put_irq(vcpu->kvm, irq);
419 * We currently don't handle changing the priority of an interrupt that
420 * is already pending on a VCPU. If there is a need for this, we would
421 * need to make this VCPU exit and re-evaluate the priorities, potentially
422 * leading to this interrupt getting presented now to the guest (if it has
423 * been masked by the priority mask before).
425 void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
426 gpa_t addr, unsigned int len,
429 u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
433 for (i = 0; i < len; i++) {
434 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
436 spin_lock_irqsave(&irq->irq_lock, flags);
437 /* Narrow the priority range to what we actually support */
438 irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS);
439 spin_unlock_irqrestore(&irq->irq_lock, flags);
441 vgic_put_irq(vcpu->kvm, irq);
445 unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu,
446 gpa_t addr, unsigned int len)
448 u32 intid = VGIC_ADDR_TO_INTID(addr, 2);
452 for (i = 0; i < len * 4; i++) {
453 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
455 if (irq->config == VGIC_CONFIG_EDGE)
456 value |= (2U << (i * 2));
458 vgic_put_irq(vcpu->kvm, irq);
464 void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
465 gpa_t addr, unsigned int len,
468 u32 intid = VGIC_ADDR_TO_INTID(addr, 2);
472 for (i = 0; i < len * 4; i++) {
473 struct vgic_irq *irq;
476 * The configuration cannot be changed for SGIs in general,
477 * for PPIs this is IMPLEMENTATION DEFINED. The arch timer
478 * code relies on PPIs being level triggered, so we also
479 * make them read-only here.
481 if (intid + i < VGIC_NR_PRIVATE_IRQS)
484 irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
485 spin_lock_irqsave(&irq->irq_lock, flags);
487 if (test_bit(i * 2 + 1, &val))
488 irq->config = VGIC_CONFIG_EDGE;
490 irq->config = VGIC_CONFIG_LEVEL;
492 spin_unlock_irqrestore(&irq->irq_lock, flags);
493 vgic_put_irq(vcpu->kvm, irq);
497 u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid)
501 int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
503 for (i = 0; i < 32; i++) {
504 struct vgic_irq *irq;
506 if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs)
509 irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
510 if (irq->config == VGIC_CONFIG_LEVEL && irq->line_level)
513 vgic_put_irq(vcpu->kvm, irq);
519 void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid,
523 int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
526 for (i = 0; i < 32; i++) {
527 struct vgic_irq *irq;
530 if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs)
533 irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
536 * Line level is set irrespective of irq type
537 * (level or edge) to avoid dependency that VM should
538 * restore irq config before line level.
540 new_level = !!(val & (1U << i));
541 spin_lock_irqsave(&irq->irq_lock, flags);
542 irq->line_level = new_level;
544 vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
546 spin_unlock_irqrestore(&irq->irq_lock, flags);
548 vgic_put_irq(vcpu->kvm, irq);
552 static int match_region(const void *key, const void *elt)
554 const unsigned int offset = (unsigned long)key;
555 const struct vgic_register_region *region = elt;
557 if (offset < region->reg_offset)
560 if (offset >= region->reg_offset + region->len)
566 const struct vgic_register_region *
567 vgic_find_mmio_region(const struct vgic_register_region *regions,
568 int nr_regions, unsigned int offset)
570 return bsearch((void *)(uintptr_t)offset, regions, nr_regions,
571 sizeof(regions[0]), match_region);
574 void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
576 if (kvm_vgic_global_state.type == VGIC_V2)
577 vgic_v2_set_vmcr(vcpu, vmcr);
579 vgic_v3_set_vmcr(vcpu, vmcr);
582 void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
584 if (kvm_vgic_global_state.type == VGIC_V2)
585 vgic_v2_get_vmcr(vcpu, vmcr);
587 vgic_v3_get_vmcr(vcpu, vmcr);
591 * kvm_mmio_read_buf() returns a value in a format where it can be converted
592 * to a byte array and be directly observed as the guest wanted it to appear
593 * in memory if it had done the store itself, which is LE for the GIC, as the
594 * guest knows the GIC is always LE.
596 * We convert this value to the CPU's native format to deal with it as a data
599 unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len)
601 unsigned long data = kvm_mmio_read_buf(val, len);
607 return le16_to_cpu(data);
609 return le32_to_cpu(data);
611 return le64_to_cpu(data);
616 * kvm_mmio_write_buf() expects a value in a format such that if converted to
617 * a byte array it is observed as the guest would see it if it could perform
618 * the load directly. Since the GIC is LE, and the guest knows this, the
619 * guest expects a value in little endian format.
621 * We convert the data value from the CPU's native format to LE so that the
622 * value is returned in the proper format.
624 void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
631 data = cpu_to_le16(data);
634 data = cpu_to_le32(data);
637 data = cpu_to_le64(data);
640 kvm_mmio_write_buf(buf, len, data);
644 struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev)
646 return container_of(dev, struct vgic_io_device, dev);
649 static bool check_region(const struct kvm *kvm,
650 const struct vgic_register_region *region,
653 int flags, nr_irqs = kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
657 flags = VGIC_ACCESS_8bit;
660 flags = VGIC_ACCESS_32bit;
663 flags = VGIC_ACCESS_64bit;
669 if ((region->access_flags & flags) && IS_ALIGNED(addr, len)) {
670 if (!region->bits_per_irq)
673 /* Do we access a non-allocated IRQ? */
674 return VGIC_ADDR_TO_INTID(addr, region->bits_per_irq) < nr_irqs;
680 const struct vgic_register_region *
681 vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
684 const struct vgic_register_region *region;
686 region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
687 addr - iodev->base_addr);
688 if (!region || !check_region(vcpu->kvm, region, addr, len))
694 static int vgic_uaccess_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
695 gpa_t addr, u32 *val)
697 struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
698 const struct vgic_register_region *region;
699 struct kvm_vcpu *r_vcpu;
701 region = vgic_get_mmio_region(vcpu, iodev, addr, sizeof(u32));
707 r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
708 if (region->uaccess_read)
709 *val = region->uaccess_read(r_vcpu, addr, sizeof(u32));
711 *val = region->read(r_vcpu, addr, sizeof(u32));
716 static int vgic_uaccess_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
717 gpa_t addr, const u32 *val)
719 struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
720 const struct vgic_register_region *region;
721 struct kvm_vcpu *r_vcpu;
723 region = vgic_get_mmio_region(vcpu, iodev, addr, sizeof(u32));
727 r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
728 if (region->uaccess_write)
729 region->uaccess_write(r_vcpu, addr, sizeof(u32), *val);
731 region->write(r_vcpu, addr, sizeof(u32), *val);
737 * Userland access to VGIC registers.
739 int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev,
740 bool is_write, int offset, u32 *val)
743 return vgic_uaccess_write(vcpu, &dev->dev, offset, val);
745 return vgic_uaccess_read(vcpu, &dev->dev, offset, val);
748 static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
749 gpa_t addr, int len, void *val)
751 struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
752 const struct vgic_register_region *region;
753 unsigned long data = 0;
755 region = vgic_get_mmio_region(vcpu, iodev, addr, len);
761 switch (iodev->iodev_type) {
763 data = region->read(vcpu, addr, len);
766 data = region->read(vcpu, addr, len);
769 data = region->read(iodev->redist_vcpu, addr, len);
772 data = region->its_read(vcpu->kvm, iodev->its, addr, len);
776 vgic_data_host_to_mmio_bus(val, len, data);
780 static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
781 gpa_t addr, int len, const void *val)
783 struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
784 const struct vgic_register_region *region;
785 unsigned long data = vgic_data_mmio_bus_to_host(val, len);
787 region = vgic_get_mmio_region(vcpu, iodev, addr, len);
791 switch (iodev->iodev_type) {
793 region->write(vcpu, addr, len, data);
796 region->write(vcpu, addr, len, data);
799 region->write(iodev->redist_vcpu, addr, len, data);
802 region->its_write(vcpu->kvm, iodev->its, addr, len, data);
809 struct kvm_io_device_ops kvm_io_gic_ops = {
810 .read = dispatch_mmio_read,
811 .write = dispatch_mmio_write,
814 int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
817 struct vgic_io_device *io_device = &kvm->arch.vgic.dist_iodev;
823 len = vgic_v2_init_dist_iodev(io_device);
826 len = vgic_v3_init_dist_iodev(io_device);
832 io_device->base_addr = dist_base_address;
833 io_device->iodev_type = IODEV_DIST;
834 io_device->redist_vcpu = NULL;
836 mutex_lock(&kvm->slots_lock);
837 ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address,
838 len, &io_device->dev);
839 mutex_unlock(&kvm->slots_lock);