KVM: arm64: PMU: Add counter_index_to_*reg() helpers
arch/arm64/kvm/pmu-emul.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2015 Linaro Ltd.
4  * Author: Shannon Zhao <shannon.zhao@linaro.org>
5  */
6
7 #include <linux/cpu.h>
8 #include <linux/kvm.h>
9 #include <linux/kvm_host.h>
10 #include <linux/list.h>
11 #include <linux/perf_event.h>
12 #include <linux/perf/arm_pmu.h>
13 #include <linux/uaccess.h>
14 #include <asm/kvm_emulate.h>
15 #include <kvm/arm_pmu.h>
16 #include <kvm/arm_vgic.h>
17
18 #define PERF_ATTR_CFG1_COUNTER_64BIT    BIT(0)
19
20 DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
21
22 static LIST_HEAD(arm_pmus);
23 static DEFINE_MUTEX(arm_pmus_lock);
24
25 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
26
27 static u32 kvm_pmu_event_mask(struct kvm *kvm)
28 {
29         unsigned int pmuver;
30
31         pmuver = kvm->arch.arm_pmu->pmuver;
32
33         switch (pmuver) {
34         case ID_AA64DFR0_EL1_PMUVer_IMP:
35                 return GENMASK(9, 0);
36         case ID_AA64DFR0_EL1_PMUVer_V3P1:
37         case ID_AA64DFR0_EL1_PMUVer_V3P4:
38         case ID_AA64DFR0_EL1_PMUVer_V3P5:
39         case ID_AA64DFR0_EL1_PMUVer_V3P7:
40                 return GENMASK(15, 0);
41         default:                /* Shouldn't be here, just for sanity */
42                 WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
43                 return 0;
44         }
45 }
46
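/*
 * For illustration: a baseline PMUv3 implementation encodes events in
 * 10 bits (event numbers 0..0x3ff), while PMUv3.1 and later use a
 * 16-bit event space (0..0xffff). The same mask also sizes the event
 * filter bitmap below, via nr_events = kvm_pmu_event_mask(kvm) + 1.
 */
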
47 /**
48  * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
49  * @vcpu: The vcpu pointer
50  * @select_idx: The counter index
51  */
52 static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
53 {
54         return (select_idx == ARMV8_PMU_CYCLE_IDX);
55 }
56
57 static bool kvm_pmu_idx_has_64bit_overflow(struct kvm_vcpu *vcpu, u64 select_idx)
58 {
59         return (select_idx == ARMV8_PMU_CYCLE_IDX &&
60                 __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
61 }
62
63 static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx)
64 {
65         return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX &&
66                 !kvm_pmu_idx_has_64bit_overflow(vcpu, idx));
67 }
68
69 static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
70 {
71         struct kvm_pmu *pmu;
72         struct kvm_vcpu_arch *vcpu_arch;
73
74         pmc -= pmc->idx;
75         pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
76         vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
77         return container_of(vcpu_arch, struct kvm_vcpu, arch);
78 }
79
80 static u32 counter_index_to_reg(u64 idx)
81 {
82         return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + idx;
83 }
84
85 static u32 counter_index_to_evtreg(u64 idx)
86 {
87         return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx;
88 }
89
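/*
 * Rough sketch of the mapping the two helpers above implement:
 *
 *   counter_index_to_reg(n)                      -> PMEVCNTR0_EL0 + n
 *   counter_index_to_reg(ARMV8_PMU_CYCLE_IDX)    -> PMCCNTR_EL0
 *   counter_index_to_evtreg(n)                   -> PMEVTYPER0_EL0 + n
 *   counter_index_to_evtreg(ARMV8_PMU_CYCLE_IDX) -> PMCCFILTR_EL0
 *
 * Accesses to a counter or to its event/filter register below go
 * through these helpers instead of open-coding the cycle-counter check.
 */
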
90 /**
91  * kvm_pmu_get_counter_value - get PMU counter value
92  * @vcpu: The vcpu pointer
93  * @select_idx: The counter index
94  */
95 u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
96 {
97         u64 counter, reg, enabled, running;
98         struct kvm_pmu *pmu = &vcpu->arch.pmu;
99         struct kvm_pmc *pmc = &pmu->pmc[select_idx];
100
101         if (!kvm_vcpu_has_pmu(vcpu))
102                 return 0;
103
104         reg = counter_index_to_reg(select_idx);
105         counter = __vcpu_sys_reg(vcpu, reg);
106
107         /*
108          * The real counter value is equal to the value of the counter
109          * register plus whatever the backing perf event has counted.
110          */
111         if (pmc->perf_event)
112                 counter += perf_event_read_value(pmc->perf_event, &enabled,
113                                                  &running);
114
115         if (!kvm_pmu_idx_is_64bit(vcpu, select_idx))
116                 counter = lower_32_bits(counter);
117
118         return counter;
119 }
120
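/*
 * Example: if the shadow register holds 100 and the backing perf event
 * has counted 23 ticks since it was created, the guest observes 123.
 * For anything but the cycle counter the result is then truncated to
 * 32 bits.
 */
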
121 /**
122  * kvm_pmu_set_counter_value - set PMU counter value
123  * @vcpu: The vcpu pointer
124  * @select_idx: The counter index
125  * @val: The counter value
126  */
127 void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
128 {
129         u64 reg;
130
131         if (!kvm_vcpu_has_pmu(vcpu))
132                 return;
133
134         reg = counter_index_to_reg(select_idx);
135         __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
136
137         /* Recreate the perf event to reflect the updated sample_period */
138         kvm_pmu_create_perf_event(vcpu, select_idx);
139 }
140
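/*
 * Note the adjustment above: the shadow register absorbs the delta
 * between the requested value and the current (shadow + perf event)
 * value, so that a subsequent kvm_pmu_get_counter_value() returns
 * exactly the value that was written.
 */
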
141 /**
142  * kvm_pmu_release_perf_event - remove the perf event
143  * @pmc: The PMU counter pointer
144  */
145 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
146 {
147         if (pmc->perf_event) {
148                 perf_event_disable(pmc->perf_event);
149                 perf_event_release_kernel(pmc->perf_event);
150                 pmc->perf_event = NULL;
151         }
152 }
153
154 /**
155  * kvm_pmu_stop_counter - stop PMU counter
     * @vcpu: The vcpu pointer
156  * @pmc: The PMU counter pointer
157  *
158  * If this counter has been configured to monitor some event, release it here.
159  */
160 static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
161 {
162         u64 reg, val;
163
164         if (!pmc->perf_event)
165                 return;
166
167         val = kvm_pmu_get_counter_value(vcpu, pmc->idx);
168
169         reg = counter_index_to_reg(pmc->idx);
170
171         __vcpu_sys_reg(vcpu, reg) = val;
172
173         kvm_pmu_release_perf_event(pmc);
174 }
175
176 /**
177  * kvm_pmu_vcpu_init - assign PMU counter indices for this vcpu
178  * @vcpu: The vcpu pointer
179  *
180  */
181 void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
182 {
183         int i;
184         struct kvm_pmu *pmu = &vcpu->arch.pmu;
185
186         for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
187                 pmu->pmc[i].idx = i;
188 }
189
190 /**
191  * kvm_pmu_vcpu_reset - reset PMU state for this vcpu
192  * @vcpu: The vcpu pointer
193  *
194  */
195 void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
196 {
197         unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
198         struct kvm_pmu *pmu = &vcpu->arch.pmu;
199         int i;
200
201         for_each_set_bit(i, &mask, 32)
202                 kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
203 }
204
205 /**
206  * kvm_pmu_vcpu_destroy - free the PMU perf events of this vcpu
207  * @vcpu: The vcpu pointer
208  *
209  */
210 void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
211 {
212         int i;
213         struct kvm_pmu *pmu = &vcpu->arch.pmu;
214
215         for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
216                 kvm_pmu_release_perf_event(&pmu->pmc[i]);
217         irq_work_sync(&vcpu->arch.pmu.overflow_work);
218 }
219
220 u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
221 {
222         u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
223
224         val &= ARMV8_PMU_PMCR_N_MASK;
225         if (val == 0)
226                 return BIT(ARMV8_PMU_CYCLE_IDX);
227         else
228                 return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
229 }
230
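/*
 * Worked example: with PMCR_EL0.N == 6 the valid counters are the six
 * event counters 0..5 plus the cycle counter, i.e.
 * GENMASK(5, 0) | BIT(ARMV8_PMU_CYCLE_IDX) == 0x8000003f (the cycle
 * counter index being 31).
 */
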
231 /**
232  * kvm_pmu_enable_counter_mask - enable selected PMU counters
233  * @vcpu: The vcpu pointer
234  * @val: the value the guest writes to the PMCNTENSET register
235  *
236  * Call perf_event_enable to start counting the perf event
237  */
238 void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
239 {
240         int i;
241         struct kvm_pmu *pmu = &vcpu->arch.pmu;
242         struct kvm_pmc *pmc;
243
244         if (!kvm_vcpu_has_pmu(vcpu))
245                 return;
246
247         if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
248                 return;
249
250         for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
251                 if (!(val & BIT(i)))
252                         continue;
253
254                 pmc = &pmu->pmc[i];
255
256                 if (!pmc->perf_event) {
257                         kvm_pmu_create_perf_event(vcpu, i);
258                 } else {
259                         perf_event_enable(pmc->perf_event);
260                         if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
261                                 kvm_debug("failed to enable perf event\n");
262                 }
263         }
264 }
265
266 /**
267  * kvm_pmu_disable_counter_mask - disable selected PMU counters
268  * @vcpu: The vcpu pointer
269  * @val: the value the guest writes to the PMCNTENCLR register
270  *
271  * Call perf_event_disable to stop counting the perf event
272  */
273 void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
274 {
275         int i;
276         struct kvm_pmu *pmu = &vcpu->arch.pmu;
277         struct kvm_pmc *pmc;
278
279         if (!kvm_vcpu_has_pmu(vcpu) || !val)
280                 return;
281
282         for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
283                 if (!(val & BIT(i)))
284                         continue;
285
286                 pmc = &pmu->pmc[i];
287
288                 if (pmc->perf_event)
289                         perf_event_disable(pmc->perf_event);
290         }
291 }
292
293 static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
294 {
295         u64 reg = 0;
296
297         if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
298                 reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
299                 reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
300                 reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
301         }
302
303         return reg;
304 }
305
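/*
 * In other words, a counter only contributes to the overflow interrupt
 * line if it has overflowed (PMOVSSET), is enabled (PMCNTENSET), has
 * its interrupt enabled (PMINTENSET) and the PMU itself is enabled
 * (PMCR_EL0.E).
 */
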
306 static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
307 {
308         struct kvm_pmu *pmu = &vcpu->arch.pmu;
309         bool overflow;
310
311         if (!kvm_vcpu_has_pmu(vcpu))
312                 return;
313
314         overflow = !!kvm_pmu_overflow_status(vcpu);
315         if (pmu->irq_level == overflow)
316                 return;
317
318         pmu->irq_level = overflow;
319
320         if (likely(irqchip_in_kernel(vcpu->kvm))) {
321                 int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
322                                               pmu->irq_num, overflow, pmu);
323                 WARN_ON(ret);
324         }
325 }
326
327 bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
328 {
329         struct kvm_pmu *pmu = &vcpu->arch.pmu;
330         struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
331         bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;
332
333         if (likely(irqchip_in_kernel(vcpu->kvm)))
334                 return false;
335
336         return pmu->irq_level != run_level;
337 }
338
339 /*
340  * Reflect the PMU overflow interrupt output level into the kvm_run structure
341  */
342 void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
343 {
344         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
345
346         /* Populate the PMU bit of the device IRQ level for user space */
347         regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
348         if (vcpu->arch.pmu.irq_level)
349                 regs->device_irq_level |= KVM_ARM_DEV_PMU;
350 }
351
352 /**
353  * kvm_pmu_flush_hwstate - flush pmu state to cpu
354  * @vcpu: The vcpu pointer
355  *
356  * Check if the PMU has overflowed while we were running in the host, and inject
357  * an interrupt if that was the case.
358  */
359 void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
360 {
361         kvm_pmu_update_state(vcpu);
362 }
363
364 /**
365  * kvm_pmu_sync_hwstate - sync pmu state from cpu
366  * @vcpu: The vcpu pointer
367  *
368  * Check if the PMU has overflowed while we were running in the guest, and
369  * inject an interrupt if that was the case.
370  */
371 void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
372 {
373         kvm_pmu_update_state(vcpu);
374 }
375
376 /*
377  * When the perf interrupt is an NMI, we cannot safely notify the vcpu
378  * corresponding to the event. This is why we need a callback to do it
379  * once we are outside of the NMI context.
380  */
381 static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
382 {
383         struct kvm_vcpu *vcpu;
384         struct kvm_pmu *pmu;
385
386         pmu = container_of(work, struct kvm_pmu, overflow_work);
387         vcpu = kvm_pmc_to_vcpu(pmu->pmc);
388
389         kvm_vcpu_kick(vcpu);
390 }
391
392 /*
393  * Perform an increment on any of the counters described in @mask,
394  * generating the overflow if required, and propagate it as a chained
395  * event if possible.
396  */
397 static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
398                                       unsigned long mask, u32 event)
399 {
400         int i;
401
402         if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
403                 return;
404
405         /* Weed out disabled counters */
406         mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
407
408         for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) {
409                 u64 type, reg;
410
411                 /* Filter on event type */
412                 type = __vcpu_sys_reg(vcpu, counter_index_to_evtreg(i));
413                 type &= kvm_pmu_event_mask(vcpu->kvm);
414                 if (type != event)
415                         continue;
416
417                 /* Increment this counter */
418                 reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1;
419                 if (!kvm_pmu_idx_is_64bit(vcpu, i))
420                         reg = lower_32_bits(reg);
421                 __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg;
422
423                 /* No overflow? move on */
424                 if (kvm_pmu_idx_has_64bit_overflow(vcpu, i) ? reg : lower_32_bits(reg))
425                         continue;
426
427                 /* Mark overflow */
428                 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
429
430                 if (kvm_pmu_counter_can_chain(vcpu, i))
431                         kvm_pmu_counter_increment(vcpu, BIT(i + 1),
432                                                   ARMV8_PMUV3_PERFCTR_CHAIN);
433         }
434 }
435
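/*
 * Rough trace of the chaining path: when event counter 0 wraps at
 * 32 bits, its bit is set in PMOVSSET_EL0 and, unless counter 0 already
 * overflows at 64 bits, kvm_pmu_counter_increment() is re-entered with
 * BIT(1) and ARMV8_PMUV3_PERFCTR_CHAIN; counter 1 is then only bumped
 * if it is actually programmed to count the CHAIN event.
 */
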
436 /* Compute the sample period for a given counter value */
437 static u64 compute_period(struct kvm_vcpu *vcpu, u64 select_idx, u64 counter)
438 {
439         u64 val;
440
441         if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) {
442                 if (!kvm_pmu_idx_has_64bit_overflow(vcpu, select_idx))
443                         val = -(counter & GENMASK(31, 0));
444                 else
445                         val = (-counter) & GENMASK(63, 0);
446         } else {
447                 val = (-counter) & GENMASK(31, 0);
448         }
449
450         return val;
451 }
452
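/*
 * Worked example: a 32-bit event counter holding 0xfffffff0 yields a
 * sample period of 0x10, so the perf event fires exactly when the
 * architectural counter would wrap. A cycle counter with PMCR_EL0.LC
 * set overflows at 64 bits, and its period is simply -counter.
 */
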
453 /*
454  * When the perf event overflows, set the overflow status and inform the vcpu.
455  */
456 static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
457                                   struct perf_sample_data *data,
458                                   struct pt_regs *regs)
459 {
460         struct kvm_pmc *pmc = perf_event->overflow_handler_context;
461         struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
462         struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
463         int idx = pmc->idx;
464         u64 period;
465
466         cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);
467
468         /*
469          * Reset the sample period to the architectural limit,
470          * i.e. the point where the counter overflows.
471          */
472         period = compute_period(vcpu, idx, local64_read(&perf_event->count));
473
474         local64_set(&perf_event->hw.period_left, 0);
475         perf_event->attr.sample_period = period;
476         perf_event->hw.sample_period = period;
477
478         __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
479
480         if (kvm_pmu_counter_can_chain(vcpu, idx))
481                 kvm_pmu_counter_increment(vcpu, BIT(idx + 1),
482                                           ARMV8_PMUV3_PERFCTR_CHAIN);
483
484         if (kvm_pmu_overflow_status(vcpu)) {
485                 kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
486
487                 if (!in_nmi())
488                         kvm_vcpu_kick(vcpu);
489                 else
490                         irq_work_queue(&vcpu->arch.pmu.overflow_work);
491         }
492
493         cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
494 }
495
496 /**
497  * kvm_pmu_software_increment - do software increment
498  * @vcpu: The vcpu pointer
499  * @val: the value the guest writes to the PMSWINC register
500  */
501 void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
502 {
503         kvm_pmu_counter_increment(vcpu, val, ARMV8_PMUV3_PERFCTR_SW_INCR);
504 }
505
506 /**
507  * kvm_pmu_handle_pmcr - handle PMCR register
508  * @vcpu: The vcpu pointer
509  * @val: the value the guest writes to the PMCR register
510  */
511 void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
512 {
513         int i;
514
515         if (!kvm_vcpu_has_pmu(vcpu))
516                 return;
517
518         if (val & ARMV8_PMU_PMCR_E) {
519                 kvm_pmu_enable_counter_mask(vcpu,
520                        __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
521         } else {
522                 kvm_pmu_disable_counter_mask(vcpu,
523                        __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
524         }
525
526         if (val & ARMV8_PMU_PMCR_C)
527                 kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
528
529         if (val & ARMV8_PMU_PMCR_P) {
530                 unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
531                 mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
532                 for_each_set_bit(i, &mask, 32)
533                         kvm_pmu_set_counter_value(vcpu, i, 0);
534         }
535 }
536
537 static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
538 {
539         return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
540                (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
541 }
542
543 /**
544  * kvm_pmu_create_perf_event - create a perf event for a counter
545  * @vcpu: The vcpu pointer
546  * @select_idx: The index of the selected counter
547  */
548 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
549 {
550         struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
551         struct kvm_pmu *pmu = &vcpu->arch.pmu;
552         struct kvm_pmc *pmc = &pmu->pmc[select_idx];
553         struct perf_event *event;
554         struct perf_event_attr attr;
555         u64 eventsel, counter, reg, data;
556
557         reg = counter_index_to_evtreg(select_idx);
558         data = __vcpu_sys_reg(vcpu, reg);
559
560         kvm_pmu_stop_counter(vcpu, pmc);
561         if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
562                 eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
563         else
564                 eventsel = data & kvm_pmu_event_mask(vcpu->kvm);
565
566         /*
567          * Neither SW increment nor chained events need to be backed
568          * by a perf event.
569          */
570         if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR ||
571             eventsel == ARMV8_PMUV3_PERFCTR_CHAIN)
572                 return;
573
574         /*
575          * If we have a filter in place and the event isn't allowed, do
576          * not install a perf event either.
577          */
578         if (vcpu->kvm->arch.pmu_filter &&
579             !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
580                 return;
581
582         memset(&attr, 0, sizeof(struct perf_event_attr));
583         attr.type = arm_pmu->pmu.type;
584         attr.size = sizeof(attr);
585         attr.pinned = 1;
586         attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
587         attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
588         attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
589         attr.exclude_hv = 1; /* Don't count EL2 events */
590         attr.exclude_host = 1; /* Don't count host events */
591         attr.config = eventsel;
592
593         counter = kvm_pmu_get_counter_value(vcpu, select_idx);
594
595         /*
596          * If counting with a 64bit counter, advertise it to the perf
597          * code, carefully dealing with the initial sample period
598          * which also depends on the overflow.
599          */
600         if (kvm_pmu_idx_is_64bit(vcpu, select_idx))
601                 attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT;
602
603         attr.sample_period = compute_period(vcpu, select_idx, counter);
604
605         event = perf_event_create_kernel_counter(&attr, -1, current,
606                                                  kvm_pmu_perf_overflow, pmc);
607
608         if (IS_ERR(event)) {
609                 pr_err_once("kvm: pmu event creation failed %ld\n",
610                             PTR_ERR(event));
611                 return;
612         }
613
614         pmc->perf_event = event;
615 }
616
617 /**
618  * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
619  * @vcpu: The vcpu pointer
620  * @data: The value the guest writes to PMXEVTYPER_EL0
621  * @select_idx: The index of the selected counter
622  *
623  * When the guest OS accesses PMXEVTYPER_EL0, it wants to set up a PMC to count
624  * an event with the given hardware event number. Here we call the perf_event
625  * API to emulate this action and create a kernel perf event for it.
626  */
627 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
628                                     u64 select_idx)
629 {
630         u64 reg, mask;
631
632         if (!kvm_vcpu_has_pmu(vcpu))
633                 return;
634
635         mask  =  ARMV8_PMU_EVTYPE_MASK;
636         mask &= ~ARMV8_PMU_EVTYPE_EVENT;
637         mask |= kvm_pmu_event_mask(vcpu->kvm);
638
639         reg = counter_index_to_evtreg(select_idx);
640
641         __vcpu_sys_reg(vcpu, reg) = data & mask;
642
643         kvm_pmu_create_perf_event(vcpu, select_idx);
644 }
645
646 void kvm_host_pmu_init(struct arm_pmu *pmu)
647 {
648         struct arm_pmu_entry *entry;
649
650         if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
651                 return;
652
653         mutex_lock(&arm_pmus_lock);
654
655         entry = kmalloc(sizeof(*entry), GFP_KERNEL);
656         if (!entry)
657                 goto out_unlock;
658
659         entry->arm_pmu = pmu;
660         list_add_tail(&entry->entry, &arm_pmus);
661
662         if (list_is_singular(&arm_pmus))
663                 static_branch_enable(&kvm_arm_pmu_available);
664
665 out_unlock:
666         mutex_unlock(&arm_pmus_lock);
667 }
668
669 static struct arm_pmu *kvm_pmu_probe_armpmu(void)
670 {
671         struct perf_event_attr attr = { };
672         struct perf_event *event;
673         struct arm_pmu *pmu = NULL;
674
675         /*
676          * Create a dummy event that only counts user cycles. As we'll never
677          * leave this function with the event being live, it will never
678          * count anything. But it allows us to probe some of the PMU
679          * details. Yes, this is terrible.
680          */
681         attr.type = PERF_TYPE_RAW;
682         attr.size = sizeof(attr);
683         attr.pinned = 1;
684         attr.disabled = 0;
685         attr.exclude_user = 0;
686         attr.exclude_kernel = 1;
687         attr.exclude_hv = 1;
688         attr.exclude_host = 1;
689         attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
690         attr.sample_period = GENMASK(63, 0);
691
692         event = perf_event_create_kernel_counter(&attr, -1, current,
693                                                  kvm_pmu_perf_overflow, &attr);
694
695         if (IS_ERR(event)) {
696                 pr_err_once("kvm: pmu event creation failed %ld\n",
697                             PTR_ERR(event));
698                 return NULL;
699         }
700
701         if (event->pmu) {
702                 pmu = to_arm_pmu(event->pmu);
703                 if (pmu->pmuver == 0 ||
704                     pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
705                         pmu = NULL;
706         }
707
708         perf_event_disable(event);
709         perf_event_release_kernel(event);
710
711         return pmu;
712 }
713
714 u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
715 {
716         unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
717         u64 val, mask = 0;
718         int base, i, nr_events;
719
720         if (!kvm_vcpu_has_pmu(vcpu))
721                 return 0;
722
723         if (!pmceid1) {
724                 val = read_sysreg(pmceid0_el0);
725                 /* always support CHAIN */
726                 val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN);
727                 base = 0;
728         } else {
729                 val = read_sysreg(pmceid1_el0);
730                 /*
731                  * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
732                  * as RAZ
733                  */
734                 if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4)
735                         val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
736                 base = 32;
737         }
738
739         if (!bmap)
740                 return val;
741
742         nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;
743
744         for (i = 0; i < 32; i += 8) {
745                 u64 byte;
746
747                 byte = bitmap_get_value8(bmap, base + i);
748                 mask |= byte << i;
749                 if (nr_events >= (0x4000 + base + 32)) {
750                         byte = bitmap_get_value8(bmap, 0x4000 + base + i);
751                         mask |= byte << (32 + i);
752                 }
753         }
754
755         return val & mask;
756 }
757
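/*
 * Layout reminder (assuming the usual PMUv3 encoding): the low 32 bits
 * of PMCEIDx_EL0 describe common events base..base+31, while the high
 * 32 bits describe the extended range starting at 0x4000 + base. The
 * loop above folds the event filter into that layout eight bits at a
 * time, so filtered-out events also vanish from PMCEID0/1.
 */
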
758 int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
759 {
760         if (!kvm_vcpu_has_pmu(vcpu))
761                 return 0;
762
763         if (!vcpu->arch.pmu.created)
764                 return -EINVAL;
765
766         /*
767          * A valid interrupt configuration for the PMU is either to have a
768          * properly configured interrupt number together with an in-kernel
769          * irqchip, or to have no in-kernel GIC and no IRQ set at all.
770          */
771         if (irqchip_in_kernel(vcpu->kvm)) {
772                 int irq = vcpu->arch.pmu.irq_num;
773                 /*
774                  * If we are using an in-kernel vgic, at this point we know
775                  * the vgic will be initialized, so we can check the PMU irq
776                  * number against the dimensions of the vgic and make sure
777                  * it's valid.
778                  */
779                 if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
780                         return -EINVAL;
781         } else if (kvm_arm_pmu_irq_initialized(vcpu)) {
782                    return -EINVAL;
783         }
784
785         /* One-off reload of the PMU on first run */
786         kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
787
788         return 0;
789 }
790
791 static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
792 {
793         if (irqchip_in_kernel(vcpu->kvm)) {
794                 int ret;
795
796                 /*
797                  * If using the PMU with an in-kernel virtual GIC
798                  * implementation, we require the GIC to be already
799                  * initialized when initializing the PMU.
800                  */
801                 if (!vgic_initialized(vcpu->kvm))
802                         return -ENODEV;
803
804                 if (!kvm_arm_pmu_irq_initialized(vcpu))
805                         return -ENXIO;
806
807                 ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
808                                          &vcpu->arch.pmu);
809                 if (ret)
810                         return ret;
811         }
812
813         init_irq_work(&vcpu->arch.pmu.overflow_work,
814                       kvm_pmu_perf_overflow_notify_vcpu);
815
816         vcpu->arch.pmu.created = true;
817         return 0;
818 }
819
820 /*
821  * For a given VM the interrupt type must be the same for each vcpu:
822  * as a PPI, the interrupt number is the same for all vcpus,
823  * while as an SPI it must be a separate number per vcpu.
824  */
825 static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
826 {
827         unsigned long i;
828         struct kvm_vcpu *vcpu;
829
830         kvm_for_each_vcpu(i, vcpu, kvm) {
831                 if (!kvm_arm_pmu_irq_initialized(vcpu))
832                         continue;
833
834                 if (irq_is_ppi(irq)) {
835                         if (vcpu->arch.pmu.irq_num != irq)
836                                 return false;
837                 } else {
838                         if (vcpu->arch.pmu.irq_num == irq)
839                                 return false;
840                 }
841         }
842
843         return true;
844 }
845
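/*
 * For instance, a PPI such as INTID 23 must be programmed with the same
 * number on every vcpu, whereas an SPI must not be shared between two
 * vcpus.
 */
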
846 static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
847 {
848         struct kvm *kvm = vcpu->kvm;
849         struct arm_pmu_entry *entry;
850         struct arm_pmu *arm_pmu;
851         int ret = -ENXIO;
852
853         mutex_lock(&kvm->lock);
854         mutex_lock(&arm_pmus_lock);
855
856         list_for_each_entry(entry, &arm_pmus, entry) {
857                 arm_pmu = entry->arm_pmu;
858                 if (arm_pmu->pmu.type == pmu_id) {
859                         if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) ||
860                             (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
861                                 ret = -EBUSY;
862                                 break;
863                         }
864
865                         kvm->arch.arm_pmu = arm_pmu;
866                         cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus);
867                         ret = 0;
868                         break;
869                 }
870         }
871
872         mutex_unlock(&arm_pmus_lock);
873         mutex_unlock(&kvm->lock);
874         return ret;
875 }
876
877 int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
878 {
879         struct kvm *kvm = vcpu->kvm;
880
881         if (!kvm_vcpu_has_pmu(vcpu))
882                 return -ENODEV;
883
884         if (vcpu->arch.pmu.created)
885                 return -EBUSY;
886
887         mutex_lock(&kvm->lock);
888         if (!kvm->arch.arm_pmu) {
889                 /* No PMU set, get the default one */
890                 kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
891                 if (!kvm->arch.arm_pmu) {
892                         mutex_unlock(&kvm->lock);
893                         return -ENODEV;
894                 }
895         }
896         mutex_unlock(&kvm->lock);
897
898         switch (attr->attr) {
899         case KVM_ARM_VCPU_PMU_V3_IRQ: {
900                 int __user *uaddr = (int __user *)(long)attr->addr;
901                 int irq;
902
903                 if (!irqchip_in_kernel(kvm))
904                         return -EINVAL;
905
906                 if (get_user(irq, uaddr))
907                         return -EFAULT;
908
909                 /* The PMU overflow interrupt can be a PPI or a valid SPI. */
910                 if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
911                         return -EINVAL;
912
913                 if (!pmu_irq_is_valid(kvm, irq))
914                         return -EINVAL;
915
916                 if (kvm_arm_pmu_irq_initialized(vcpu))
917                         return -EBUSY;
918
919                 kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
920                 vcpu->arch.pmu.irq_num = irq;
921                 return 0;
922         }
923         case KVM_ARM_VCPU_PMU_V3_FILTER: {
924                 struct kvm_pmu_event_filter __user *uaddr;
925                 struct kvm_pmu_event_filter filter;
926                 int nr_events;
927
928                 nr_events = kvm_pmu_event_mask(kvm) + 1;
929
930                 uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
931
932                 if (copy_from_user(&filter, uaddr, sizeof(filter)))
933                         return -EFAULT;
934
935                 if (((u32)filter.base_event + filter.nevents) > nr_events ||
936                     (filter.action != KVM_PMU_EVENT_ALLOW &&
937                      filter.action != KVM_PMU_EVENT_DENY))
938                         return -EINVAL;
939
940                 mutex_lock(&kvm->lock);
941
942                 if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) {
943                         mutex_unlock(&kvm->lock);
944                         return -EBUSY;
945                 }
946
947                 if (!kvm->arch.pmu_filter) {
948                         kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
949                         if (!kvm->arch.pmu_filter) {
950                                 mutex_unlock(&kvm->lock);
951                                 return -ENOMEM;
952                         }
953
954                         /*
955                          * The default depends on the first applied filter.
956                          * If it allows events, the default is to deny.
957                          * Conversely, if the first filter denies a set of
958                          * events, the default is to allow.
959                          */
960                         if (filter.action == KVM_PMU_EVENT_ALLOW)
961                                 bitmap_zero(kvm->arch.pmu_filter, nr_events);
962                         else
963                                 bitmap_fill(kvm->arch.pmu_filter, nr_events);
964                 }
965
966                 if (filter.action == KVM_PMU_EVENT_ALLOW)
967                         bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
968                 else
969                         bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
970
971                 mutex_unlock(&kvm->lock);
972
973                 return 0;
974         }
975         case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
976                 int __user *uaddr = (int __user *)(long)attr->addr;
977                 int pmu_id;
978
979                 if (get_user(pmu_id, uaddr))
980                         return -EFAULT;
981
982                 return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
983         }
984         case KVM_ARM_VCPU_PMU_V3_INIT:
985                 return kvm_arm_pmu_v3_init(vcpu);
986         }
987
988         return -ENXIO;
989 }
990
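/*
 * Typical userspace ordering, as a rough sketch (the attributes sit in
 * the KVM_ARM_VCPU_PMU_V3_CTRL group and are set with KVM_SET_DEVICE_ATTR
 * on the vcpu fd): optionally pick a PMU with _SET_PMU, set the overflow
 * interrupt with _IRQ when an in-kernel irqchip is used, install any
 * _FILTER ranges, and finish with _INIT, after which further attempts
 * return -EBUSY.
 */
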
991 int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
992 {
993         switch (attr->attr) {
994         case KVM_ARM_VCPU_PMU_V3_IRQ: {
995                 int __user *uaddr = (int __user *)(long)attr->addr;
996                 int irq;
997
998                 if (!irqchip_in_kernel(vcpu->kvm))
999                         return -EINVAL;
1000
1001                 if (!kvm_vcpu_has_pmu(vcpu))
1002                         return -ENODEV;
1003
1004                 if (!kvm_arm_pmu_irq_initialized(vcpu))
1005                         return -ENXIO;
1006
1007                 irq = vcpu->arch.pmu.irq_num;
1008                 return put_user(irq, uaddr);
1009         }
1010         }
1011
1012         return -ENXIO;
1013 }
1014
1015 int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1016 {
1017         switch (attr->attr) {
1018         case KVM_ARM_VCPU_PMU_V3_IRQ:
1019         case KVM_ARM_VCPU_PMU_V3_INIT:
1020         case KVM_ARM_VCPU_PMU_V3_FILTER:
1021         case KVM_ARM_VCPU_PMU_V3_SET_PMU:
1022                 if (kvm_vcpu_has_pmu(vcpu))
1023                         return 0;
1024         }
1025
1026         return -ENXIO;
1027 }