1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/module.h>
3
4 #include <asm/cpu_device_id.h>
5 #include <asm/intel-family.h>
6 #include "uncore.h"
7
8 static struct intel_uncore_type *empty_uncore[] = { NULL, };
9 struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
10 struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
11 struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;
12
13 static bool pcidrv_registered;
14 struct pci_driver *uncore_pci_driver;
15 /* The PCI driver for devices which the uncore driver doesn't own. */
16 struct pci_driver *uncore_pci_sub_driver;
17 /* pci bus to socket mapping */
18 DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
19 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
20 struct pci_extra_dev *uncore_extra_pci_dev;
21 int __uncore_max_dies;
22
23 /* mask of cpus that collect uncore events */
24 static cpumask_t uncore_cpu_mask;
25
26 /* constraint for the fixed counter */
27 static struct event_constraint uncore_constraint_fixed =
28         EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
29 struct event_constraint uncore_constraint_empty =
30         EVENT_CONSTRAINT(0, 0, 0);
31
32 MODULE_LICENSE("GPL");
33
34 int uncore_pcibus_to_physid(struct pci_bus *bus)
35 {
36         struct pci2phy_map *map;
37         int phys_id = -1;
38
39         raw_spin_lock(&pci2phy_map_lock);
40         list_for_each_entry(map, &pci2phy_map_head, list) {
41                 if (map->segment == pci_domain_nr(bus)) {
42                         phys_id = map->pbus_to_physid[bus->number];
43                         break;
44                 }
45         }
46         raw_spin_unlock(&pci2phy_map_lock);
47
48         return phys_id;
49 }
50
51 static void uncore_free_pcibus_map(void)
52 {
53         struct pci2phy_map *map, *tmp;
54
55         list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
56                 list_del(&map->list);
57                 kfree(map);
58         }
59 }
60
61 struct pci2phy_map *__find_pci2phy_map(int segment)
62 {
63         struct pci2phy_map *map, *alloc = NULL;
64         int i;
65
66         lockdep_assert_held(&pci2phy_map_lock);
67
68 lookup:
69         list_for_each_entry(map, &pci2phy_map_head, list) {
70                 if (map->segment == segment)
71                         goto end;
72         }
73
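        /*
         * Nothing matched and nothing allocated yet: drop the lock so the
         * GFP_KERNEL allocation may sleep, retake it and redo the lookup,
         * since another CPU may have added the entry in the meantime. Any
         * now-unneeded allocation is freed at the 'end' label below.
         */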
74         if (!alloc) {
75                 raw_spin_unlock(&pci2phy_map_lock);
76                 alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
77                 raw_spin_lock(&pci2phy_map_lock);
78
79                 if (!alloc)
80                         return NULL;
81
82                 goto lookup;
83         }
84
85         map = alloc;
86         alloc = NULL;
87         map->segment = segment;
88         for (i = 0; i < 256; i++)
89                 map->pbus_to_physid[i] = -1;
90         list_add_tail(&map->list, &pci2phy_map_head);
91
92 end:
93         kfree(alloc);
94         return map;
95 }
96
97 ssize_t uncore_event_show(struct kobject *kobj,
98                           struct kobj_attribute *attr, char *buf)
99 {
100         struct uncore_event_desc *event =
101                 container_of(attr, struct uncore_event_desc, attr);
102         return sprintf(buf, "%s", event->config);
103 }
104
105 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
106 {
107         unsigned int dieid = topology_logical_die_id(cpu);
108
109         /*
110          * The unsigned check also catches the '-1' return value for
111          * non-existent mappings in the topology map.
112          */
113         return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
114 }
115
116 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
117 {
118         u64 count;
119
120         rdmsrl(event->hw.event_base, count);
121
122         return count;
123 }
124
125 void uncore_mmio_exit_box(struct intel_uncore_box *box)
126 {
127         if (box->io_addr)
128                 iounmap(box->io_addr);
129 }
130
131 u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
132                              struct perf_event *event)
133 {
134         if (!box->io_addr)
135                 return 0;
136
137         if (!uncore_mmio_is_valid_offset(box, event->hw.event_base))
138                 return 0;
139
140         return readq(box->io_addr + event->hw.event_base);
141 }
142
143 /*
144  * generic get constraint function for shared match/mask registers.
145  */
146 struct event_constraint *
147 uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
148 {
149         struct intel_uncore_extra_reg *er;
150         struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
151         struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
152         unsigned long flags;
153         bool ok = false;
154
155         /*
156          * reg->alloc can be set due to existing state, so for a fake box we
157          * need to ignore it; otherwise we might fail to allocate the proper
158          * fake state for this extra reg constraint.
159          */
160         if (reg1->idx == EXTRA_REG_NONE ||
161             (!uncore_box_is_fake(box) && reg1->alloc))
162                 return NULL;
163
164         er = &box->shared_regs[reg1->idx];
165         raw_spin_lock_irqsave(&er->lock, flags);
166         if (!atomic_read(&er->ref) ||
167             (er->config1 == reg1->config && er->config2 == reg2->config)) {
168                 atomic_inc(&er->ref);
169                 er->config1 = reg1->config;
170                 er->config2 = reg2->config;
171                 ok = true;
172         }
173         raw_spin_unlock_irqrestore(&er->lock, flags);
174
175         if (ok) {
176                 if (!uncore_box_is_fake(box))
177                         reg1->alloc = 1;
178                 return NULL;
179         }
180
181         return &uncore_constraint_empty;
182 }
183
184 void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
185 {
186         struct intel_uncore_extra_reg *er;
187         struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
188
189         /*
190          * Only put the constraint if the extra reg was actually allocated.
191          * This also takes care of events which do not use an extra shared reg.
192          *
193          * Also, if this is a fake box we shouldn't touch any event state
194          * (reg->alloc) and we don't care about leaving inconsistent box
195          * state either since it will be thrown out.
196          */
197         if (uncore_box_is_fake(box) || !reg1->alloc)
198                 return;
199
200         er = &box->shared_regs[reg1->idx];
201         atomic_dec(&er->ref);
202         reg1->alloc = 0;
203 }
204
205 u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
206 {
207         struct intel_uncore_extra_reg *er;
208         unsigned long flags;
209         u64 config;
210
211         er = &box->shared_regs[idx];
212
213         raw_spin_lock_irqsave(&er->lock, flags);
214         config = er->config;
215         raw_spin_unlock_irqrestore(&er->lock, flags);
216
217         return config;
218 }
219
220 static void uncore_assign_hw_event(struct intel_uncore_box *box,
221                                    struct perf_event *event, int idx)
222 {
223         struct hw_perf_event *hwc = &event->hw;
224
225         hwc->idx = idx;
226         hwc->last_tag = ++box->tags[idx];
227
228         if (uncore_pmc_fixed(hwc->idx)) {
229                 hwc->event_base = uncore_fixed_ctr(box);
230                 hwc->config_base = uncore_fixed_ctl(box);
231                 return;
232         }
233
234         hwc->config_base = uncore_event_ctl(box, hwc->idx);
235         hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
236 }
237
238 void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
239 {
240         u64 prev_count, new_count, delta;
241         int shift;
242
243         if (uncore_pmc_freerunning(event->hw.idx))
244                 shift = 64 - uncore_freerunning_bits(box, event);
245         else if (uncore_pmc_fixed(event->hw.idx))
246                 shift = 64 - uncore_fixed_ctr_bits(box);
247         else
248                 shift = 64 - uncore_perf_ctr_bits(box);
249
250         /* the hrtimer might modify the previous event value */
251 again:
252         prev_count = local64_read(&event->hw.prev_count);
253         new_count = uncore_read_counter(box, event);
254         if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
255                 goto again;
256
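        /*
         * Shifting both values up by (64 - counter width) makes the unsigned
         * subtraction and the shift back compute the delta modulo the counter
         * width. E.g. for a 48-bit counter (shift = 16), a wrap from
         * 0xffffffffffff to 0x10 still yields the correct delta of 0x11.
         */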
257         delta = (new_count << shift) - (prev_count << shift);
258         delta >>= shift;
259
260         local64_add(delta, &event->count);
261 }
262
263 /*
264  * The overflow interrupt is unavailable for SandyBridge-EP and broken on
265  * SandyBridge, so we use an hrtimer to periodically poll the counters to
266  * avoid overflow.
267  */
268 static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
269 {
270         struct intel_uncore_box *box;
271         struct perf_event *event;
272         unsigned long flags;
273         int bit;
274
275         box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
276         if (!box->n_active || box->cpu != smp_processor_id())
277                 return HRTIMER_NORESTART;
278         /*
279          * Disable local interrupts to prevent uncore_pmu_event_start/stop
280          * from interrupting the update process.
281          */
282         local_irq_save(flags);
283
284         /*
285          * handle boxes with an active event list as opposed to active
286          * counters
287          */
288         list_for_each_entry(event, &box->active_list, active_entry) {
289                 uncore_perf_event_update(box, event);
290         }
291
292         for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
293                 uncore_perf_event_update(box, box->events[bit]);
294
295         local_irq_restore(flags);
296
297         hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
298         return HRTIMER_RESTART;
299 }
300
301 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
302 {
303         hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
304                       HRTIMER_MODE_REL_PINNED);
305 }
306
307 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
308 {
309         hrtimer_cancel(&box->hrtimer);
310 }
311
312 static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
313 {
314         hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
315         box->hrtimer.function = uncore_pmu_hrtimer;
316 }
317
318 static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
319                                                  int node)
320 {
321         int i, size, numshared = type->num_shared_regs;
322         struct intel_uncore_box *box;
323
324         size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
325
326         box = kzalloc_node(size, GFP_KERNEL, node);
327         if (!box)
328                 return NULL;
329
330         for (i = 0; i < numshared; i++)
331                 raw_spin_lock_init(&box->shared_regs[i].lock);
332
333         uncore_pmu_init_hrtimer(box);
334         box->cpu = -1;
335         box->pci_phys_id = -1;
336         box->dieid = -1;
337
338         /* set default hrtimer timeout */
339         box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
340
341         INIT_LIST_HEAD(&box->active_list);
342
343         return box;
344 }
345
346 /*
347  * Use the uncore_pmu_event_init() pmu event_init callback
348  * as a detection point for uncore events.
349  */
350 static int uncore_pmu_event_init(struct perf_event *event);
351
352 static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
353 {
354         return &box->pmu->pmu == event->pmu;
355 }
356
357 static int
358 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
359                       bool dogrp)
360 {
361         struct perf_event *event;
362         int n, max_count;
363
364         max_count = box->pmu->type->num_counters;
365         if (box->pmu->type->fixed_ctl)
366                 max_count++;
367
368         if (box->n_events >= max_count)
369                 return -EINVAL;
370
371         n = box->n_events;
372
373         if (is_box_event(box, leader)) {
374                 box->event_list[n] = leader;
375                 n++;
376         }
377
378         if (!dogrp)
379                 return n;
380
381         for_each_sibling_event(event, leader) {
382                 if (!is_box_event(box, event) ||
383                     event->state <= PERF_EVENT_STATE_OFF)
384                         continue;
385
386                 if (n >= max_count)
387                         return -EINVAL;
388
389                 box->event_list[n] = event;
390                 n++;
391         }
392         return n;
393 }
394
395 static struct event_constraint *
396 uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
397 {
398         struct intel_uncore_type *type = box->pmu->type;
399         struct event_constraint *c;
400
401         if (type->ops->get_constraint) {
402                 c = type->ops->get_constraint(box, event);
403                 if (c)
404                         return c;
405         }
406
407         if (event->attr.config == UNCORE_FIXED_EVENT)
408                 return &uncore_constraint_fixed;
409
410         if (type->constraints) {
411                 for_each_event_constraint(c, type->constraints) {
412                         if ((event->hw.config & c->cmask) == c->code)
413                                 return c;
414                 }
415         }
416
417         return &type->unconstrainted;
418 }
419
420 static void uncore_put_event_constraint(struct intel_uncore_box *box,
421                                         struct perf_event *event)
422 {
423         if (box->pmu->type->ops->put_constraint)
424                 box->pmu->type->ops->put_constraint(box, event);
425 }
426
427 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
428 {
429         unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
430         struct event_constraint *c;
431         int i, wmin, wmax, ret = 0;
432         struct hw_perf_event *hwc;
433
434         bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
435
436         for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
437                 c = uncore_get_event_constraint(box, box->event_list[i]);
438                 box->event_constraint[i] = c;
439                 wmin = min(wmin, c->weight);
440                 wmax = max(wmax, c->weight);
441         }
442
443         /* fastpath, try to reuse previous register */
444         for (i = 0; i < n; i++) {
445                 hwc = &box->event_list[i]->hw;
446                 c = box->event_constraint[i];
447
448                 /* never assigned */
449                 if (hwc->idx == -1)
450                         break;
451
452                 /* constraint still honored */
453                 if (!test_bit(hwc->idx, c->idxmsk))
454                         break;
455
456                 /* not already used */
457                 if (test_bit(hwc->idx, used_mask))
458                         break;
459
460                 __set_bit(hwc->idx, used_mask);
461                 if (assign)
462                         assign[i] = hwc->idx;
463         }
464         /* slow path */
465         if (i != n)
466                 ret = perf_assign_events(box->event_constraint, n,
467                                          wmin, wmax, n, assign);
468
469         if (!assign || ret) {
470                 for (i = 0; i < n; i++)
471                         uncore_put_event_constraint(box, box->event_list[i]);
472         }
473         return ret ? -EINVAL : 0;
474 }
475
476 void uncore_pmu_event_start(struct perf_event *event, int flags)
477 {
478         struct intel_uncore_box *box = uncore_event_to_box(event);
479         int idx = event->hw.idx;
480
481         if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
482                 return;
483
484         /*
485          * A free running counter is read-only and always active.
486          * Use the current counter value as the start point.
487          * There is no overflow interrupt for a free running counter,
488          * so use an hrtimer to periodically poll it to avoid overflow.
489          */
490         if (uncore_pmc_freerunning(event->hw.idx)) {
491                 list_add_tail(&event->active_entry, &box->active_list);
492                 local64_set(&event->hw.prev_count,
493                             uncore_read_counter(box, event));
494                 if (box->n_active++ == 0)
495                         uncore_pmu_start_hrtimer(box);
496                 return;
497         }
498
499         if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
500                 return;
501
502         event->hw.state = 0;
503         box->events[idx] = event;
504         box->n_active++;
505         __set_bit(idx, box->active_mask);
506
507         local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
508         uncore_enable_event(box, event);
509
510         if (box->n_active == 1)
511                 uncore_pmu_start_hrtimer(box);
512 }
513
514 void uncore_pmu_event_stop(struct perf_event *event, int flags)
515 {
516         struct intel_uncore_box *box = uncore_event_to_box(event);
517         struct hw_perf_event *hwc = &event->hw;
518
519         /* Cannot disable a free running counter, which is read-only */
520         if (uncore_pmc_freerunning(hwc->idx)) {
521                 list_del(&event->active_entry);
522                 if (--box->n_active == 0)
523                         uncore_pmu_cancel_hrtimer(box);
524                 uncore_perf_event_update(box, event);
525                 return;
526         }
527
528         if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
529                 uncore_disable_event(box, event);
530                 box->n_active--;
531                 box->events[hwc->idx] = NULL;
532                 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
533                 hwc->state |= PERF_HES_STOPPED;
534
535                 if (box->n_active == 0)
536                         uncore_pmu_cancel_hrtimer(box);
537         }
538
539         if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
540                 /*
541                  * Drain the remaining delta count out of an event
542                  * that we are disabling:
543                  */
544                 uncore_perf_event_update(box, event);
545                 hwc->state |= PERF_HES_UPTODATE;
546         }
547 }
548
549 int uncore_pmu_event_add(struct perf_event *event, int flags)
550 {
551         struct intel_uncore_box *box = uncore_event_to_box(event);
552         struct hw_perf_event *hwc = &event->hw;
553         int assign[UNCORE_PMC_IDX_MAX];
554         int i, n, ret;
555
556         if (!box)
557                 return -ENODEV;
558
559         /*
560          * The free running counter is assigned in event_init().
561          * The free running counter event and the free running counter
562          * are 1:1 mapped. It doesn't need to be tracked in the event_list.
563          */
564         if (uncore_pmc_freerunning(hwc->idx)) {
565                 if (flags & PERF_EF_START)
566                         uncore_pmu_event_start(event, 0);
567                 return 0;
568         }
569
570         ret = n = uncore_collect_events(box, event, false);
571         if (ret < 0)
572                 return ret;
573
574         hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
575         if (!(flags & PERF_EF_START))
576                 hwc->state |= PERF_HES_ARCH;
577
578         ret = uncore_assign_events(box, assign, n);
579         if (ret)
580                 return ret;
581
582         /* save events moving to new counters */
583         for (i = 0; i < box->n_events; i++) {
584                 event = box->event_list[i];
585                 hwc = &event->hw;
586
587                 if (hwc->idx == assign[i] &&
588                         hwc->last_tag == box->tags[assign[i]])
589                         continue;
590                 /*
591                  * Ensure we don't accidentally enable a stopped
592                  * counter simply because we rescheduled.
593                  */
594                 if (hwc->state & PERF_HES_STOPPED)
595                         hwc->state |= PERF_HES_ARCH;
596
597                 uncore_pmu_event_stop(event, PERF_EF_UPDATE);
598         }
599
600         /* reprogram moved events into new counters */
601         for (i = 0; i < n; i++) {
602                 event = box->event_list[i];
603                 hwc = &event->hw;
604
605                 if (hwc->idx != assign[i] ||
606                         hwc->last_tag != box->tags[assign[i]])
607                         uncore_assign_hw_event(box, event, assign[i]);
608                 else if (i < box->n_events)
609                         continue;
610
611                 if (hwc->state & PERF_HES_ARCH)
612                         continue;
613
614                 uncore_pmu_event_start(event, 0);
615         }
616         box->n_events = n;
617
618         return 0;
619 }
620
621 void uncore_pmu_event_del(struct perf_event *event, int flags)
622 {
623         struct intel_uncore_box *box = uncore_event_to_box(event);
624         int i;
625
626         uncore_pmu_event_stop(event, PERF_EF_UPDATE);
627
628         /*
629          * The event for a free running counter is not tracked by the event_list.
630          * There is no need to force event->hw.idx = -1 to reassign the counter,
631          * because the event and the free running counter are 1:1 mapped.
632          */
633         if (uncore_pmc_freerunning(event->hw.idx))
634                 return;
635
636         for (i = 0; i < box->n_events; i++) {
637                 if (event == box->event_list[i]) {
638                         uncore_put_event_constraint(box, event);
639
640                         for (++i; i < box->n_events; i++)
641                                 box->event_list[i - 1] = box->event_list[i];
642
643                         --box->n_events;
644                         break;
645                 }
646         }
647
648         event->hw.idx = -1;
649         event->hw.last_tag = ~0ULL;
650 }
651
652 void uncore_pmu_event_read(struct perf_event *event)
653 {
654         struct intel_uncore_box *box = uncore_event_to_box(event);
655         uncore_perf_event_update(box, event);
656 }
657
658 /*
659  * validation ensures the group can be loaded onto the
660  * PMU if it was the only group available.
661  */
662 static int uncore_validate_group(struct intel_uncore_pmu *pmu,
663                                 struct perf_event *event)
664 {
665         struct perf_event *leader = event->group_leader;
666         struct intel_uncore_box *fake_box;
667         int ret = -EINVAL, n;
668
669         /* The free running counter is always active. */
670         if (uncore_pmc_freerunning(event->hw.idx))
671                 return 0;
672
673         fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
674         if (!fake_box)
675                 return -ENOMEM;
676
677         fake_box->pmu = pmu;
678         /*
679          * The event is not yet connected with its
680          * siblings, therefore we must first collect
681          * existing siblings, then add the new event
682          * before we can simulate the scheduling.
683          */
684         n = uncore_collect_events(fake_box, leader, true);
685         if (n < 0)
686                 goto out;
687
688         fake_box->n_events = n;
689         n = uncore_collect_events(fake_box, event, false);
690         if (n < 0)
691                 goto out;
692
693         fake_box->n_events = n;
694
695         ret = uncore_assign_events(fake_box, NULL, n);
696 out:
697         kfree(fake_box);
698         return ret;
699 }
700
701 static int uncore_pmu_event_init(struct perf_event *event)
702 {
703         struct intel_uncore_pmu *pmu;
704         struct intel_uncore_box *box;
705         struct hw_perf_event *hwc = &event->hw;
706         int ret;
707
708         if (event->attr.type != event->pmu->type)
709                 return -ENOENT;
710
711         pmu = uncore_event_to_pmu(event);
712         /* no device found for this pmu */
713         if (pmu->func_id < 0)
714                 return -ENOENT;
715
716         /* Sampling not supported yet */
717         if (hwc->sample_period)
718                 return -EINVAL;
719
720         /*
721          * Place all uncore events for a particular physical package
722          * onto a single cpu
723          */
724         if (event->cpu < 0)
725                 return -EINVAL;
726         box = uncore_pmu_to_box(pmu, event->cpu);
727         if (!box || box->cpu < 0)
728                 return -EINVAL;
729         event->cpu = box->cpu;
730         event->pmu_private = box;
731
732         event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
733
734         event->hw.idx = -1;
735         event->hw.last_tag = ~0ULL;
736         event->hw.extra_reg.idx = EXTRA_REG_NONE;
737         event->hw.branch_reg.idx = EXTRA_REG_NONE;
738
739         if (event->attr.config == UNCORE_FIXED_EVENT) {
740                 /* no fixed counter */
741                 if (!pmu->type->fixed_ctl)
742                         return -EINVAL;
743                 /*
744                  * if there is only one fixed counter, only the first pmu
745                  * can access the fixed counter
746                  */
747                 if (pmu->type->single_fixed && pmu->pmu_idx > 0)
748                         return -EINVAL;
749
750                 /* fixed counters have event field hardcoded to zero */
751                 hwc->config = 0ULL;
752         } else if (is_freerunning_event(event)) {
753                 hwc->config = event->attr.config;
754                 if (!check_valid_freerunning_event(box, event))
755                         return -EINVAL;
756                 event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
757                 /*
758                  * The free running counter event and free running counter
759                  * are always 1:1 mapped.
760                  * The free running counter is always active.
761                  * Assign the free running counter here.
762                  */
763                 event->hw.event_base = uncore_freerunning_counter(box, event);
764         } else {
765                 hwc->config = event->attr.config &
766                               (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
767                 if (pmu->type->ops->hw_config) {
768                         ret = pmu->type->ops->hw_config(box, event);
769                         if (ret)
770                                 return ret;
771                 }
772         }
773
774         if (event->group_leader != event)
775                 ret = uncore_validate_group(pmu, event);
776         else
777                 ret = 0;
778
779         return ret;
780 }
781
782 static void uncore_pmu_enable(struct pmu *pmu)
783 {
784         struct intel_uncore_pmu *uncore_pmu;
785         struct intel_uncore_box *box;
786
787         uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
788         if (!uncore_pmu)
789                 return;
790
791         box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
792         if (!box)
793                 return;
794
795         if (uncore_pmu->type->ops->enable_box)
796                 uncore_pmu->type->ops->enable_box(box);
797 }
798
799 static void uncore_pmu_disable(struct pmu *pmu)
800 {
801         struct intel_uncore_pmu *uncore_pmu;
802         struct intel_uncore_box *box;
803
804         uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
805         if (!uncore_pmu)
806                 return;
807
808         box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
809         if (!box)
810                 return;
811
812         if (uncore_pmu->type->ops->disable_box)
813                 uncore_pmu->type->ops->disable_box(box);
814 }
815
816 static ssize_t uncore_get_attr_cpumask(struct device *dev,
817                                 struct device_attribute *attr, char *buf)
818 {
819         return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
820 }
821
822 static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
823
824 static struct attribute *uncore_pmu_attrs[] = {
825         &dev_attr_cpumask.attr,
826         NULL,
827 };
828
829 static const struct attribute_group uncore_pmu_attr_group = {
830         .attrs = uncore_pmu_attrs,
831 };
832
833 static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
834 {
835         int ret;
836
837         if (!pmu->type->pmu) {
838                 pmu->pmu = (struct pmu) {
839                         .attr_groups    = pmu->type->attr_groups,
840                         .task_ctx_nr    = perf_invalid_context,
841                         .pmu_enable     = uncore_pmu_enable,
842                         .pmu_disable    = uncore_pmu_disable,
843                         .event_init     = uncore_pmu_event_init,
844                         .add            = uncore_pmu_event_add,
845                         .del            = uncore_pmu_event_del,
846                         .start          = uncore_pmu_event_start,
847                         .stop           = uncore_pmu_event_stop,
848                         .read           = uncore_pmu_event_read,
849                         .module         = THIS_MODULE,
850                         .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
851                         .attr_update    = pmu->type->attr_update,
852                 };
853         } else {
854                 pmu->pmu = *pmu->type->pmu;
855                 pmu->pmu.attr_groups = pmu->type->attr_groups;
856                 pmu->pmu.attr_update = pmu->type->attr_update;
857         }
858
859         if (pmu->type->num_boxes == 1) {
860                 if (strlen(pmu->type->name) > 0)
861                         sprintf(pmu->name, "uncore_%s", pmu->type->name);
862                 else
863                         sprintf(pmu->name, "uncore");
864         } else {
865                 sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
866                         pmu->pmu_idx);
867         }
868
869         ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
870         if (!ret)
871                 pmu->registered = true;
872         return ret;
873 }
874
875 static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
876 {
877         if (!pmu->registered)
878                 return;
879         perf_pmu_unregister(&pmu->pmu);
880         pmu->registered = false;
881 }
882
883 static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
884 {
885         int die;
886
887         for (die = 0; die < uncore_max_dies(); die++)
888                 kfree(pmu->boxes[die]);
889         kfree(pmu->boxes);
890 }
891
892 static void uncore_type_exit(struct intel_uncore_type *type)
893 {
894         struct intel_uncore_pmu *pmu = type->pmus;
895         int i;
896
897         if (type->cleanup_mapping)
898                 type->cleanup_mapping(type);
899
900         if (pmu) {
901                 for (i = 0; i < type->num_boxes; i++, pmu++) {
902                         uncore_pmu_unregister(pmu);
903                         uncore_free_boxes(pmu);
904                 }
905                 kfree(type->pmus);
906                 type->pmus = NULL;
907         }
908         kfree(type->events_group);
909         type->events_group = NULL;
910 }
911
912 static void uncore_types_exit(struct intel_uncore_type **types)
913 {
914         for (; *types; types++)
915                 uncore_type_exit(*types);
916 }
917
918 static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
919 {
920         struct intel_uncore_pmu *pmus;
921         size_t size;
922         int i, j;
923
924         pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL);
925         if (!pmus)
926                 return -ENOMEM;
927
928         size = uncore_max_dies() * sizeof(struct intel_uncore_box *);
929
930         for (i = 0; i < type->num_boxes; i++) {
931                 pmus[i].func_id = setid ? i : -1;
932                 pmus[i].pmu_idx = i;
933                 pmus[i].type    = type;
934                 pmus[i].boxes   = kzalloc(size, GFP_KERNEL);
935                 if (!pmus[i].boxes)
936                         goto err;
937         }
938
939         type->pmus = pmus;
940         type->unconstrainted = (struct event_constraint)
941                 __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
942                                 0, type->num_counters, 0, 0);
943
944         if (type->event_descs) {
945                 struct {
946                         struct attribute_group group;
947                         struct attribute *attrs[];
948                 } *attr_group;
949                 for (i = 0; type->event_descs[i].attr.attr.name; i++);
950
951                 attr_group = kzalloc(struct_size(attr_group, attrs, i + 1),
952                                                                 GFP_KERNEL);
953                 if (!attr_group)
954                         goto err;
955
956                 attr_group->group.name = "events";
957                 attr_group->group.attrs = attr_group->attrs;
958
959                 for (j = 0; j < i; j++)
960                         attr_group->attrs[j] = &type->event_descs[j].attr.attr;
961
962                 type->events_group = &attr_group->group;
963         }
964
965         type->pmu_group = &uncore_pmu_attr_group;
966
967         if (type->set_mapping)
968                 type->set_mapping(type);
969
970         return 0;
971
972 err:
973         for (i = 0; i < type->num_boxes; i++)
974                 kfree(pmus[i].boxes);
975         kfree(pmus);
976
977         return -ENOMEM;
978 }
979
980 static int __init
981 uncore_types_init(struct intel_uncore_type **types, bool setid)
982 {
983         int ret;
984
985         for (; *types; types++) {
986                 ret = uncore_type_init(*types, setid);
987                 if (ret)
988                         return ret;
989         }
990         return 0;
991 }
992
993 /*
994  * Get the die information of a PCI device.
995  * @pdev: The PCI device.
996  * @phys_id: The physical socket id which the device maps to.
997  * @die: The die id which the device maps to.
998  */
999 static int uncore_pci_get_dev_die_info(struct pci_dev *pdev,
1000                                        int *phys_id, int *die)
1001 {
1002         *phys_id = uncore_pcibus_to_physid(pdev->bus);
1003         if (*phys_id < 0)
1004                 return -ENODEV;
1005
1006         *die = (topology_max_die_per_package() > 1) ? *phys_id :
1007                                 topology_phys_to_logical_pkg(*phys_id);
1008         if (*die < 0)
1009                 return -EINVAL;
1010
1011         return 0;
1012 }
1013
1014 /*
1015  * Find the PMU of a PCI device.
1016  * @pdev: The PCI device.
1017  * @ids: The ID table of the available PCI devices with a PMU.
1018  */
1019 static struct intel_uncore_pmu *
1020 uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
1021 {
1022         struct intel_uncore_pmu *pmu = NULL;
1023         struct intel_uncore_type *type;
1024         kernel_ulong_t data;
1025         unsigned int devfn;
1026
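        /*
         * The driver_data of each id table entry packs the uncore type, the
         * PMU index and the expected PCI device/function number, so a match
         * requires both the PCI device ID and the devfn to agree.
         */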
1027         while (ids && ids->vendor) {
1028                 if ((ids->vendor == pdev->vendor) &&
1029                     (ids->device == pdev->device)) {
1030                         data = ids->driver_data;
1031                         devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data),
1032                                           UNCORE_PCI_DEV_FUNC(data));
1033                         if (devfn == pdev->devfn) {
1034                                 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)];
1035                                 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)];
1036                                 break;
1037                         }
1038                 }
1039                 ids++;
1040         }
1041         return pmu;
1042 }
1043
1044 /*
1045  * Register the PMU for a PCI device
1046  * @pdev: The PCI device.
1047  * @type: The corresponding PMU type of the device.
1048  * @pmu: The corresponding PMU of the device.
1049  * @phys_id: The physical socket id which the device maps to.
1050  * @die: The die id which the device maps to.
1051  */
1052 static int uncore_pci_pmu_register(struct pci_dev *pdev,
1053                                    struct intel_uncore_type *type,
1054                                    struct intel_uncore_pmu *pmu,
1055                                    int phys_id, int die)
1056 {
1057         struct intel_uncore_box *box;
1058         int ret;
1059
1060         if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
1061                 return -EINVAL;
1062
1063         box = uncore_alloc_box(type, NUMA_NO_NODE);
1064         if (!box)
1065                 return -ENOMEM;
1066
1067         if (pmu->func_id < 0)
1068                 pmu->func_id = pdev->devfn;
1069         else
1070                 WARN_ON_ONCE(pmu->func_id != pdev->devfn);
1071
1072         atomic_inc(&box->refcnt);
1073         box->pci_phys_id = phys_id;
1074         box->dieid = die;
1075         box->pci_dev = pdev;
1076         box->pmu = pmu;
1077         uncore_box_init(box);
1078
1079         pmu->boxes[die] = box;
1080         if (atomic_inc_return(&pmu->activeboxes) > 1)
1081                 return 0;
1082
1083         /* First active box registers the pmu */
1084         ret = uncore_pmu_register(pmu);
1085         if (ret) {
1086                 pmu->boxes[die] = NULL;
1087                 uncore_box_exit(box);
1088                 kfree(box);
1089         }
1090         return ret;
1091 }
1092
1093 /*
1094  * add a pci uncore device
1095  */
1096 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1097 {
1098         struct intel_uncore_type *type;
1099         struct intel_uncore_pmu *pmu = NULL;
1100         int phys_id, die, ret;
1101
1102         ret = uncore_pci_get_dev_die_info(pdev, &phys_id, &die);
1103         if (ret)
1104                 return ret;
1105
1106         if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
1107                 int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
1108
1109                 uncore_extra_pci_dev[die].dev[idx] = pdev;
1110                 pci_set_drvdata(pdev, NULL);
1111                 return 0;
1112         }
1113
1114         type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
1115
1116         /*
1117          * Some platforms, e.g. Knights Landing, use a common PCI device ID
1118          * for multiple instances of an uncore PMU device type. Check the
1119          * PCI slot and function to identify the uncore box.
1120          */
1121         if (id->driver_data & ~0xffff) {
1122                 struct pci_driver *pci_drv = pdev->driver;
1123
1124                 pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table);
1125                 if (pmu == NULL)
1126                         return -ENODEV;
1127         } else {
1128                 /*
1129                  * For a performance monitoring unit with multiple boxes,
1130                  * each box has a different function id.
1131                  */
1132                 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
1133         }
1134
1135         ret = uncore_pci_pmu_register(pdev, type, pmu, phys_id, die);
1136
1137         pci_set_drvdata(pdev, pmu->boxes[die]);
1138
1139         return ret;
1140 }
1141
1142 /*
1143  * Unregister the PMU of a PCI device
1144  * @pmu: The corresponding PMU to be unregistered.
1145  * @phys_id: The physical socket id which the device maps to.
1146  * @die: The die id which the device maps to.
1147  */
1148 static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu,
1149                                       int phys_id, int die)
1150 {
1151         struct intel_uncore_box *box = pmu->boxes[die];
1152
1153         if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
1154                 return;
1155
1156         pmu->boxes[die] = NULL;
1157         if (atomic_dec_return(&pmu->activeboxes) == 0)
1158                 uncore_pmu_unregister(pmu);
1159         uncore_box_exit(box);
1160         kfree(box);
1161 }
1162
1163 static void uncore_pci_remove(struct pci_dev *pdev)
1164 {
1165         struct intel_uncore_box *box;
1166         struct intel_uncore_pmu *pmu;
1167         int i, phys_id, die;
1168
1169         if (uncore_pci_get_dev_die_info(pdev, &phys_id, &die))
1170                 return;
1171
1172         box = pci_get_drvdata(pdev);
1173         if (!box) {
1174                 for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
1175                         if (uncore_extra_pci_dev[die].dev[i] == pdev) {
1176                                 uncore_extra_pci_dev[die].dev[i] = NULL;
1177                                 break;
1178                         }
1179                 }
1180                 WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
1181                 return;
1182         }
1183
1184         pmu = box->pmu;
1185
1186         pci_set_drvdata(pdev, NULL);
1187
1188         uncore_pci_pmu_unregister(pmu, phys_id, die);
1189 }
1190
1191 static int uncore_bus_notify(struct notifier_block *nb,
1192                              unsigned long action, void *data)
1193 {
1194         struct device *dev = data;
1195         struct pci_dev *pdev = to_pci_dev(dev);
1196         struct intel_uncore_pmu *pmu;
1197         int phys_id, die;
1198
1199         /* Unregister the PMU when the device is going to be deleted. */
1200         if (action != BUS_NOTIFY_DEL_DEVICE)
1201                 return NOTIFY_DONE;
1202
1203         pmu = uncore_pci_find_dev_pmu(pdev, uncore_pci_sub_driver->id_table);
1204         if (!pmu)
1205                 return NOTIFY_DONE;
1206
1207         if (uncore_pci_get_dev_die_info(pdev, &phys_id, &die))
1208                 return NOTIFY_DONE;
1209
1210         uncore_pci_pmu_unregister(pmu, phys_id, die);
1211
1212         return NOTIFY_OK;
1213 }
1214
1215 static struct notifier_block uncore_notifier = {
1216         .notifier_call = uncore_bus_notify,
1217 };
1218
1219 static void uncore_pci_sub_driver_init(void)
1220 {
1221         const struct pci_device_id *ids = uncore_pci_sub_driver->id_table;
1222         struct intel_uncore_type *type;
1223         struct intel_uncore_pmu *pmu;
1224         struct pci_dev *pci_sub_dev;
1225         bool notify = false;
1226         unsigned int devfn;
1227         int phys_id, die;
1228
1229         while (ids && ids->vendor) {
1230                 pci_sub_dev = NULL;
1231                 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)];
1232                 /*
1233                  * Search for the available devices, and register the
1234                  * corresponding PMU.
1235                  */
1236                 while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
1237                                                      ids->device, pci_sub_dev))) {
1238                         devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
1239                                           UNCORE_PCI_DEV_FUNC(ids->driver_data));
1240                         if (devfn != pci_sub_dev->devfn)
1241                                 continue;
1242
1243                         pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
1244                         if (!pmu)
1245                                 continue;
1246
1247                         if (uncore_pci_get_dev_die_info(pci_sub_dev,
1248                                                         &phys_id, &die))
1249                                 continue;
1250
1251                         if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu,
1252                                                      phys_id, die))
1253                                 notify = true;
1254                 }
1255                 ids++;
1256         }
1257
1258         if (notify && bus_register_notifier(&pci_bus_type, &uncore_notifier))
1259                 notify = false;
1260
1261         if (!notify)
1262                 uncore_pci_sub_driver = NULL;
1263 }
1264
1265 static int __init uncore_pci_init(void)
1266 {
1267         size_t size;
1268         int ret;
1269
1270         size = uncore_max_dies() * sizeof(struct pci_extra_dev);
1271         uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
1272         if (!uncore_extra_pci_dev) {
1273                 ret = -ENOMEM;
1274                 goto err;
1275         }
1276
1277         ret = uncore_types_init(uncore_pci_uncores, false);
1278         if (ret)
1279                 goto errtype;
1280
1281         uncore_pci_driver->probe = uncore_pci_probe;
1282         uncore_pci_driver->remove = uncore_pci_remove;
1283
1284         ret = pci_register_driver(uncore_pci_driver);
1285         if (ret)
1286                 goto errtype;
1287
1288         if (uncore_pci_sub_driver)
1289                 uncore_pci_sub_driver_init();
1290
1291         pcidrv_registered = true;
1292         return 0;
1293
1294 errtype:
1295         uncore_types_exit(uncore_pci_uncores);
1296         kfree(uncore_extra_pci_dev);
1297         uncore_extra_pci_dev = NULL;
1298         uncore_free_pcibus_map();
1299 err:
1300         uncore_pci_uncores = empty_uncore;
1301         return ret;
1302 }
1303
1304 static void uncore_pci_exit(void)
1305 {
1306         if (pcidrv_registered) {
1307                 pcidrv_registered = false;
1308                 if (uncore_pci_sub_driver)
1309                         bus_unregister_notifier(&pci_bus_type, &uncore_notifier);
1310                 pci_unregister_driver(uncore_pci_driver);
1311                 uncore_types_exit(uncore_pci_uncores);
1312                 kfree(uncore_extra_pci_dev);
1313                 uncore_free_pcibus_map();
1314         }
1315 }
1316
1317 static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
1318                                    int new_cpu)
1319 {
1320         struct intel_uncore_pmu *pmu = type->pmus;
1321         struct intel_uncore_box *box;
1322         int i, die;
1323
1324         die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
1325         for (i = 0; i < type->num_boxes; i++, pmu++) {
1326                 box = pmu->boxes[die];
1327                 if (!box)
1328                         continue;
1329
1330                 if (old_cpu < 0) {
1331                         WARN_ON_ONCE(box->cpu != -1);
1332                         box->cpu = new_cpu;
1333                         continue;
1334                 }
1335
1336                 WARN_ON_ONCE(box->cpu != old_cpu);
1337                 box->cpu = -1;
1338                 if (new_cpu < 0)
1339                         continue;
1340
1341                 uncore_pmu_cancel_hrtimer(box);
1342                 perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
1343                 box->cpu = new_cpu;
1344         }
1345 }
1346
1347 static void uncore_change_context(struct intel_uncore_type **uncores,
1348                                   int old_cpu, int new_cpu)
1349 {
1350         for (; *uncores; uncores++)
1351                 uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
1352 }
1353
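/*
 * Drop one reference on each box of the given types for this die id and
 * shut the box down via uncore_box_exit() when the last reference is gone.
 */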
1354 static void uncore_box_unref(struct intel_uncore_type **types, int id)
1355 {
1356         struct intel_uncore_type *type;
1357         struct intel_uncore_pmu *pmu;
1358         struct intel_uncore_box *box;
1359         int i;
1360
1361         for (; *types; types++) {
1362                 type = *types;
1363                 pmu = type->pmus;
1364                 for (i = 0; i < type->num_boxes; i++, pmu++) {
1365                         box = pmu->boxes[id];
1366                         if (box && atomic_dec_return(&box->refcnt) == 0)
1367                                 uncore_box_exit(box);
1368                 }
1369         }
1370 }
1371
1372 static int uncore_event_cpu_offline(unsigned int cpu)
1373 {
1374         int die, target;
1375
1376         /* Check if the exiting cpu is used for collecting uncore events */
1377         if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
1378                 goto unref;
1379         /* Find a new cpu to collect uncore events */
1380         target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
1381
1382         /* Migrate uncore events to the new target */
1383         if (target < nr_cpu_ids)
1384                 cpumask_set_cpu(target, &uncore_cpu_mask);
1385         else
1386                 target = -1;
1387
1388         uncore_change_context(uncore_msr_uncores, cpu, target);
1389         uncore_change_context(uncore_mmio_uncores, cpu, target);
1390         uncore_change_context(uncore_pci_uncores, cpu, target);
1391
1392 unref:
1393         /* Clear the references */
1394         die = topology_logical_die_id(cpu);
1395         uncore_box_unref(uncore_msr_uncores, die);
1396         uncore_box_unref(uncore_mmio_uncores, die);
1397         return 0;
1398 }
1399
1400 static int allocate_boxes(struct intel_uncore_type **types,
1401                          unsigned int die, unsigned int cpu)
1402 {
1403         struct intel_uncore_box *box, *tmp;
1404         struct intel_uncore_type *type;
1405         struct intel_uncore_pmu *pmu;
1406         LIST_HEAD(allocated);
1407         int i;
1408
1409         /* Try to allocate all required boxes */
1410         for (; *types; types++) {
1411                 type = *types;
1412                 pmu = type->pmus;
1413                 for (i = 0; i < type->num_boxes; i++, pmu++) {
1414                         if (pmu->boxes[die])
1415                                 continue;
1416                         box = uncore_alloc_box(type, cpu_to_node(cpu));
1417                         if (!box)
1418                                 goto cleanup;
1419                         box->pmu = pmu;
1420                         box->dieid = die;
1421                         list_add(&box->active_list, &allocated);
1422                 }
1423         }
1424         /* Install them in the pmus */
1425         list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1426                 list_del_init(&box->active_list);
1427                 box->pmu->boxes[die] = box;
1428         }
1429         return 0;
1430
1431 cleanup:
1432         list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1433                 list_del_init(&box->active_list);
1434                 kfree(box);
1435         }
1436         return -ENOMEM;
1437 }
1438
1439 static int uncore_box_ref(struct intel_uncore_type **types,
1440                           int id, unsigned int cpu)
1441 {
1442         struct intel_uncore_type *type;
1443         struct intel_uncore_pmu *pmu;
1444         struct intel_uncore_box *box;
1445         int i, ret;
1446
1447         ret = allocate_boxes(types, id, cpu);
1448         if (ret)
1449                 return ret;
1450
1451         for (; *types; types++) {
1452                 type = *types;
1453                 pmu = type->pmus;
1454                 for (i = 0; i < type->num_boxes; i++, pmu++) {
1455                         box = pmu->boxes[id];
1456                         if (box && atomic_inc_return(&box->refcnt) == 1)
1457                                 uncore_box_init(box);
1458                 }
1459         }
1460         return 0;
1461 }
1462
1463 static int uncore_event_cpu_online(unsigned int cpu)
1464 {
1465         int die, target, msr_ret, mmio_ret;
1466
1467         die = topology_logical_die_id(cpu);
1468         msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
1469         mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
1470         if (msr_ret && mmio_ret)
1471                 return -ENOMEM;
1472
1473         /*
1474          * Check if there is an online cpu in the package
1475          * which collects uncore events already.
1476          */
1477         target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
1478         if (target < nr_cpu_ids)
1479                 return 0;
1480
1481         cpumask_set_cpu(cpu, &uncore_cpu_mask);
1482
1483         if (!msr_ret)
1484                 uncore_change_context(uncore_msr_uncores, -1, cpu);
1485         if (!mmio_ret)
1486                 uncore_change_context(uncore_mmio_uncores, -1, cpu);
1487         uncore_change_context(uncore_pci_uncores, -1, cpu);
1488         return 0;
1489 }
1490
1491 static int __init type_pmu_register(struct intel_uncore_type *type)
1492 {
1493         int i, ret;
1494
1495         for (i = 0; i < type->num_boxes; i++) {
1496                 ret = uncore_pmu_register(&type->pmus[i]);
1497                 if (ret)
1498                         return ret;
1499         }
1500         return 0;
1501 }
1502
1503 static int __init uncore_msr_pmus_register(void)
1504 {
1505         struct intel_uncore_type **types = uncore_msr_uncores;
1506         int ret;
1507
1508         for (; *types; types++) {
1509                 ret = type_pmu_register(*types);
1510                 if (ret)
1511                         return ret;
1512         }
1513         return 0;
1514 }
1515
1516 static int __init uncore_cpu_init(void)
1517 {
1518         int ret;
1519
1520         ret = uncore_types_init(uncore_msr_uncores, true);
1521         if (ret)
1522                 goto err;
1523
1524         ret = uncore_msr_pmus_register();
1525         if (ret)
1526                 goto err;
1527         return 0;
1528 err:
1529         uncore_types_exit(uncore_msr_uncores);
1530         uncore_msr_uncores = empty_uncore;
1531         return ret;
1532 }
1533
1534 static int __init uncore_mmio_init(void)
1535 {
1536         struct intel_uncore_type **types = uncore_mmio_uncores;
1537         int ret;
1538
1539         ret = uncore_types_init(types, true);
1540         if (ret)
1541                 goto err;
1542
1543         for (; *types; types++) {
1544                 ret = type_pmu_register(*types);
1545                 if (ret)
1546                         goto err;
1547         }
1548         return 0;
1549 err:
1550         uncore_types_exit(uncore_mmio_uncores);
1551         uncore_mmio_uncores = empty_uncore;
1552         return ret;
1553 }
1554
1555 struct intel_uncore_init_fun {
1556         void    (*cpu_init)(void);
1557         int     (*pci_init)(void);
1558         void    (*mmio_init)(void);
1559 };
1560
1561 static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
1562         .cpu_init = nhm_uncore_cpu_init,
1563 };
1564
1565 static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
1566         .cpu_init = snb_uncore_cpu_init,
1567         .pci_init = snb_uncore_pci_init,
1568 };
1569
1570 static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
1571         .cpu_init = snb_uncore_cpu_init,
1572         .pci_init = ivb_uncore_pci_init,
1573 };
1574
1575 static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
1576         .cpu_init = snb_uncore_cpu_init,
1577         .pci_init = hsw_uncore_pci_init,
1578 };
1579
1580 static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
1581         .cpu_init = snb_uncore_cpu_init,
1582         .pci_init = bdw_uncore_pci_init,
1583 };
1584
1585 static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
1586         .cpu_init = snbep_uncore_cpu_init,
1587         .pci_init = snbep_uncore_pci_init,
1588 };
1589
1590 static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
1591         .cpu_init = nhmex_uncore_cpu_init,
1592 };
1593
1594 static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
1595         .cpu_init = ivbep_uncore_cpu_init,
1596         .pci_init = ivbep_uncore_pci_init,
1597 };
1598
1599 static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
1600         .cpu_init = hswep_uncore_cpu_init,
1601         .pci_init = hswep_uncore_pci_init,
1602 };
1603
1604 static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
1605         .cpu_init = bdx_uncore_cpu_init,
1606         .pci_init = bdx_uncore_pci_init,
1607 };
1608
1609 static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
1610         .cpu_init = knl_uncore_cpu_init,
1611         .pci_init = knl_uncore_pci_init,
1612 };
1613
1614 static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
1615         .cpu_init = skl_uncore_cpu_init,
1616         .pci_init = skl_uncore_pci_init,
1617 };
1618
1619 static const struct intel_uncore_init_fun skx_uncore_init __initconst = {
1620         .cpu_init = skx_uncore_cpu_init,
1621         .pci_init = skx_uncore_pci_init,
1622 };
1623
1624 static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
1625         .cpu_init = icl_uncore_cpu_init,
1626         .pci_init = skl_uncore_pci_init,
1627 };
1628
1629 static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
1630         .cpu_init = tgl_uncore_cpu_init,
1631         .mmio_init = tgl_uncore_mmio_init,
1632 };
1633
1634 static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
1635         .cpu_init = tgl_uncore_cpu_init,
1636         .mmio_init = tgl_l_uncore_mmio_init,
1637 };
1638
1639 static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
1640         .cpu_init = icx_uncore_cpu_init,
1641         .pci_init = icx_uncore_pci_init,
1642         .mmio_init = icx_uncore_mmio_init,
1643 };
1644
1645 static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
1646         .cpu_init = snr_uncore_cpu_init,
1647         .pci_init = snr_uncore_pci_init,
1648         .mmio_init = snr_uncore_mmio_init,
1649 };
1650
1651 static const struct x86_cpu_id intel_uncore_match[] __initconst = {
1652         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,          &nhm_uncore_init),
1653         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,             &nhm_uncore_init),
1654         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,            &nhm_uncore_init),
1655         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,         &nhm_uncore_init),
1656         X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,         &snb_uncore_init),
1657         X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,           &ivb_uncore_init),
1658         X86_MATCH_INTEL_FAM6_MODEL(HASWELL,             &hsw_uncore_init),
1659         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,           &hsw_uncore_init),
1660         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,           &hsw_uncore_init),
1661         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,           &bdw_uncore_init),
1662         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,         &bdw_uncore_init),
1663         X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,       &snbep_uncore_init),
1664         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,          &nhmex_uncore_init),
1665         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,         &nhmex_uncore_init),
1666         X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,         &ivbep_uncore_init),
1667         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,           &hswep_uncore_init),
1668         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,         &bdx_uncore_init),
1669         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,         &bdx_uncore_init),
1670         X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,        &knl_uncore_init),
1671         X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,        &knl_uncore_init),
1672         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,             &skl_uncore_init),
1673         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,           &skl_uncore_init),
1674         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,           &skx_uncore_init),
1675         X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,          &skl_uncore_init),
1676         X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,            &skl_uncore_init),
1677         X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,         &skl_uncore_init),
1678         X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,           &skl_uncore_init),
1679         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,           &icl_uncore_init),
1680         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI,        &icl_uncore_init),
1681         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,             &icl_uncore_init),
1682         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           &icx_uncore_init),
1683         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           &icx_uncore_init),
1684         X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,         &tgl_l_uncore_init),
1685         X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,           &tgl_uncore_init),
1686         X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      &snr_uncore_init),
1687         {},
1688 };
1689 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
1690
1691 static int __init intel_uncore_init(void)
1692 {
1693         const struct x86_cpu_id *id;
1694         struct intel_uncore_init_fun *uncore_init;
1695         int pret = 0, cret = 0, mret = 0, ret;
1696
1697         id = x86_match_cpu(intel_uncore_match);
1698         if (!id)
1699                 return -ENODEV;
1700
1701         if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
1702                 return -ENODEV;
1703
1704         __uncore_max_dies =
1705                 topology_max_packages() * topology_max_die_per_package();
1706
1707         uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
1708         if (uncore_init->pci_init) {
1709                 pret = uncore_init->pci_init();
1710                 if (!pret)
1711                         pret = uncore_pci_init();
1712         }
1713
1714         if (uncore_init->cpu_init) {
1715                 uncore_init->cpu_init();
1716                 cret = uncore_cpu_init();
1717         }
1718
1719         if (uncore_init->mmio_init) {
1720                 uncore_init->mmio_init();
1721                 mret = uncore_mmio_init();
1722         }
1723
1724         if (cret && pret && mret)
1725                 return -ENODEV;
1726
1727         /* Install hotplug callbacks to set up the targets for each package */
1728         ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
1729                                 "perf/x86/intel/uncore:online",
1730                                 uncore_event_cpu_online,
1731                                 uncore_event_cpu_offline);
1732         if (ret)
1733                 goto err;
1734         return 0;
1735
1736 err:
1737         uncore_types_exit(uncore_msr_uncores);
1738         uncore_types_exit(uncore_mmio_uncores);
1739         uncore_pci_exit();
1740         return ret;
1741 }
1742 module_init(intel_uncore_init);
1743
1744 static void __exit intel_uncore_exit(void)
1745 {
1746         cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
1747         uncore_types_exit(uncore_msr_uncores);
1748         uncore_types_exit(uncore_mmio_uncores);
1749         uncore_pci_exit();
1750 }
1751 module_exit(intel_uncore_exit);