arch/x86/oprofile/op_model_p4.c
/**
 * @file op_model_p4.c
 * P4 model-specific MSR operations
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author Graydon Hoare
 */

#include <linux/oprofile.h>
#include <linux/smp.h>
#include <linux/ptrace.h>
#include <linux/nmi.h>
#include <asm/msr.h>
#include <asm/fixmap.h>
#include <asm/apic.h>

#include "op_x86_model.h"
#include "op_counter.h"

#define NUM_EVENTS 39

#define NUM_COUNTERS_NON_HT 8
#define NUM_ESCRS_NON_HT 45
#define NUM_CCCRS_NON_HT 18
#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)

#define NUM_COUNTERS_HT2 4
#define NUM_ESCRS_HT2 23
#define NUM_CCCRS_HT2 9
#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)

static unsigned int num_counters = NUM_COUNTERS_NON_HT;
static unsigned int num_controls = NUM_CONTROLS_NON_HT;

/* this has to be checked dynamically since the
   hyper-threadedness of a chip is discovered at
   kernel boot-time. */
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings == 2) {
		num_counters = NUM_COUNTERS_HT2;
		num_controls = NUM_CONTROLS_HT2;
	}
#endif
}

static inline int addr_increment(void)
{
#ifdef CONFIG_SMP
	return smp_num_siblings == 2 ? 2 : 1;
#else
	return 1;
#endif
}
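
/*
 * Illustrative note (not in the original source): with two HT siblings,
 * addr_increment() returns 2, so the per-thread ESCR scans in
 * p4_fill_in_addresses() below step over every other MSR.  The thread
 * with stagger 0 starts at MSR_P4_BSU_ESCR0 + 0 and claims the even
 * ESCRs of each group; its sibling starts at MSR_P4_BSU_ESCR0 + 1 and
 * claims the odd ones, so the two threads reserve disjoint registers.
 */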

/* tables to simulate simplified hardware view of p4 registers */
struct p4_counter_binding {
	int virt_counter;
	int counter_address;
	int cccr_address;
};

struct p4_event_binding {
	int escr_select;  /* value to put in CCCR */
	int event_select; /* value to put in ESCR */
	struct {
		int virt_counter; /* for this counter... */
		int escr_address; /* use this ESCR */
	} bindings[2];
};
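
/*
 * Worked example, taken from the first entry of the events table below:
 * BRANCH_RETIRED puts escr_select 0x05 into the CCCR and event_select
 * 0x06 into the ESCR, and can be counted either on CTR_IQ_4 via
 * MSR_P4_CRU_ESCR2 or on CTR_IQ_5 via MSR_P4_CRU_ESCR3.
 */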

/* nb: these CTR_* defines are a duplicate of defines in
   event/i386.p4*events. */

#define CTR_BPU_0	(1 << 0)
#define CTR_MS_0	(1 << 1)
#define CTR_FLAME_0	(1 << 2)
#define CTR_IQ_4	(1 << 3)
#define CTR_BPU_2	(1 << 4)
#define CTR_MS_2	(1 << 5)
#define CTR_FLAME_2	(1 << 6)
#define CTR_IQ_5	(1 << 7)

static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
	{ CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
	{ CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
	{ CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
	{ CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
};

#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)

/* p4 event codes in libop/op_event.h are indices into this table. */

static struct p4_event_binding p4_events[NUM_EVENTS] = {

	{ /* BRANCH_RETIRED */
		0x05, 0x06,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
	},

	{ /* MISPRED_BRANCH_RETIRED */
		0x04, 0x03,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1 } }
	},

	{ /* TC_DELIVER_MODE */
		0x01, 0x01,
		{ { CTR_MS_0, MSR_P4_TC_ESCR0 },
		  { CTR_MS_2, MSR_P4_TC_ESCR1 } }
	},

	{ /* BPU_FETCH_REQUEST */
		0x00, 0x03,
		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0 },
		  { CTR_BPU_2, MSR_P4_BPU_ESCR1 } }
	},

	{ /* ITLB_REFERENCE */
		0x03, 0x18,
		{ { CTR_BPU_0, MSR_P4_ITLB_ESCR0 },
		  { CTR_BPU_2, MSR_P4_ITLB_ESCR1 } }
	},

	{ /* MEMORY_CANCEL */
		0x05, 0x02,
		{ { CTR_FLAME_0, MSR_P4_DAC_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_DAC_ESCR1 } }
	},

	{ /* MEMORY_COMPLETE */
		0x02, 0x08,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1 } }
	},

	{ /* LOAD_PORT_REPLAY */
		0x02, 0x04,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1 } }
	},

	{ /* STORE_PORT_REPLAY */
		0x02, 0x05,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1 } }
	},

	{ /* MOB_LOAD_REPLAY */
		0x02, 0x03,
		{ { CTR_BPU_0, MSR_P4_MOB_ESCR0 },
		  { CTR_BPU_2, MSR_P4_MOB_ESCR1 } }
	},

	{ /* PAGE_WALK_TYPE */
		0x04, 0x01,
		{ { CTR_BPU_0, MSR_P4_PMH_ESCR0 },
		  { CTR_BPU_2, MSR_P4_PMH_ESCR1 } }
	},

	{ /* BSQ_CACHE_REFERENCE */
		0x07, 0x0c,
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0 },
		  { CTR_BPU_2, MSR_P4_BSU_ESCR1 } }
	},

	{ /* IOQ_ALLOCATION */
		0x06, 0x03,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0 },
		  { 0, 0 } }
	},

	{ /* IOQ_ACTIVE_ENTRIES */
		0x06, 0x1a,
		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1 },
		  { 0, 0 } }
	},

	{ /* FSB_DATA_ACTIVITY */
		0x06, 0x17,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0 },
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1 } }
	},

	{ /* BSQ_ALLOCATION */
		0x07, 0x05,
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0 },
		  { 0, 0 } }
	},

	{ /* BSQ_ACTIVE_ENTRIES */
		0x07, 0x06,
		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */ },
		  { 0, 0 } }
	},

	{ /* X87_ASSIST */
		0x05, 0x03,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
	},

	{ /* SSE_INPUT_ASSIST */
		0x01, 0x34,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* PACKED_SP_UOP */
		0x01, 0x08,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* PACKED_DP_UOP */
		0x01, 0x0c,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* SCALAR_SP_UOP */
		0x01, 0x0a,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* SCALAR_DP_UOP */
		0x01, 0x0e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* 64BIT_MMX_UOP */
		0x01, 0x02,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* 128BIT_MMX_UOP */
		0x01, 0x1a,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* X87_FP_UOP */
		0x01, 0x04,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* X87_SIMD_MOVES_UOP */
		0x01, 0x2e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* MACHINE_CLEAR */
		0x05, 0x02,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
	},

	{ /* GLOBAL_POWER_EVENTS */
		0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0 },
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1 } }
	},

	{ /* TC_MS_XFER */
		0x00, 0x05,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0 },
		  { CTR_MS_2, MSR_P4_MS_ESCR1 } }
	},

	{ /* UOP_QUEUE_WRITES */
		0x00, 0x09,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0 },
		  { CTR_MS_2, MSR_P4_MS_ESCR1 } }
	},

	{ /* FRONT_END_EVENT */
		0x05, 0x08,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
	},

	{ /* EXECUTION_EVENT */
		0x05, 0x0c,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
	},

	{ /* REPLAY_EVENT */
		0x05, 0x09,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
	},

	{ /* INSTR_RETIRED */
		0x04, 0x02,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1 } }
	},

	{ /* UOPS_RETIRED */
		0x04, 0x01,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1 } }
	},

	{ /* UOP_TYPE */
		0x02, 0x02,
		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0 },
		  { CTR_IQ_5, MSR_P4_RAT_ESCR1 } }
	},

	{ /* RETIRED_MISPRED_BRANCH_TYPE */
		0x02, 0x05,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0 },
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1 } }
	},

	{ /* RETIRED_BRANCH_TYPE */
		0x02, 0x04,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0 },
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1 } }
	}
};


#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)

#define ESCR_RESERVED_BITS 0x80000003
#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
#define ESCR_READ(escr, high, ev, i) \
	do { rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
#define ESCR_WRITE(escr, high, ev, i) \
	do { wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
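
/*
 * Sketch of how the ESCR helpers compose (the event/mask values here are
 * illustrative, not from the original source).  Programming event_select
 * 0x06 with unit mask 0x0c, counting both kernel and user, on the
 * stagger-0 thread:
 *
 *	unsigned int escr = 0;
 *	ESCR_CLEAR(escr);			// escr == 0x00000000
 *	ESCR_SET_USR_0(escr, 1);		// |= 1 << 2
 *	ESCR_SET_OS_0(escr, 1);			// |= 1 << 3
 *	ESCR_SET_EVENT_SELECT(escr, 0x06);	// |= 0x06 << 25
 *	ESCR_SET_EVENT_MASK(escr, 0x0c);	// |= 0x0c << 9
 *
 * yields escr == 0x0c00180c: event select in bits 30:25, event mask in
 * bits 24:9, and the thread-0 OS/USR enables in bits 3:2.
 */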

#define CCCR_RESERVED_BITS 0x38030FFF
#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1 << 26))
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1 << 27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1 << 12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1 << 12))
#define CCCR_READ(low, high, i) \
	do { rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
#define CCCR_WRITE(low, high, i) \
	do { wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
#define CCCR_OVF_P(cccr) ((cccr) & (1U << 31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U << 31)))

#define CTR_READ(l, h, i) \
	do { rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0)
#define CTR_WRITE(l, i) \
	do { wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0)
#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
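
/*
 * Worked example (illustrative): CTR_WRITE stores the two's complement
 * of the reset count, since the counter counts upward and raises its PMI
 * on overflow.  For a count of 100000, -(u32)100000 == 0xfffe7960, so
 * the counter wraps past zero after exactly 100000 events.  Reset values
 * therefore have bit 31 set, and CTR_OVERFLOW_P reads a clear bit 31 as
 * "already wrapped", catching overflows the CCCR:OVF flag misses (see
 * the errata note in p4_check_ctrs() below).
 */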

/* this assigns a "stagger" to the current CPU, which is used throughout
   the code in this module as an extra array offset, to select the "even"
   or "odd" part of all the divided resources. */
static unsigned int get_stagger(void)
{
#ifdef CONFIG_SMP
	int cpu = smp_processor_id();
	return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
#endif
	return 0;
}
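
/*
 * Example (a descriptive note, not in the original): on an HT chip where
 * logical CPUs 0 and 1 are siblings, cpumask_first() returns 0 on both,
 * so CPU 0 gets stagger 0 and CPU 1 gets stagger 1.  On non-HT systems
 * each CPU is the first of its own sibling map and the stagger is 0.
 */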

/* finally, mediate access to a real hardware counter
   by passing a "virtual" counter number to this macro,
   along with your stagger setting. */
#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
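
/*
 * Example, using the p4_counters[] table above: with num_counters ==
 * NUM_COUNTERS_HT2 (4), the odd thread's virtual counter 0 maps to
 * VIRT_CTR(1, 0) == 4, the CTR_BPU_2 / MSR_P4_BPU_PERFCTR2 /
 * MSR_P4_BPU_CCCR2 slot, while the even thread's counter 0 stays at
 * index 0 (CTR_BPU_0).
 */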

static unsigned long reset_value[NUM_COUNTERS_NON_HT];

static void p4_fill_in_addresses(struct op_msrs * const msrs)
{
	unsigned int i;
	unsigned int addr, cccraddr, stag;

	setup_num_counters();
	stag = get_stagger();

	/* initialize some registers */
	for (i = 0; i < num_counters; ++i)
		msrs->counters[i].addr = 0;
	for (i = 0; i < num_controls; ++i)
		msrs->controls[i].addr = 0;

	/* the counter & cccr registers we pay attention to */
	for (i = 0; i < num_counters; ++i) {
		addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
		cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
		if (reserve_perfctr_nmi(addr)) {
			msrs->counters[i].addr = addr;
			msrs->controls[i].addr = cccraddr;
		}
	}

	/* 43 ESCR registers in three or four discontiguous groups */
	for (addr = MSR_P4_BSU_ESCR0 + stag;
	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	/* no IQ_ESCR0/1 on some models; we save BSU_ESCR0/1 a second time
	 * to avoid a special case in nmi_{save|restore}_registers() */
	if (boot_cpu_data.x86_model >= 0x3) {
		for (addr = MSR_P4_BSU_ESCR0 + stag;
		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
			if (reserve_evntsel_nmi(addr))
				msrs->controls[i].addr = addr;
		}
	} else {
		for (addr = MSR_P4_IQ_ESCR0 + stag;
		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
			if (reserve_evntsel_nmi(addr))
				msrs->controls[i].addr = addr;
		}
	}

	for (addr = MSR_P4_RAT_ESCR0 + stag;
	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_MS_ESCR0 + stag;
	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_IX_ESCR0 + stag;
	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	/* there are 2 remaining non-contiguously located ESCRs */

	if (num_counters == NUM_COUNTERS_NON_HT) {
		/* standard non-HT CPUs handle both remaining ESCRs */
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else if (stag == 0) {
		/* HT CPUs give the first remainder to the even thread, as
		   the 32nd control register */
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else {
		/* and two copies of the second to the odd thread,
		   for the 22nd and 23rd control registers */
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		}
	}
}
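
/*
 * Resulting layout of the controls array (descriptive note, not in the
 * original): entries [0, num_counters) hold the CCCR addresses reserved
 * alongside their counters; entries from num_counters up hold the ESCR
 * addresses claimed above.  p4_setup_ctrs() and p4_shutdown() rely on
 * this split when they iterate from num_counters to num_controls.
 */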
static void pmc_setup_one_p4_counter(unsigned int ctr)
{
	int i;
	int const maxbind = 2;
	unsigned int cccr = 0;
	unsigned int escr = 0;
	unsigned int high = 0;
	unsigned int counter_bit;
	struct p4_event_binding *ev = NULL;
	unsigned int stag;

	stag = get_stagger();

	/* convert from counter *number* to counter *bit* */
	counter_bit = 1 << VIRT_CTR(stag, ctr);

	/* find our event binding structure. */
	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
		printk(KERN_ERR
		       "oprofile: P4 event code 0x%lx out of range\n",
		       counter_config[ctr].event);
		return;
	}

	ev = &(p4_events[counter_config[ctr].event - 1]);

	for (i = 0; i < maxbind; i++) {
		if (ev->bindings[i].virt_counter & counter_bit) {

			/* modify ESCR */
			ESCR_READ(escr, high, ev, i);
			ESCR_CLEAR(escr);
			if (stag == 0) {
				ESCR_SET_USR_0(escr, counter_config[ctr].user);
				ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
			} else {
				ESCR_SET_USR_1(escr, counter_config[ctr].user);
				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
			}
			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
			ESCR_WRITE(escr, high, ev, i);

			/* modify CCCR */
			CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
			CCCR_CLEAR(cccr);
			CCCR_SET_REQUIRED_BITS(cccr);
			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
			if (stag == 0)
				CCCR_SET_PMI_OVF_0(cccr);
			else
				CCCR_SET_PMI_OVF_1(cccr);
			CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
			return;
		}
	}

	printk(KERN_ERR
	       "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
	       counter_config[ctr].event, stag, ctr);
}

static void p4_setup_ctrs(struct op_msrs const * const msrs)
{
	unsigned int i;
	unsigned int low, high;
	unsigned int stag;

	stag = get_stagger();

	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
	if (!MISC_PMC_ENABLED_P(low)) {
		printk(KERN_ERR "oprofile: P4 PMC not available\n");
		return;
	}

	/* clear the cccrs we will use */
	for (i = 0; i < num_counters; i++) {
		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
			continue;
		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
		CCCR_CLEAR(low);
		CCCR_SET_REQUIRED_BITS(low);
		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
	}

	/* clear all escrs (including those outside our concern) */
	for (i = num_counters; i < num_controls; i++) {
		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
			continue;
		wrmsr(msrs->controls[i].addr, 0, 0);
	}

	/* setup all counters */
	for (i = 0; i < num_counters; ++i) {
		if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) {
			reset_value[i] = counter_config[i].count;
			pmc_setup_one_p4_counter(i);
			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
		} else {
			reset_value[i] = 0;
		}
	}
}

static int p4_check_ctrs(struct pt_regs * const regs,
			 struct op_msrs const * const msrs)
{
	unsigned long ctr, low, high, stag, real;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {

		if (!reset_value[i])
			continue;

		/*
		 * there is some eccentricity in the hardware which
		 * requires that we perform 2 extra corrections:
		 *
		 * - check both the CCCR:OVF flag for overflow and the
		 *   counter high bit for un-flagged overflows.
		 *
		 * - write the counter back twice to ensure it gets
		 *   updated properly.
		 *
		 * the former seems to be related to extra NMIs happening
		 * during the current NMI; the latter is reported as errata
		 * N15 in intel doc 249199-029, pentium 4 specification
		 * update, though their suggested work-around does not
		 * appear to solve the problem.
		 */

		real = VIRT_CTR(stag, i);

		CCCR_READ(low, high, real);
		CTR_READ(ctr, high, real);
		if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
			oprofile_add_sample(regs, i);
			CTR_WRITE(reset_value[i], real);
			CCCR_CLEAR_OVF(low);
			CCCR_WRITE(low, high, real);
			CTR_WRITE(reset_value[i], real);
		}
	}

	/* P4 quirk: you have to re-unmask the apic vector */
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

	/* See op_model_ppro.c */
	return 1;
}

static void p4_start(struct op_msrs const * const msrs)
{
	unsigned int low, high, stag;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[i])
			continue;
		CCCR_READ(low, high, VIRT_CTR(stag, i));
		CCCR_SET_ENABLE(low);
		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
	}
}

static void p4_stop(struct op_msrs const * const msrs)
{
	unsigned int low, high, stag;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[i])
			continue;
		CCCR_READ(low, high, VIRT_CTR(stag, i));
		CCCR_SET_DISABLE(low);
		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
	}
}

static void p4_shutdown(struct op_msrs const * const msrs)
{
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (CTR_IS_RESERVED(msrs, i))
			release_perfctr_nmi(msrs->counters[i].addr);
	}
	/*
	 * some of the control registers are specially reserved in
	 * conjunction with the counter registers (hence the starting offset).
	 * This saves a few bits.
	 */
	for (i = num_counters; i < num_controls; ++i) {
		if (CTRL_IS_RESERVED(msrs, i))
			release_evntsel_nmi(msrs->controls[i].addr);
	}
}

#ifdef CONFIG_SMP
struct op_x86_model_spec const op_p4_ht2_spec = {
	.num_counters = NUM_COUNTERS_HT2,
	.num_controls = NUM_CONTROLS_HT2,
	.fill_in_addresses = &p4_fill_in_addresses,
	.setup_ctrs = &p4_setup_ctrs,
	.check_ctrs = &p4_check_ctrs,
	.start = &p4_start,
	.stop = &p4_stop,
	.shutdown = &p4_shutdown
};
#endif

struct op_x86_model_spec const op_p4_spec = {
	.num_counters = NUM_COUNTERS_NON_HT,
	.num_controls = NUM_CONTROLS_NON_HT,
	.fill_in_addresses = &p4_fill_in_addresses,
	.setup_ctrs = &p4_setup_ctrs,
	.check_ctrs = &p4_check_ctrs,
	.start = &p4_start,
	.stop = &p4_stop,
	.shutdown = &p4_shutdown
};