Merge branch 'bkl/procfs' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic...
[linux-2.6-block.git] / arch / x86 / oprofile / op_model_p4.c
CommitLineData
1da177e4
LT
1/**
2 * @file op_model_p4.c
3 * P4 model-specific MSR operations
4 *
5 * @remark Copyright 2002 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author Graydon Hoare
9 */
10
11#include <linux/oprofile.h>
12#include <linux/smp.h>
20211e4d
PC
13#include <linux/ptrace.h>
14#include <linux/nmi.h>
1da177e4 15#include <asm/msr.h>
1da177e4
LT
16#include <asm/fixmap.h>
17#include <asm/apic.h>
20211e4d 18
1da177e4
LT
19
20#include "op_x86_model.h"
21#include "op_counter.h"
22
/* number of entries in the p4_events[] binding table below */
#define NUM_EVENTS 39

/* resources available when hyper-threading is off: the CPU owns all
   8 counters, 45 ESCRs and 18 CCCRs */
#define NUM_COUNTERS_NON_HT 8
#define NUM_ESCRS_NON_HT 45
#define NUM_CCCRS_NON_HT 18
#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)

/* resources per logical CPU when the two HT siblings split them */
#define NUM_COUNTERS_HT2 4
#define NUM_ESCRS_HT2 23
#define NUM_CCCRS_HT2 9
#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)

/* counter high bit; p4_check_ctrs() treats a clear bit 31 as an
   un-flagged overflow (see the errata comment there) */
#define OP_CTR_OVERFLOW (1ULL<<31)

/* default to the non-HT counts; setup_num_counters() shrinks these
   at runtime when two HT siblings are present */
static unsigned int num_counters = NUM_COUNTERS_NON_HT;
static unsigned int num_controls = NUM_CONTROLS_NON_HT;
1da177e4
LT
39
/*
 * Hyper-threadedness of a chip is only discovered at kernel boot time,
 * so the per-logical-CPU counter/control counts must be chosen
 * dynamically rather than at compile time.
 */
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings != 2)
		return;
	num_counters = NUM_COUNTERS_HT2;
	num_controls = NUM_CONTROLS_HT2;
#endif
}
52
/*
 * MSR addresses for the split resources are interleaved between the two
 * HT siblings, so walk them in steps of 2 when HT is active, 1 otherwise.
 *
 * Note: specifier order fixed to the idiomatic "static inline int"
 * (inline between storage class and type), as checkpatch requires.
 */
static inline int addr_increment(void)
{
#ifdef CONFIG_SMP
	return smp_num_siblings == 2 ? 2 : 1;
#else
	return 1;
#endif
}
61
62
/* tables to simulate simplified hardware view of p4 registers */

/* one hardware counter: its virtual-counter bit plus the MSR addresses
   of the counter itself and of the CCCR that controls it */
struct p4_counter_binding {
	int virt_counter;	/* CTR_* bit mask identifying the counter */
	int counter_address;	/* perfctr MSR */
	int cccr_address;	/* matching CCCR MSR */
};

/* one profiling event: the select values that program it, and the
   (up to two) counter/ESCR pairs it may be bound to */
struct p4_event_binding {
	int escr_select;  /* value to put in CCCR */
	int event_select; /* value to put in ESCR */
	struct {
		int virt_counter; /* for this counter... */
		int escr_address; /* use this ESCR       */
	} bindings[2];
};
78
/* nb: these CTR_* defines are a duplicate of defines in
   event/i386.p4*events. */


#define CTR_BPU_0      (1 << 0)
#define CTR_MS_0       (1 << 1)
#define CTR_FLAME_0    (1 << 2)
#define CTR_IQ_4       (1 << 3)
#define CTR_BPU_2      (1 << 4)
#define CTR_MS_2       (1 << 5)
#define CTR_FLAME_2    (1 << 6)
#define CTR_IQ_5       (1 << 7)

/* virtual counter -> (perfctr MSR, CCCR MSR).  The first four entries
   are used by the even HT sibling, the last four by the odd one; see
   VIRT_CTR()/get_stagger() below for how the offset is applied. */
static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
	{ CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
	{ CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
	{ CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
	{ CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
};

/* CCCRs that exist in hardware beyond the eight counters we drive */
#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
1da177e4 104
1da177e4
LT
/* p4 event codes in libop/op_event.h are indices into this table. */

/* Each entry gives the ESCR-select/event-select values for one event
   and the two possible counter/ESCR bindings (a zeroed second binding
   means the event can only run on one counter). */
static struct p4_event_binding p4_events[NUM_EVENTS] = {

	{ /* BRANCH_RETIRED */
		0x05, 0x06,
		{ {CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* MISPRED_BRANCH_RETIRED */
		0x04, 0x03,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* TC_DELIVER_MODE */
		0x01, 0x01,
		{ { CTR_MS_0, MSR_P4_TC_ESCR0},
		  { CTR_MS_2, MSR_P4_TC_ESCR1} }
	},

	{ /* BPU_FETCH_REQUEST */
		0x00, 0x03,
		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
		  { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
	},

	{ /* ITLB_REFERENCE */
		0x03, 0x18,
		{ { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
		  { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
	},

	{ /* MEMORY_CANCEL */
		0x05, 0x02,
		{ { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
		  { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
	},

	{ /* MEMORY_COMPLETE */
		0x02, 0x08,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* LOAD_PORT_REPLAY */
		0x02, 0x04,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* STORE_PORT_REPLAY */
		0x02, 0x05,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* MOB_LOAD_REPLAY */
		0x02, 0x03,
		{ { CTR_BPU_0, MSR_P4_MOB_ESCR0},
		  { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
	},

	{ /* PAGE_WALK_TYPE */
		0x04, 0x01,
		{ { CTR_BPU_0, MSR_P4_PMH_ESCR0},
		  { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
	},

	{ /* BSQ_CACHE_REFERENCE */
		0x07, 0x0c,
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
		  { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
	},

	{ /* IOQ_ALLOCATION */
		0x06, 0x03,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { 0, 0 } }
	},

	{ /* IOQ_ACTIVE_ENTRIES */
		0x06, 0x1a,
		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
		  { 0, 0 } }
	},

	{ /* FSB_DATA_ACTIVITY */
		0x06, 0x17,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
	},

	{ /* BSQ_ALLOCATION */
		0x07, 0x05,
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
		  { 0, 0 } }
	},

	{ /* BSQ_ACTIVE_ENTRIES */
		0x07, 0x06,
		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
		  { 0, 0 } }
	},

	{ /* X87_ASSIST */
		0x05, 0x03,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* SSE_INPUT_ASSIST */
		0x01, 0x34,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* PACKED_SP_UOP */
		0x01, 0x08,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* PACKED_DP_UOP */
		0x01, 0x0c,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* SCALAR_SP_UOP */
		0x01, 0x0a,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* SCALAR_DP_UOP */
		0x01, 0x0e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* 64BIT_MMX_UOP */
		0x01, 0x02,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* 128BIT_MMX_UOP */
		0x01, 0x1a,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* X87_FP_UOP */
		0x01, 0x04,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* X87_SIMD_MOVES_UOP */
		0x01, 0x2e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* MACHINE_CLEAR */
		0x05, 0x02,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* GLOBAL_POWER_EVENTS */
		0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
	},

	{ /* TC_MS_XFER */
		0x00, 0x05,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
	},

	{ /* UOP_QUEUE_WRITES */
		0x00, 0x09,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
	},

	{ /* FRONT_END_EVENT */
		0x05, 0x08,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* EXECUTION_EVENT */
		0x05, 0x0c,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* REPLAY_EVENT */
		0x05, 0x09,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* INSTR_RETIRED */
		0x04, 0x02,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* UOPS_RETIRED */
		0x04, 0x01,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* UOP_TYPE */
		0x02, 0x02,
		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
		  { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
	},

	{ /* RETIRED_MISPRED_BRANCH_TYPE */
		0x02, 0x05,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
	},

	{ /* RETIRED_BRANCH_TYPE */
		0x02, 0x04,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
	}
};
343
344
/* MSR_IA32_MISC_ENABLE bit 7: performance monitoring enabled.
 * (bit value parenthesized explicitly; the old form relied on
 * << binding tighter than &) */
#define MISC_PMC_ENABLED_P(x) ((x) & (1 << 7))

/* ESCR field helpers.  ESCR_CLEAR keeps only the reserved bits before
 * an event is programmed; the *_0 setters target the even HT sibling
 * (T0), the *_1 setters the odd one (T1). */
#define ESCR_RESERVED_BITS 0x80000003
#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))

/* CCCR field helpers: enable/disable (bit 12), ESCR select (bits 13-15),
 * per-sibling PMI-on-overflow (bits 26/27) and the OVF flag (bit 31). */
#define CCCR_RESERVED_BITS 0x38030FFF
#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
366
1da177e4
LT
367
/*
 * Assign a "stagger" to the current CPU: 0 for the first sibling of an
 * HT pair (or any non-SMP/non-HT CPU), 1 for the second.  The stagger
 * is used throughout this module as an extra array offset to select
 * the "even" or "odd" half of the divided resources.
 */
static unsigned int get_stagger(void)
{
#ifdef CONFIG_SMP
	int this_cpu = smp_processor_id();
	return this_cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
#endif
	return 0;
}
379
380
/* finally, mediate access to a real hardware counter
   by passing a "virtual" counter number to this macro,
   along with your stagger setting. */
#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))

/* per-counter reload value (written back as -(u64)value);
   0 means the counter is unused and is skipped everywhere */
static unsigned long reset_value[NUM_COUNTERS_NON_HT];
387
83300ce0
RR
388static void p4_shutdown(struct op_msrs const * const msrs)
389{
390 int i;
391
392 for (i = 0; i < num_counters; ++i) {
393 if (msrs->counters[i].addr)
394 release_perfctr_nmi(msrs->counters[i].addr);
395 }
396 /*
397 * some of the control registers are specially reserved in
398 * conjunction with the counter registers (hence the starting offset).
399 * This saves a few bits.
400 */
401 for (i = num_counters; i < num_controls; ++i) {
402 if (msrs->controls[i].addr)
403 release_evntsel_nmi(msrs->controls[i].addr);
404 }
405}
1da177e4 406
/* Reserve all counter and control MSRs this model will use and record
 * their addresses in msrs.  Returns 0 on success, or -EBUSY (after
 * releasing everything) if a register needed by an enabled counter
 * could not be reserved.  The control slot layout must match what
 * p4_setup_ctrs()/p4_shutdown() expect: CCCRs first, then ESCRs. */
static int p4_fill_in_addresses(struct op_msrs * const msrs)
{
	unsigned int i;
	unsigned int addr, cccraddr, stag;

	setup_num_counters();
	stag = get_stagger();

	/* the counter & cccr registers we pay attention to */
	for (i = 0; i < num_counters; ++i) {
		addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
		cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
		if (reserve_perfctr_nmi(addr)) {
			msrs->counters[i].addr = addr;
			msrs->controls[i].addr = cccraddr;
		}
	}

	/* 43 ESCR registers in three or four discontiguous group;
	 * i continues from num_counters so ESCRs land after the CCCRs */
	for (addr = MSR_P4_BSU_ESCR0 + stag;
	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	/* no IQ_ESCR0/1 on some models, we save a second time BSU_ESCR0/1
	 * to avoid special case in nmi_{save|restore}_registers() */
	if (boot_cpu_data.x86_model >= 0x3) {
		for (addr = MSR_P4_BSU_ESCR0 + stag;
		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
			if (reserve_evntsel_nmi(addr))
				msrs->controls[i].addr = addr;
		}
	} else {
		for (addr = MSR_P4_IQ_ESCR0 + stag;
		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
			if (reserve_evntsel_nmi(addr))
				msrs->controls[i].addr = addr;
		}
	}

	for (addr = MSR_P4_RAT_ESCR0 + stag;
	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_MS_ESCR0 + stag;
	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_IX_ESCR0 + stag;
	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	/* there are 2 remaining non-contiguously located ESCRs */

	if (num_counters == NUM_COUNTERS_NON_HT) {
		/* standard non-HT CPUs handle both remaining ESCRs */
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else if (stag == 0) {
		/* HT CPUs give the first remainder to the even thread, as
		   the 32nd control register */
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else {
		/* and two copies of the second to the odd thread,
		   for the 22nd and 23rd control registers */
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		}
	}

	/* fail (and roll everything back) only if a register that an
	 * enabled counter actually needs could not be reserved */
	for (i = 0; i < num_counters; ++i) {
		if (!counter_config[i].enabled)
			continue;
		if (msrs->controls[i].addr)
			continue;
		op_x86_warn_reserved(i);
		p4_shutdown(msrs);
		return -EBUSY;
	}

	return 0;
}
502
503
/* Program one counter: find the event binding that matches the
 * (staggered) counter bit, then write the ESCR (event select, mask,
 * USR/OS privilege bits for this HT sibling) and the CCCR (escr
 * select, PMI-on-overflow routed to this sibling).  Logs and returns
 * on an out-of-range event code or a missing binding. */
static void pmc_setup_one_p4_counter(unsigned int ctr)
{
	int i;
	int const maxbind = 2;
	unsigned int cccr = 0;
	unsigned int escr = 0;
	unsigned int high = 0;
	unsigned int counter_bit;
	struct p4_event_binding *ev = NULL;
	unsigned int stag;

	stag = get_stagger();

	/* convert from counter *number* to counter *bit* */
	counter_bit = 1 << VIRT_CTR(stag, ctr);

	/* find our event binding structure. */
	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
		printk(KERN_ERR
		       "oprofile: P4 event code 0x%lx out of range\n",
		       counter_config[ctr].event);
		return;
	}

	/* event codes are 1-based indices into p4_events[] */
	ev = &(p4_events[counter_config[ctr].event - 1]);

	for (i = 0; i < maxbind; i++) {
		if (ev->bindings[i].virt_counter & counter_bit) {

			/* modify ESCR */
			rdmsr(ev->bindings[i].escr_address, escr, high);
			ESCR_CLEAR(escr);
			if (stag == 0) {
				ESCR_SET_USR_0(escr, counter_config[ctr].user);
				ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
			} else {
				ESCR_SET_USR_1(escr, counter_config[ctr].user);
				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
			}
			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
			wrmsr(ev->bindings[i].escr_address, escr, high);

			/* modify CCCR */
			rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
			      cccr, high);
			CCCR_CLEAR(cccr);
			CCCR_SET_REQUIRED_BITS(cccr);
			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
			if (stag == 0)
				CCCR_SET_PMI_OVF_0(cccr);
			else
				CCCR_SET_PMI_OVF_1(cccr);
			wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
			      cccr, high);
			return;
		}
	}

	printk(KERN_ERR
	       "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
	       counter_config[ctr].event, stag, ctr);
}
567
568
ef8828dd
RR
/* Initialize all reserved registers for this sibling: verify the PMC
 * is enabled in MSR_IA32_MISC_ENABLE, reset the CCCRs, zero the ESCRs,
 * then program each enabled counter and load its initial -count value
 * (counters count up and overflow into an NMI). */
static void p4_setup_ctrs(struct op_x86_model_spec const *model,
			  struct op_msrs const * const msrs)
{
	unsigned int i;
	unsigned int low, high;
	unsigned int stag;

	stag = get_stagger();

	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
	if (!MISC_PMC_ENABLED_P(low)) {
		printk(KERN_ERR "oprofile: P4 PMC not available\n");
		return;
	}

	/* clear the cccrs we will use */
	for (i = 0; i < num_counters; i++) {
		if (unlikely(!msrs->controls[i].addr))
			continue;
		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
		CCCR_CLEAR(low);
		CCCR_SET_REQUIRED_BITS(low);
		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
	}

	/* clear all escrs (including those outside our concern) */
	for (i = num_counters; i < num_controls; i++) {
		if (unlikely(!msrs->controls[i].addr))
			continue;
		wrmsr(msrs->controls[i].addr, 0, 0);
	}

	/* setup all counters */
	for (i = 0; i < num_counters; ++i) {
		if (counter_config[i].enabled && msrs->controls[i].addr) {
			reset_value[i] = counter_config[i].count;
			pmc_setup_one_p4_counter(i);
			/* load -count so the counter overflows after
			 * `count` events */
			wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address,
			       -(u64)counter_config[i].count);
		} else {
			/* 0 marks the counter unused for start/stop/check */
			reset_value[i] = 0;
		}
	}
}
613
614
/* NMI handler: sample every active counter that overflowed, reload it,
 * and re-unmask the APIC LVTPC vector (P4 quirk).  Always returns 1 to
 * report the NMI as handled (see op_model_ppro.c). */
static int p4_check_ctrs(struct pt_regs * const regs,
			 struct op_msrs const * const msrs)
{
	unsigned long ctr, low, high, stag, real;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {

		if (!reset_value[i])
			continue;

		/*
		 * there is some eccentricity in the hardware which
		 * requires that we perform 2 extra corrections:
		 *
		 * - check both the CCCR:OVF flag for overflow and the
		 *   counter high bit for un-flagged overflows.
		 *
		 * - write the counter back twice to ensure it gets
		 *   updated properly.
		 *
		 * the former seems to be related to extra NMIs happening
		 * during the current NMI; the latter is reported as errata
		 * N15 in intel doc 249199-029, pentium 4 specification
		 * update, though their suggested work-around does not
		 * appear to solve the problem.
		 */

		real = VIRT_CTR(stag, i);

		rdmsr(p4_counters[real].cccr_address, low, high);
		rdmsr(p4_counters[real].counter_address, ctr, high);
		/* OVF set, or counter wrapped without setting OVF */
		if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) {
			oprofile_add_sample(regs, i);
			wrmsrl(p4_counters[real].counter_address,
			       -(u64)reset_value[i]);
			CCCR_CLEAR_OVF(low);
			wrmsr(p4_counters[real].cccr_address, low, high);
			/* second write: errata N15 work-around, see above */
			wrmsrl(p4_counters[real].counter_address,
			       -(u64)reset_value[i]);
		}
	}

	/* P4 quirk: you have to re-unmask the apic vector */
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

	/* See op_model_ppro.c */
	return 1;
}
666
667
668static void p4_start(struct op_msrs const * const msrs)
669{
670 unsigned int low, high, stag;
671 int i;
672
673 stag = get_stagger();
674
675 for (i = 0; i < num_counters; ++i) {
676 if (!reset_value[i])
677 continue;
1131a478 678 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
1da177e4 679 CCCR_SET_ENABLE(low);
1131a478 680 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
1da177e4
LT
681 }
682}
683
684
685static void p4_stop(struct op_msrs const * const msrs)
686{
687 unsigned int low, high, stag;
688 int i;
689
690 stag = get_stagger();
691
692 for (i = 0; i < num_counters; ++i) {
cb9c448c
DZ
693 if (!reset_value[i])
694 continue;
1131a478 695 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
1da177e4 696 CCCR_SET_DISABLE(low);
1131a478 697 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
1da177e4
LT
698 }
699}
700
1da177e4 701#ifdef CONFIG_SMP
259a83a8 702struct op_x86_model_spec op_p4_ht2_spec = {
c92960fc
RR
703 .num_counters = NUM_COUNTERS_HT2,
704 .num_controls = NUM_CONTROLS_HT2,
705 .fill_in_addresses = &p4_fill_in_addresses,
706 .setup_ctrs = &p4_setup_ctrs,
707 .check_ctrs = &p4_check_ctrs,
708 .start = &p4_start,
709 .stop = &p4_stop,
710 .shutdown = &p4_shutdown
1da177e4
LT
711};
712#endif
713
259a83a8 714struct op_x86_model_spec op_p4_spec = {
c92960fc
RR
715 .num_counters = NUM_COUNTERS_NON_HT,
716 .num_controls = NUM_CONTROLS_NON_HT,
717 .fill_in_addresses = &p4_fill_in_addresses,
718 .setup_ctrs = &p4_setup_ctrs,
719 .check_ctrs = &p4_check_ctrs,
720 .start = &p4_start,
721 .stop = &p4_stop,
722 .shutdown = &p4_shutdown
1da177e4 723};