move die notifier handling to common code
[linux-2.6-block.git] / arch / x86_64 / kernel / mce.c
CommitLineData
1da177e4
LT
1/*
2 * Machine check handler.
3 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
4 * Rest from unknown author(s).
5 * 2004 Andi Kleen. Rewrote most of it.
6 */
7
8#include <linux/init.h>
9#include <linux/types.h>
10#include <linux/kernel.h>
11#include <linux/sched.h>
12#include <linux/string.h>
13#include <linux/rcupdate.h>
14#include <linux/kallsyms.h>
15#include <linux/sysdev.h>
16#include <linux/miscdevice.h>
17#include <linux/fs.h>
a9415644 18#include <linux/capability.h>
91c6d400
AK
19#include <linux/cpu.h>
20#include <linux/percpu.h>
8c566ef5 21#include <linux/ctype.h>
a98f0dd3 22#include <linux/kmod.h>
1eeb66a1 23#include <linux/kdebug.h>
1da177e4
LT
24#include <asm/processor.h>
25#include <asm/msr.h>
26#include <asm/mce.h>
1da177e4 27#include <asm/uaccess.h>
0a9c3ee7 28#include <asm/smp.h>
1da177e4
LT
29
30#define MISC_MCELOG_MINOR 227
73ca5358 31#define NR_BANKS 6
1da177e4 32
553f265f
AK
33atomic_t mce_entry;
34
1da177e4
LT
35static int mce_dont_init;
36
37/* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic,
38 3: never panic or exit (for testing only) */
39static int tolerant = 1;
40static int banks;
41static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
42static unsigned long console_logged;
43static int notify_user;
94ad8474 44static int rip_msr;
e583538f 45static int mce_bootlog = 1;
a98f0dd3
AK
46static atomic_t mce_events;
47
48static char trigger[128];
49static char *trigger_argv[2] = { trigger, NULL };
1da177e4
LT
50
51/*
52 * Lockless MCE logging infrastructure.
53 * This avoids deadlocks on printk locks without having to break locks. Also
54 * separate MCEs from kernel messages to avoid bogus bug reports.
55 */
56
57struct mce_log mcelog = {
58 MCE_LOG_SIGNATURE,
59 MCE_LOG_LEN,
60};
61
62void mce_log(struct mce *mce)
63{
64 unsigned next, entry;
a98f0dd3 65 atomic_inc(&mce_events);
1da177e4 66 mce->finished = 0;
7644143c 67 wmb();
1da177e4
LT
68 for (;;) {
69 entry = rcu_dereference(mcelog.next);
7644143c
MW
70 /* The rmb forces the compiler to reload next in each
71 iteration */
72 rmb();
673242c1
AK
73 for (;;) {
74 /* When the buffer fills up discard new entries. Assume
75 that the earlier errors are the more interesting. */
76 if (entry >= MCE_LOG_LEN) {
77 set_bit(MCE_OVERFLOW, &mcelog.flags);
78 return;
79 }
80 /* Old left over entry. Skip. */
81 if (mcelog.entry[entry].finished) {
82 entry++;
83 continue;
84 }
7644143c 85 break;
1da177e4 86 }
1da177e4
LT
87 smp_rmb();
88 next = entry + 1;
89 if (cmpxchg(&mcelog.next, entry, next) == entry)
90 break;
91 }
92 memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
7644143c 93 wmb();
1da177e4 94 mcelog.entry[entry].finished = 1;
7644143c 95 wmb();
1da177e4
LT
96
97 if (!test_and_set_bit(0, &console_logged))
98 notify_user = 1;
99}
100
101static void print_mce(struct mce *m)
102{
103 printk(KERN_EMERG "\n"
4855170f 104 KERN_EMERG "HARDWARE ERROR\n"
1da177e4
LT
105 KERN_EMERG
106 "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
107 m->cpu, m->mcgstatus, m->bank, m->status);
108 if (m->rip) {
109 printk(KERN_EMERG
110 "RIP%s %02x:<%016Lx> ",
111 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
112 m->cs, m->rip);
113 if (m->cs == __KERNEL_CS)
114 print_symbol("{%s}", m->rip);
115 printk("\n");
116 }
117 printk(KERN_EMERG "TSC %Lx ", m->tsc);
118 if (m->addr)
119 printk("ADDR %Lx ", m->addr);
120 if (m->misc)
121 printk("MISC %Lx ", m->misc);
122 printk("\n");
4855170f
AK
123 printk(KERN_EMERG "This is not a software problem!\n");
124 printk(KERN_EMERG
125 "Run through mcelog --ascii to decode and contact your hardware vendor\n");
1da177e4
LT
126}
127
128static void mce_panic(char *msg, struct mce *backup, unsigned long start)
129{
130 int i;
131 oops_begin();
132 for (i = 0; i < MCE_LOG_LEN; i++) {
133 unsigned long tsc = mcelog.entry[i].tsc;
134 if (time_before(tsc, start))
135 continue;
136 print_mce(&mcelog.entry[i]);
137 if (backup && mcelog.entry[i].tsc == backup->tsc)
138 backup = NULL;
139 }
140 if (backup)
141 print_mce(backup);
142 if (tolerant >= 3)
143 printk("Fake panic: %s\n", msg);
144 else
145 panic(msg);
146}
147
148static int mce_available(struct cpuinfo_x86 *c)
149{
3d1712c9 150 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
1da177e4
LT
151}
152
94ad8474
AK
153static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
154{
155 if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
156 m->rip = regs->rip;
157 m->cs = regs->cs;
158 } else {
159 m->rip = 0;
160 m->cs = 0;
161 }
162 if (rip_msr) {
163 /* Assume the RIP in the MSR is exact. Is this true? */
164 m->mcgstatus |= MCG_STATUS_EIPV;
165 rdmsrl(rip_msr, m->rip);
166 m->cs = 0;
167 }
168}
169
a98f0dd3
AK
170static void do_mce_trigger(void)
171{
172 static atomic_t mce_logged;
173 int events = atomic_read(&mce_events);
174 if (events != atomic_read(&mce_logged) && trigger[0]) {
175 /* Small race window, but should be harmless. */
176 atomic_set(&mce_logged, events);
177 call_usermodehelper(trigger, trigger_argv, NULL, -1);
178 }
179}
180
1da177e4
LT
181/*
182 * The actual machine check handler
183 */
184
185void do_machine_check(struct pt_regs * regs, long error_code)
186{
187 struct mce m, panicm;
188 int nowayout = (tolerant < 1);
189 int kill_it = 0;
190 u64 mcestart = 0;
191 int i;
192 int panicm_found = 0;
193
553f265f
AK
194 atomic_inc(&mce_entry);
195
1da177e4 196 if (regs)
6e3f3617 197 notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL);
1da177e4 198 if (!banks)
553f265f 199 goto out2;
1da177e4
LT
200
201 memset(&m, 0, sizeof(struct mce));
151f8cc1 202 m.cpu = smp_processor_id();
1da177e4
LT
203 rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
204 if (!(m.mcgstatus & MCG_STATUS_RIPV))
205 kill_it = 1;
206
207 rdtscll(mcestart);
208 barrier();
209
210 for (i = 0; i < banks; i++) {
211 if (!bank[i])
212 continue;
213
214 m.misc = 0;
215 m.addr = 0;
216 m.bank = i;
217 m.tsc = 0;
218
219 rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
220 if ((m.status & MCI_STATUS_VAL) == 0)
221 continue;
222
223 if (m.status & MCI_STATUS_EN) {
224 /* In theory _OVER could be a nowayout too, but
225 assume any overflowed errors were no fatal. */
226 nowayout |= !!(m.status & MCI_STATUS_PCC);
227 kill_it |= !!(m.status & MCI_STATUS_UC);
228 }
229
230 if (m.status & MCI_STATUS_MISCV)
231 rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
232 if (m.status & MCI_STATUS_ADDRV)
233 rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
234
94ad8474 235 mce_get_rip(&m, regs);
d5172f26 236 if (error_code >= 0)
1da177e4
LT
237 rdtscll(m.tsc);
238 wrmsrl(MSR_IA32_MC0_STATUS + i*4, 0);
d5172f26
AK
239 if (error_code != -2)
240 mce_log(&m);
1da177e4
LT
241
242 /* Did this bank cause the exception? */
243 /* Assume that the bank with uncorrectable errors did it,
244 and that there is only a single one. */
245 if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) {
246 panicm = m;
247 panicm_found = 1;
248 }
249
9f158333 250 add_taint(TAINT_MACHINE_CHECK);
1da177e4
LT
251 }
252
253 /* Never do anything final in the polling timer */
a98f0dd3
AK
254 if (!regs) {
255 /* Normal interrupt context here. Call trigger for any new
256 events. */
257 do_mce_trigger();
1da177e4 258 goto out;
a98f0dd3 259 }
1da177e4
LT
260
261 /* If we didn't find an uncorrectable error, pick
262 the last one (shouldn't happen, just being safe). */
263 if (!panicm_found)
264 panicm = m;
265 if (nowayout)
266 mce_panic("Machine check", &panicm, mcestart);
267 if (kill_it) {
268 int user_space = 0;
269
270 if (m.mcgstatus & MCG_STATUS_RIPV)
271 user_space = panicm.rip && (panicm.cs & 3);
272
273 /* When the machine was in user space and the CPU didn't get
274 confused it's normally not necessary to panic, unless you
275 are paranoid (tolerant == 0)
276
277 RED-PEN could be more tolerant for MCEs in idle,
278 but most likely they occur at boot anyways, where
279 it is best to just halt the machine. */
280 if ((!user_space && (panic_on_oops || tolerant < 2)) ||
281 (unsigned)current->pid <= 1)
282 mce_panic("Uncorrected machine check", &panicm, mcestart);
283
284 /* do_exit takes an awful lot of locks and has as
285 slight risk of deadlocking. If you don't want that
286 don't set tolerant >= 2 */
287 if (tolerant < 3)
288 do_exit(SIGBUS);
289 }
290
291 out:
292 /* Last thing done in the machine check exception to clear state. */
293 wrmsrl(MSR_IA32_MCG_STATUS, 0);
553f265f
AK
294 out2:
295 atomic_dec(&mce_entry);
1da177e4
LT
296}
297
15d5f839
DZ
298#ifdef CONFIG_X86_MCE_INTEL
299/***
300 * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
301 * @cpu: The CPU on which the event occured.
302 * @status: Event status information
303 *
304 * This function should be called by the thermal interrupt after the
305 * event has been processed and the decision was made to log the event
306 * further.
307 *
308 * The status parameter will be saved to the 'status' field of 'struct mce'
309 * and historically has been the register value of the
310 * MSR_IA32_THERMAL_STATUS (Intel) msr.
311 */
312void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
313{
314 struct mce m;
315
316 memset(&m, 0, sizeof(m));
317 m.cpu = cpu;
318 m.bank = MCE_THERMAL_BANK;
319 m.status = status;
320 rdtscll(m.tsc);
321 mce_log(&m);
322}
323#endif /* CONFIG_X86_MCE_INTEL */
324
1da177e4 325/*
8a336b0a
TH
326 * Periodic polling timer for "silent" machine check errors. If the
327 * poller finds an MCE, poll 2x faster. When the poller finds no more
328 * errors, poll 2x slower (up to check_interval seconds).
1da177e4
LT
329 */
330
331static int check_interval = 5 * 60; /* 5 minutes */
8a336b0a 332static int next_interval; /* in jiffies */
65f27f38
DH
333static void mcheck_timer(struct work_struct *work);
334static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);
1da177e4
LT
335
336static void mcheck_check_cpu(void *info)
337{
338 if (mce_available(&current_cpu_data))
339 do_machine_check(NULL, 0);
340}
341
65f27f38 342static void mcheck_timer(struct work_struct *work)
1da177e4
LT
343{
344 on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
1da177e4
LT
345
346 /*
347 * It's ok to read stale data here for notify_user and
348 * console_logged as we'll simply get the updated versions
349 * on the next mcheck_timer execution and atomic operations
350 * on console_logged act as synchronization for notify_user
351 * writes.
352 */
353 if (notify_user && console_logged) {
8a336b0a
TH
354 static unsigned long last_print;
355 unsigned long now = jiffies;
356
357 /* if we logged an MCE, reduce the polling interval */
358 next_interval = max(next_interval/2, HZ/100);
1da177e4
LT
359 notify_user = 0;
360 clear_bit(0, &console_logged);
8a336b0a
TH
361 if (time_after_eq(now, last_print + (check_interval*HZ))) {
362 last_print = now;
363 printk(KERN_INFO "Machine check events logged\n");
364 }
365 } else {
366 next_interval = min(next_interval*2, check_interval*HZ);
1da177e4 367 }
8a336b0a
TH
368
369 schedule_delayed_work(&mcheck_work, next_interval);
1da177e4
LT
370}
371
372
373static __init int periodic_mcheck_init(void)
374{
8a336b0a
TH
375 next_interval = check_interval * HZ;
376 if (next_interval)
377 schedule_delayed_work(&mcheck_work, next_interval);
1da177e4
LT
378 return 0;
379}
380__initcall(periodic_mcheck_init);
381
382
383/*
384 * Initialize Machine Checks for a CPU.
385 */
386static void mce_init(void *dummy)
387{
388 u64 cap;
389 int i;
390
391 rdmsrl(MSR_IA32_MCG_CAP, cap);
392 banks = cap & 0xff;
393 if (banks > NR_BANKS) {
394 printk(KERN_INFO "MCE: warning: using only %d banks\n", banks);
395 banks = NR_BANKS;
396 }
94ad8474
AK
397 /* Use accurate RIP reporting if available. */
398 if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
399 rip_msr = MSR_IA32_MCG_EIP;
1da177e4
LT
400
401 /* Log the machine checks left over from the previous reset.
402 This also clears all registers */
d5172f26 403 do_machine_check(NULL, mce_bootlog ? -1 : -2);
1da177e4
LT
404
405 set_in_cr4(X86_CR4_MCE);
406
407 if (cap & MCG_CTL_P)
408 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
409
410 for (i = 0; i < banks; i++) {
411 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
412 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
413 }
414}
415
416/* Add per CPU specific workarounds here */
e6982c67 417static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
1da177e4
LT
418{
419 /* This should be disabled by the BIOS, but isn't always */
420 if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) {
421 /* disable GART TBL walk error reporting, which trips off
422 incorrectly with the IOMMU & 3ware & Cerberus. */
423 clear_bit(10, &bank[4]);
e583538f
AK
424 /* Lots of broken BIOS around that don't clear them
425 by default and leave crap in there. Don't log. */
426 mce_bootlog = 0;
1da177e4 427 }
e583538f 428
1da177e4
LT
429}
430
e6982c67 431static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
1da177e4
LT
432{
433 switch (c->x86_vendor) {
434 case X86_VENDOR_INTEL:
435 mce_intel_feature_init(c);
436 break;
89b831ef
JS
437 case X86_VENDOR_AMD:
438 mce_amd_feature_init(c);
439 break;
1da177e4
LT
440 default:
441 break;
442 }
443}
444
445/*
446 * Called for each booted CPU to set up machine checks.
447 * Must be called with preempt off.
448 */
e6982c67 449void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
1da177e4 450{
7ded5689 451 static cpumask_t mce_cpus = CPU_MASK_NONE;
1da177e4
LT
452
453 mce_cpu_quirks(c);
454
455 if (mce_dont_init ||
456 cpu_test_and_set(smp_processor_id(), mce_cpus) ||
457 !mce_available(c))
458 return;
459
460 mce_init(NULL);
461 mce_cpu_features(c);
462}
463
464/*
465 * Character device to read and clear the MCE log.
466 */
467
/* Store the current TSC of the executing CPU into its slot of the array
   passed via @data (indexed by CPU number). */
static void collect_tscs(void *data)
{
	unsigned long *tsc_by_cpu = data;

	rdtscll(tsc_by_cpu[smp_processor_id()]);
}
473
474static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff_t *off)
475{
f0de53bb 476 unsigned long *cpu_tsc;
1da177e4
LT
477 static DECLARE_MUTEX(mce_read_sem);
478 unsigned next;
479 char __user *buf = ubuf;
480 int i, err;
481
f0de53bb
AK
482 cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL);
483 if (!cpu_tsc)
484 return -ENOMEM;
485
1da177e4
LT
486 down(&mce_read_sem);
487 next = rcu_dereference(mcelog.next);
488
489 /* Only supports full reads right now */
490 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
491 up(&mce_read_sem);
f0de53bb 492 kfree(cpu_tsc);
1da177e4
LT
493 return -EINVAL;
494 }
495
496 err = 0;
673242c1
AK
497 for (i = 0; i < next; i++) {
498 unsigned long start = jiffies;
499 while (!mcelog.entry[i].finished) {
500 if (!time_before(jiffies, start + 2)) {
501 memset(mcelog.entry + i,0, sizeof(struct mce));
502 continue;
503 }
504 cpu_relax();
505 }
1da177e4
LT
506 smp_rmb();
507 err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
508 buf += sizeof(struct mce);
509 }
510
511 memset(mcelog.entry, 0, next * sizeof(struct mce));
512 mcelog.next = 0;
513
b2b18660 514 synchronize_sched();
1da177e4
LT
515
516 /* Collect entries that were still getting written before the synchronize. */
517
518 on_each_cpu(collect_tscs, cpu_tsc, 1, 1);
519 for (i = next; i < MCE_LOG_LEN; i++) {
520 if (mcelog.entry[i].finished &&
521 mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
522 err |= copy_to_user(buf, mcelog.entry+i, sizeof(struct mce));
523 smp_rmb();
524 buf += sizeof(struct mce);
525 memset(&mcelog.entry[i], 0, sizeof(struct mce));
526 }
527 }
528 up(&mce_read_sem);
f0de53bb 529 kfree(cpu_tsc);
1da177e4
LT
530 return err ? -EFAULT : buf - ubuf;
531}
532
533static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, unsigned long arg)
534{
535 int __user *p = (int __user *)arg;
536 if (!capable(CAP_SYS_ADMIN))
537 return -EPERM;
538 switch (cmd) {
539 case MCE_GET_RECORD_LEN:
540 return put_user(sizeof(struct mce), p);
541 case MCE_GET_LOG_LEN:
542 return put_user(MCE_LOG_LEN, p);
543 case MCE_GETCLEAR_FLAGS: {
544 unsigned flags;
545 do {
546 flags = mcelog.flags;
547 } while (cmpxchg(&mcelog.flags, flags, 0) != flags);
548 return put_user(flags, p);
549 }
550 default:
551 return -ENOTTY;
552 }
553}
554
5dfe4c96 555static const struct file_operations mce_chrdev_ops = {
1da177e4
LT
556 .read = mce_read,
557 .ioctl = mce_ioctl,
558};
559
560static struct miscdevice mce_log_device = {
561 MISC_MCELOG_MINOR,
562 "mcelog",
563 &mce_chrdev_ops,
564};
565
566/*
567 * Old style boot options parsing. Only for compatibility.
568 */
569
570static int __init mcheck_disable(char *str)
571{
572 mce_dont_init = 1;
9b41046c 573 return 1;
1da177e4
LT
574}
575
576/* mce=off disables machine check. Note you can reenable it later
d5172f26 577 using sysfs.
8c566ef5 578 mce=TOLERANCELEVEL (number, see above)
e583538f
AK
579 mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
580 mce=nobootlog Don't log MCEs from before booting. */
1da177e4
LT
581static int __init mcheck_enable(char *str)
582{
d5172f26
AK
583 if (*str == '=')
584 str++;
1da177e4
LT
585 if (!strcmp(str, "off"))
586 mce_dont_init = 1;
e583538f
AK
587 else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog"))
588 mce_bootlog = str[0] == 'b';
8c566ef5
AK
589 else if (isdigit(str[0]))
590 get_option(&str, &tolerant);
1da177e4
LT
591 else
592 printk("mce= argument %s ignored. Please use /sys", str);
9b41046c 593 return 1;
1da177e4
LT
594}
595
596__setup("nomce", mcheck_disable);
597__setup("mce", mcheck_enable);
598
599/*
600 * Sysfs support
601 */
602
413588c7
AK
603/* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
604 Only one CPU is active at this time, the others get readded later using
605 CPU hotplug. */
1da177e4
LT
606static int mce_resume(struct sys_device *dev)
607{
413588c7 608 mce_init(NULL);
1da177e4
LT
609 return 0;
610}
611
612/* Reinit MCEs after user configuration changes */
613static void mce_restart(void)
614{
8a336b0a 615 if (next_interval)
1da177e4
LT
616 cancel_delayed_work(&mcheck_work);
617 /* Timer race is harmless here */
618 on_each_cpu(mce_init, NULL, 1, 1);
8a336b0a
TH
619 next_interval = check_interval * HZ;
620 if (next_interval)
621 schedule_delayed_work(&mcheck_work, next_interval);
1da177e4
LT
622}
623
624static struct sysdev_class mce_sysclass = {
625 .resume = mce_resume,
626 set_kset_name("machinecheck"),
627};
628
fff2e89f 629DEFINE_PER_CPU(struct sys_device, device_mce);
1da177e4
LT
630
631/* Why are there no generic functions for this? */
632#define ACCESSOR(name, var, start) \
633 static ssize_t show_ ## name(struct sys_device *s, char *buf) { \
634 return sprintf(buf, "%lx\n", (unsigned long)var); \
635 } \
636 static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \
637 char *end; \
638 unsigned long new = simple_strtoul(buf, &end, 0); \
639 if (end == buf) return -EINVAL; \
640 var = new; \
641 start; \
642 return end-buf; \
643 } \
644 static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
645
a98f0dd3 646/* TBD should generate these dynamically based on number of available banks */
1da177e4
LT
647ACCESSOR(bank0ctl,bank[0],mce_restart())
648ACCESSOR(bank1ctl,bank[1],mce_restart())
649ACCESSOR(bank2ctl,bank[2],mce_restart())
650ACCESSOR(bank3ctl,bank[3],mce_restart())
651ACCESSOR(bank4ctl,bank[4],mce_restart())
73ca5358 652ACCESSOR(bank5ctl,bank[5],mce_restart())
a98f0dd3
AK
653
654static ssize_t show_trigger(struct sys_device *s, char *buf)
655{
656 strcpy(buf, trigger);
657 strcat(buf, "\n");
658 return strlen(trigger) + 1;
659}
660
661static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz)
662{
663 char *p;
664 int len;
665 strncpy(trigger, buf, sizeof(trigger));
666 trigger[sizeof(trigger)-1] = 0;
667 len = strlen(trigger);
668 p = strchr(trigger, '\n');
669 if (*p) *p = 0;
670 return len;
671}
672
673static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
1da177e4
LT
674ACCESSOR(tolerant,tolerant,)
675ACCESSOR(check_interval,check_interval,mce_restart())
a98f0dd3
AK
676static struct sysdev_attribute *mce_attributes[] = {
677 &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
678 &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl,
679 &attr_tolerant, &attr_check_interval, &attr_trigger,
680 NULL
681};
1da177e4 682
91c6d400
AK
683/* Per cpu sysdev init. All of the cpus still share the same ctl bank */
684static __cpuinit int mce_create_device(unsigned int cpu)
1da177e4
LT
685{
686 int err;
73ca5358 687 int i;
91c6d400
AK
688 if (!mce_available(&cpu_data[cpu]))
689 return -EIO;
690
691 per_cpu(device_mce,cpu).id = cpu;
692 per_cpu(device_mce,cpu).cls = &mce_sysclass;
693
694 err = sysdev_register(&per_cpu(device_mce,cpu));
695
696 if (!err) {
a98f0dd3 697 for (i = 0; mce_attributes[i]; i++)
73ca5358 698 sysdev_create_file(&per_cpu(device_mce,cpu),
a98f0dd3 699 mce_attributes[i]);
91c6d400
AK
700 }
701 return err;
702}
703
be6b5a35 704static void mce_remove_device(unsigned int cpu)
91c6d400 705{
73ca5358
SL
706 int i;
707
a98f0dd3 708 for (i = 0; mce_attributes[i]; i++)
73ca5358 709 sysdev_remove_file(&per_cpu(device_mce,cpu),
a98f0dd3 710 mce_attributes[i]);
91c6d400 711 sysdev_unregister(&per_cpu(device_mce,cpu));
d4c45718 712 memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
91c6d400 713}
91c6d400
AK
714
715/* Get notified when a cpu comes on/off. Be hotplug friendly. */
be6b5a35 716static int
91c6d400
AK
717mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
718{
719 unsigned int cpu = (unsigned long)hcpu;
720
721 switch (action) {
722 case CPU_ONLINE:
723 mce_create_device(cpu);
724 break;
91c6d400
AK
725 case CPU_DEAD:
726 mce_remove_device(cpu);
727 break;
91c6d400
AK
728 }
729 return NOTIFY_OK;
730}
731
be6b5a35 732static struct notifier_block mce_cpu_notifier = {
91c6d400
AK
733 .notifier_call = mce_cpu_callback,
734};
735
736static __init int mce_init_device(void)
737{
738 int err;
739 int i = 0;
740
1da177e4
LT
741 if (!mce_available(&boot_cpu_data))
742 return -EIO;
743 err = sysdev_class_register(&mce_sysclass);
91c6d400
AK
744
745 for_each_online_cpu(i) {
746 mce_create_device(i);
747 }
748
be6b5a35 749 register_hotcpu_notifier(&mce_cpu_notifier);
1da177e4
LT
750 misc_register(&mce_log_device);
751 return err;
1da177e4 752}
91c6d400 753
1da177e4 754device_initcall(mce_init_device);