edac.h: Add generic layers for describing a memory location
drivers/edac/edac_mc.c

/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 * http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
        debugf4("\tchannel = %p\n", chan);
        debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
        debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
        debugf4("\tdimm->ce_count = %d\n", chan->dimm->ce_count);
        debugf4("\tdimm->label = '%s'\n", chan->dimm->label);
        debugf4("\tdimm->nr_pages = 0x%x\n", chan->dimm->nr_pages);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
        debugf4("\tcsrow = %p\n", csrow);
        debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
        debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
        debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
        debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
        debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
        debugf4("\tcsrow->channels = %p\n", csrow->channels);
        debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
        debugf3("\tmci = %p\n", mci);
        debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
        debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
        debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
        debugf4("\tmci->edac_check = %p\n", mci->edac_check);
        debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
                mci->nr_csrows, mci->csrows);
        debugf3("\tdev = %p\n", mci->dev);
        debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
        debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif                          /* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
        "Empty csrow",
        "Reserved csrow type",
        "Unknown csrow type",
        "Fast page mode RAM",
        "Extended data out RAM",
        "Burst Extended data out RAM",
        "Single data rate SDRAM",
        "Registered single data rate SDRAM",
        "Double data rate SDRAM",
        "Registered Double data rate SDRAM",
        "Rambus DRAM",
        "Unbuffered DDR2 RAM",
        "Fully buffered DDR2",
        "Registered DDR2 RAM",
        "Rambus XDR",
        "Unbuffered DDR3 RAM",
        "Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:		pointer to a pointer with the memory offset to be used. At
 *		return, this will be incremented to point to the next offset
 * @size:	Size of the data structure to be reserved
 * @n_elems:	Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is absolutely needed to keep advancing to the proper
 * offsets in memory when allocating the struct along with its embedded
 * structs, as edac_device_alloc_ctl_info() does, for example.
 *
 * At return, the pointer 'p' will be incremented to be used on a next call
 * to this function.
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
        unsigned align, r;
        void *ptr = *p;

        *p += size * n_elems;

        /*
         * 'p' can possibly be an unaligned item X such that sizeof(X) is
         * 'size'.  Adjust 'p' so that its alignment is at least as
         * stringent as what the compiler would provide for X and return
         * the aligned result.
         * Here we assume that the alignment of a "long long" is the most
         * stringent alignment that the compiler will ever provide by default.
         * As far as I know, this is a reasonable assumption.
         */
        if (size > sizeof(long))
                align = sizeof(long long);
        else if (size > sizeof(int))
                align = sizeof(long);
        else if (size > sizeof(short))
                align = sizeof(int);
        else if (size > sizeof(char))
                align = sizeof(short);
        else
                return (char *)ptr;

        r = size % align;

        if (r == 0)
                return (char *)ptr;

        *p += align - r;

        return (void *)(((unsigned long)ptr) + align - r);
}

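/*
 * Worked example (illustrative sketch only): starting from *p == NULL and
 * assuming an 8-byte "long long" alignment, reserving one 40-byte struct
 * followed by eight 2-byte elements behaves roughly like this:
 *
 *      void *offset = NULL;
 *      void *a = edac_align_ptr(&offset, 40, 1);   // a == 0, offset -> 40
 *      void *b = edac_align_ptr(&offset, 2, 8);    // b == 40, offset -> 56
 *
 * The returned values are offsets relative to address 0; after a single
 * kzalloc() of the total size they are rebased by adding the base address
 * of the allocation, exactly as edac_mc_alloc() does below.
 */
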
/**
 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
 * @sz_pvt:	size of private storage needed
 * @nr_csrows:	Number of csrows needed for this MC
 * @nr_chans:	Number of channels for the MC
 * @edac_index:	unique index used as this MC's mc_idx
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * It can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * Returns:
 *	NULL	allocation failed
 *	struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
                                unsigned nr_chans, int edac_index)
{
        void *ptr = NULL;
        struct mem_ctl_info *mci;
        struct csrow_info *csi, *csrow;
        struct rank_info *chi, *chp, *chan;
        struct dimm_info *dimm;
        void *pvt;
        unsigned size;
        int row, chn;
        int err;

        /* Figure out the offsets of the various items from the start of an mc
         * structure.  We want the alignment of each item to be at least as
         * stringent as what the compiler would provide if we could simply
         * hardcode everything into a single struct.
         */
        mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
        csi = edac_align_ptr(&ptr, sizeof(*csi), nr_csrows);
        chi = edac_align_ptr(&ptr, sizeof(*chi), nr_csrows * nr_chans);
        dimm = edac_align_ptr(&ptr, sizeof(*dimm), nr_csrows * nr_chans);
        pvt = edac_align_ptr(&ptr, sz_pvt, 1);
        size = ((unsigned long)pvt) + sz_pvt;

        mci = kzalloc(size, GFP_KERNEL);
        if (mci == NULL)
                return NULL;

        /* Adjust pointers so they point within the memory we just allocated
         * rather than an imaginary chunk of memory located at address 0.
         */
        csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
        chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
        dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
        pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

        /* setup index and various internal pointers */
        mci->mc_idx = edac_index;
        mci->csrows = csi;
        mci->dimms = dimm;
        mci->pvt_info = pvt;
        mci->nr_csrows = nr_csrows;

        /*
         * For now, assume a per-csrow arrangement for the dimms.
         * This will be changed later.
         */
        dimm = mci->dimms;

        for (row = 0; row < nr_csrows; row++) {
                csrow = &csi[row];
                csrow->csrow_idx = row;
                csrow->mci = mci;
                csrow->nr_channels = nr_chans;
                chp = &chi[row * nr_chans];
                csrow->channels = chp;

                for (chn = 0; chn < nr_chans; chn++) {
                        chan = &chp[chn];
                        chan->chan_idx = chn;
                        chan->csrow = csrow;

                        mci->csrows[row].channels[chn].dimm = dimm;
                        dimm->csrow = row;
                        dimm->csrow_channel = chn;
                        dimm++;
                        mci->nr_dimms++;
                }
        }

        mci->op_state = OP_ALLOC;
        INIT_LIST_HEAD(&mci->grp_kobj_list);

        /*
         * Initialize the 'root' kobj for the edac_mc controller
         */
        err = edac_mc_register_sysfs_main_kobj(mci);
        if (err) {
                kfree(mci);
                return NULL;
        }

        /* At this point the root kobj is valid; to 'free' the object,
         * edac_mc_unregister_sysfs_main_kobj() must be called, which
         * performs the kobj unregistration.  The actual free happens in
         * the kobject release callback.
         */
        return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
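
/*
 * Typical use (a minimal sketch; "struct my_mc_pvt" and the csrow/channel
 * counts are hypothetical driver-specific values, not defined in this file):
 *
 *      struct mem_ctl_info *mci;
 *
 *      mci = edac_mc_alloc(sizeof(struct my_mc_pvt), nr_csrows, nr_chans, 0);
 *      if (mci == NULL)
 *              return -ENOMEM;
 *      ...
 *      edac_mc_free(mci);      // on error paths or at removal time
 *
 * The private area is reached through mci->pvt_info, and the csrow,
 * channel and dimm structures are already wired up by edac_mc_alloc().
 */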

/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
        debugf1("%s()\n", __func__);

        edac_mc_unregister_sysfs_main_kobj(mci);

        /* free the mci instance memory here */
        kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);


/**
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device related to the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
        struct mem_ctl_info *mci;
        struct list_head *item;

        debugf3("%s()\n", __func__);

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->dev == dev)
                        return mci;
        }

        return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
        int old_state;

        if (edac_op_state == EDAC_OPSTATE_POLL)
                return 1;

        old_state = edac_err_assert;
        edac_err_assert = 0;

        return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
        struct delayed_work *d_work = to_delayed_work(work_req);
        struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

        mutex_lock(&mem_ctls_mutex);

        /* if this control struct has moved to offline state, we are done */
        if (mci->op_state == OP_OFFLINE) {
                mutex_unlock(&mem_ctls_mutex);
                return;
        }

        /* Only poll controllers that are running polled and have a check */
        if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
                mci->edac_check(mci);

        mutex_unlock(&mem_ctls_mutex);

        /* Reschedule */
        queue_delayed_work(edac_workqueue, &mci->work,
                        msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
        debugf0("%s()\n", __func__);

        /* if this instance is not in the POLL state, then simply return */
        if (mci->op_state != OP_RUNNING_POLL)
                return;

        INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
        queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 *	locking model:
 *
 *		called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
        int status;

        if (mci->op_state != OP_RUNNING_POLL)
                return;

        status = cancel_delayed_work(&mci->work);
        if (status == 0) {
                debugf0("%s() not canceled, flush the queue\n",
                        __func__);

                /* workq instance might be running, wait for it */
                flush_workqueue(edac_workqueue);
        }
}

/*
 * edac_mc_reset_delay_period(unsigned long value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
        struct mem_ctl_info *mci;
        struct list_head *item;

        mutex_lock(&mem_ctls_mutex);

        /* scan the list and turn off all workq timers, doing so under lock
         */
        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->op_state == OP_RUNNING_POLL)
                        cancel_delayed_work(&mci->work);
        }

        mutex_unlock(&mem_ctls_mutex);


        /* re-walk the list, and reset the poll delay */
        mutex_lock(&mem_ctls_mutex);

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                edac_mc_workq_setup(mci, (unsigned long) value);
        }

        mutex_unlock(&mem_ctls_mutex);
}



/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
        struct list_head *item, *insert_before;
        struct mem_ctl_info *p;

        insert_before = &mc_devices;

        p = find_mci_by_dev(mci->dev);
        if (unlikely(p != NULL))
                goto fail0;

        list_for_each(item, &mc_devices) {
                p = list_entry(item, struct mem_ctl_info, link);

                if (p->mc_idx >= mci->mc_idx) {
                        if (unlikely(p->mc_idx == mci->mc_idx))
                                goto fail1;

                        insert_before = item;
                        break;
                }
        }

        list_add_tail_rcu(&mci->link, insert_before);
        atomic_inc(&edac_handlers);
        return 0;

fail0:
        edac_printk(KERN_WARNING, EDAC_MC,
                "%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
                edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
        return 1;

fail1:
        edac_printk(KERN_WARNING, EDAC_MC,
                "bug in low-level driver: attempt to assign\n"
                "    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
        return 1;
}

static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
        atomic_dec(&edac_handlers);
        list_del_rcu(&mci->link);

        /* these are for safe removal of devices from global list while
         * NMI handlers may be traversing list
         */
        synchronize_rcu();
        INIT_LIST_HEAD(&mci->link);
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
        struct list_head *item;
        struct mem_ctl_info *mci;

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->mc_idx >= idx) {
                        if (mci->mc_idx == idx)
                                return mci;

                        break;
                }
        }

        return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *                 create sysfs entries associated with the mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
        debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
        if (edac_debug_level >= 3)
                edac_mc_dump_mci(mci);

        if (edac_debug_level >= 4) {
                int i;

                for (i = 0; i < mci->nr_csrows; i++) {
                        int j;

                        edac_mc_dump_csrow(&mci->csrows[i]);
                        for (j = 0; j < mci->csrows[i].nr_channels; j++)
                                edac_mc_dump_channel(&mci->csrows[i].
                                                channels[j]);
                }
        }
#endif
        mutex_lock(&mem_ctls_mutex);

        if (add_mc_to_global_list(mci))
                goto fail0;

        /* set load time so that error rate can be tracked */
        mci->start_time = jiffies;

        if (edac_create_sysfs_mci_device(mci)) {
                edac_mc_printk(mci, KERN_WARNING,
                        "failed to create sysfs device\n");
                goto fail1;
        }

        /* If there IS a check routine, then we are running POLLED */
        if (mci->edac_check != NULL) {
                /* This instance is NOW RUNNING */
                mci->op_state = OP_RUNNING_POLL;

                edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
        } else {
                mci->op_state = OP_RUNNING_INTERRUPT;
        }

        /* Report action taken */
        edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
                " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

        mutex_unlock(&mem_ctls_mutex);
        return 0;

fail1:
        del_mc_from_global_list(mci);

fail0:
        mutex_unlock(&mem_ctls_mutex);
        return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
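
/*
 * Typical driver probe sequence (a minimal sketch; my_edac_check(),
 * struct my_mc_pvt and the device pointer are hypothetical driver pieces):
 *
 *      mci = edac_mc_alloc(sizeof(struct my_mc_pvt), nr_csrows, nr_chans, 0);
 *      if (!mci)
 *              return -ENOMEM;
 *
 *      mci->dev = &pdev->dev;
 *      mci->mod_name = "my_edac";
 *      mci->ctl_name = "my_memory_controller";
 *      mci->edac_check = my_edac_check;  // non-NULL => OP_RUNNING_POLL
 *      // fill in mci->csrows[], dimm labels, grain, etc. here
 *
 *      if (edac_mc_add_mc(mci)) {
 *              edac_mc_free(mci);
 *              return -ENODEV;
 *      }
 *
 * Leaving mci->edac_check NULL puts the instance in OP_RUNNING_INTERRUPT
 * mode instead, and no polling work is scheduled for it.
 */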

/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
        struct mem_ctl_info *mci;

        debugf0("%s()\n", __func__);

        mutex_lock(&mem_ctls_mutex);

        /* find the requested mci struct in the global list */
        mci = find_mci_by_dev(dev);
        if (mci == NULL) {
                mutex_unlock(&mem_ctls_mutex);
                return NULL;
        }

        del_mc_from_global_list(mci);
        mutex_unlock(&mem_ctls_mutex);

        /* flush workq processes */
        edac_mc_workq_teardown(mci);

        /* marking MCI offline */
        mci->op_state = OP_OFFLINE;

        /* remove from sysfs */
        edac_remove_sysfs_mci_device(mci);

        edac_printk(KERN_INFO, EDAC_MC,
                "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
                mci->mod_name, mci->ctl_name, edac_dev_name(mci));

        return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
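
/*
 * Typical removal path (a minimal sketch; "pdev" stands for whatever
 * device the driver originally stored in mci->dev):
 *
 *      struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
 *
 *      if (mci)
 *              edac_mc_free(mci);
 *
 * edac_mc_del_mc() only unlinks the instance, stops the polling work and
 * tears down sysfs; the memory itself is still owned by the caller and
 * must be released with edac_mc_free().
 */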

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
                                u32 size)
{
        struct page *pg;
        void *virt_addr;
        unsigned long flags = 0;

        debugf3("%s()\n", __func__);

        /* ECC error page was not in our memory. Ignore it. */
        if (!pfn_valid(page))
                return;

        /* Find the actual page structure then map it and fix */
        pg = pfn_to_page(page);

        if (PageHighMem(pg))
                local_irq_save(flags);

        virt_addr = kmap_atomic(pg);

        /* Perform architecture specific atomic scrub operation */
        atomic_scrub(virt_addr + offset, size);

        /* Unmap and complete */
        kunmap_atomic(virt_addr);

        if (PageHighMem(pg))
                local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
        struct csrow_info *csrows = mci->csrows;
        int row, i, j, n;

        debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
        row = -1;

        for (i = 0; i < mci->nr_csrows; i++) {
                struct csrow_info *csrow = &csrows[i];
                n = 0;
                for (j = 0; j < csrow->nr_channels; j++) {
                        struct dimm_info *dimm = csrow->channels[j].dimm;
                        n += dimm->nr_pages;
                }
                if (n == 0)
                        continue;

                debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
                        "mask(0x%lx)\n", mci->mc_idx, __func__,
                        csrow->first_page, page, csrow->last_page,
                        csrow->page_mask);

                if ((page >= csrow->first_page) &&
                    (page <= csrow->last_page) &&
                    ((page & csrow->page_mask) ==
                     (csrow->first_page & csrow->page_mask))) {
                        row = i;
                        break;
                }
        }

        if (row == -1)
                edac_mc_printk(mci, KERN_ERR,
                        "could not look up page error address %lx\n",
                        (unsigned long)page);

        return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

/* FIXME - settable log (warning/emerg) levels */
/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
void edac_mc_handle_ce(struct mem_ctl_info *mci,
                unsigned long page_frame_number,
                unsigned long offset_in_page, unsigned long syndrome,
                int row, int channel, const char *msg)
{
        unsigned long remapped_page;
        char *label = NULL;
        u32 grain;

        debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

        /* FIXME - maybe make panic on INTERNAL ERROR an option */
        if (row >= mci->nr_csrows || row < 0) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: row out of range "
                        "(%d >= %d)\n", row, mci->nr_csrows);
                edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
                return;
        }

        if (channel >= mci->csrows[row].nr_channels || channel < 0) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: channel out of range "
                        "(%d >= %d)\n", channel,
                        mci->csrows[row].nr_channels);
                edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
                return;
        }

        label = mci->csrows[row].channels[channel].dimm->label;
        grain = mci->csrows[row].channels[channel].dimm->grain;

        if (edac_mc_get_log_ce())
                /* FIXME - put in DIMM location */
                edac_mc_printk(mci, KERN_WARNING,
                        "CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
                        "0x%lx, row %d, channel %d, label \"%s\": %s\n",
                        page_frame_number, offset_in_page,
                        grain, syndrome, row, channel,
                        label, msg);

        mci->ce_count++;
        mci->csrows[row].ce_count++;
        mci->csrows[row].channels[channel].dimm->ce_count++;
        mci->csrows[row].channels[channel].ce_count++;

        if (mci->scrub_mode & SCRUB_SW_SRC) {
                /*
                 * Some MC's can remap memory so that it is still available
                 * at a different address when PCI devices map into memory.
                 * MC's that can't do this lose the memory where PCI devices
                 * are mapped. This mapping is MC dependent and so we call
                 * back into the MC driver for it to map the MC page to
                 * a physical (CPU) page which can then be mapped to a virtual
                 * page - which can then be scrubbed.
                 */
                remapped_page = mci->ctl_page_to_phys ?
                        mci->ctl_page_to_phys(mci, page_frame_number) :
                        page_frame_number;

                edac_mc_scrub_block(remapped_page, offset_in_page, grain);
        }
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
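
/*
 * How a driver typically reports a corrected error from its edac_check()
 * or interrupt handler (a minimal sketch; the pfn/offset/syndrome/channel
 * values would come from hypothetical controller registers):
 *
 *      int row = edac_mc_find_csrow_by_page(mci, pfn);
 *
 *      if (row >= 0)
 *              edac_mc_handle_ce(mci, pfn, offset, syndrome,
 *                              row, channel, "my_edac CE");
 *      else
 *              edac_mc_handle_ce_no_info(mci, "my_edac CE");
 *
 * edac_mc_handle_ce() bumps the per-mci, per-csrow and per-channel CE
 * counters, logs the event when CE logging is enabled, and software-scrubs
 * the page when SCRUB_SW_SRC is set in mci->scrub_mode.
 */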

void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
{
        if (edac_mc_get_log_ce())
                edac_mc_printk(mci, KERN_WARNING,
                        "CE - no information available: %s\n", msg);

        mci->ce_noinfo_count++;
        mci->ce_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);

void edac_mc_handle_ue(struct mem_ctl_info *mci,
                unsigned long page_frame_number,
                unsigned long offset_in_page, int row, const char *msg)
{
        int len = EDAC_MC_LABEL_LEN * 4;
        char labels[len + 1];
        char *pos = labels;
        int chan;
        int chars;
        char *label = NULL;
        u32 grain;

        debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

        /* FIXME - maybe make panic on INTERNAL ERROR an option */
        if (row >= mci->nr_csrows || row < 0) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: row out of range "
                        "(%d >= %d)\n", row, mci->nr_csrows);
                edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
                return;
        }

        grain = mci->csrows[row].channels[0].dimm->grain;
        label = mci->csrows[row].channels[0].dimm->label;
        chars = snprintf(pos, len + 1, "%s", label);
        len -= chars;
        pos += chars;

        for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
             chan++) {
                label = mci->csrows[row].channels[chan].dimm->label;
                chars = snprintf(pos, len + 1, ":%s", label);
                len -= chars;
                pos += chars;
        }

        if (edac_mc_get_log_ue())
                edac_mc_printk(mci, KERN_EMERG,
                        "UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
                        "labels \"%s\": %s\n", page_frame_number,
                        offset_in_page, grain, row, labels, msg);

        if (edac_mc_get_panic_on_ue())
                panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
                        "row %d, labels \"%s\": %s\n", mci->mc_idx,
                        page_frame_number, offset_in_page,
                        grain, row, labels, msg);

        mci->ue_count++;
        mci->csrows[row].ue_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ue);
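
/*
 * Uncorrected errors follow the same pattern (sketch; the pfn and offset
 * values are hypothetical, decoded from controller registers):
 *
 *      int row = edac_mc_find_csrow_by_page(mci, pfn);
 *
 *      if (row >= 0)
 *              edac_mc_handle_ue(mci, pfn, offset, row, "my_edac UE");
 *      else
 *              edac_mc_handle_ue_no_info(mci, "my_edac UE");
 *
 * Note that edac_mc_handle_ue() may panic the machine when panic-on-UE is
 * enabled (see the edac_mc_get_panic_on_ue() check above).
 */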

void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
{
        if (edac_mc_get_panic_on_ue())
                panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);

        if (edac_mc_get_log_ue())
                edac_mc_printk(mci, KERN_WARNING,
                        "UE - no information available: %s\n", msg);
        mci->ue_noinfo_count++;
        mci->ue_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);

/*************************************************************
 * On Fully Buffered DIMM modules, this helper function is
 * called to process UE events
 */
void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
                        unsigned int csrow,
                        unsigned int channela,
                        unsigned int channelb, char *msg)
{
        int len = EDAC_MC_LABEL_LEN * 4;
        char labels[len + 1];
        char *pos = labels;
        int chars;
        char *label;

        if (csrow >= mci->nr_csrows) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: row out of range (%d >= %d)\n",
                        csrow, mci->nr_csrows);
                edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
                return;
        }

        if (channela >= mci->csrows[csrow].nr_channels) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: channel-a out of range "
                        "(%d >= %d)\n",
                        channela, mci->csrows[csrow].nr_channels);
                edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
                return;
        }

        if (channelb >= mci->csrows[csrow].nr_channels) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: channel-b out of range "
                        "(%d >= %d)\n",
                        channelb, mci->csrows[csrow].nr_channels);
                edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
                return;
        }

        mci->ue_count++;
        mci->csrows[csrow].ue_count++;

        /* Generate the DIMM labels from the specified channels */
        label = mci->csrows[csrow].channels[channela].dimm->label;
        chars = snprintf(pos, len + 1, "%s", label);
        len -= chars;
        pos += chars;

        chars = snprintf(pos, len + 1, "-%s",
                        mci->csrows[csrow].channels[channelb].dimm->label);

        if (edac_mc_get_log_ue())
                edac_mc_printk(mci, KERN_EMERG,
                        "UE row %d, channel-a= %d channel-b= %d "
                        "labels \"%s\": %s\n", csrow, channela, channelb,
                        labels, msg);

        if (edac_mc_get_panic_on_ue())
                panic("UE row %d, channel-a= %d channel-b= %d "
                        "labels \"%s\": %s\n", csrow, channela,
                        channelb, labels, msg);
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ue);

/*************************************************************
 * On Fully Buffered DIMM modules, this helper function is
 * called to process CE events
 */
void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
                        unsigned int csrow, unsigned int channel, char *msg)
{
        char *label = NULL;

        /* Ensure boundary values */
        if (csrow >= mci->nr_csrows) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: row out of range (%d >= %d)\n",
                        csrow, mci->nr_csrows);
                edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
                return;
        }
        if (channel >= mci->csrows[csrow].nr_channels) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: channel out of range (%d >= %d)\n",
                        channel, mci->csrows[csrow].nr_channels);
                edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
                return;
        }

        label = mci->csrows[csrow].channels[channel].dimm->label;

        if (edac_mc_get_log_ce())
                /* FIXME - put in DIMM location */
                edac_mc_printk(mci, KERN_WARNING,
                        "CE row %d, channel %d, label \"%s\": %s\n",
                        csrow, channel, label, msg);

        mci->ce_count++;
        mci->csrows[csrow].ce_count++;
        mci->csrows[csrow].channels[channel].dimm->ce_count++;
        mci->csrows[csrow].channels[channel].ce_count++;
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ce);
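
/*
 * Fully-buffered DIMM drivers report errors by csrow/channel instead of by
 * page (sketch; the csrow and channel numbers are hypothetical values
 * decoded from controller registers):
 *
 *      edac_mc_handle_fbd_ce(mci, csrow, channel, "my_fbd CE");
 *      edac_mc_handle_fbd_ue(mci, csrow, channel_a, channel_b, "my_fbd UE");
 *
 * The UE variant takes two channels so that both DIMM labels end up in the
 * log message.
 */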