edac: Remove the legacy EDAC ABI
drivers/edac/edac_mc.c
/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
	debugf4("\tchannel->dimm = %p\n", chan->dimm);
}

static void edac_mc_dump_dimm(struct dimm_info *dimm)
{
	int i;

	debugf4("\tdimm = %p\n", dimm);
	debugf4("\tdimm->label = '%s'\n", dimm->label);
	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
	debugf4("\tdimm location ");
	for (i = 0; i < dimm->mci->n_layers; i++) {
		printk(KERN_CONT "%d", dimm->location[i]);
		if (i < dimm->mci->n_layers - 1)
			printk(KERN_CONT ".");
	}
	printk(KERN_CONT "\n");
	debugf4("\tdimm->grain = %d\n", dimm->grain);
	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
	debugf3("\tmci->nr_dimms = %d, dimms = %p\n",
		mci->tot_dimms, mci->dimms);
	debugf3("\tdev = %p\n", mci->dev);
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif				/* CONFIG_EDAC_DEBUG */

/*
 * Keep these in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:		pointer to a pointer with the memory offset to be used. At
 *		return, this will be incremented to point to the next offset
 * @size:	Size of the data structure to be reserved
 * @n_elems:	Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is needed to keep advancing to the proper offsets in
 * memory when allocating a struct along with its embedded structs, as
 * edac_device_alloc_ctl_info() does, for example.
 *
 * At return, the pointer 'p' will be incremented to be used on a next call
 * to this function.
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
	unsigned align, r;
	void *ptr = *p;

	*p += size * n_elems;

	/*
	 * 'p' can possibly be an unaligned item X such that sizeof(X) is
	 * 'size'.  Adjust 'p' so that its alignment is at least as
	 * stringent as what the compiler would provide for X and return
	 * the aligned result.
	 * Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	r = size % align;

	if (r == 0)
		return (char *)ptr;

	*p += align - r;

	return (void *)(((unsigned long)ptr) + align - r);
}

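/*
 * Editorial example (not part of the original file): a minimal sketch of the
 * two-pass pattern that edac_mc_alloc() below builds on.  The first pass
 * computes offsets against a NULL base, then a single kzalloc() is done and
 * the offsets are rebased onto the allocated chunk.  The "foo"/"n_foo" names
 * are hypothetical:
 *
 *	void *ptr = NULL, *pvt, *mem;
 *	struct foo *foo;
 *	unsigned size;
 *
 *	foo = edac_align_ptr(&ptr, sizeof(*foo), n_foo);
 *	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
 *	size = ((unsigned long)pvt) + sz_pvt;
 *
 *	mem = kzalloc(size, GFP_KERNEL);
 *	foo = (struct foo *)(((char *)mem) + (unsigned long)foo);
 *	pvt = sz_pvt ? (((char *)mem) + (unsigned long)pvt) : NULL;
 */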
/**
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @mc_num:		Memory controller number
 * @n_layers:		Number of MC hierarchy layers
 * @layers:		Describes each layer as seen by the Memory Controller
 * @sz_pvt:		size of private storage needed
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * It can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries. Currently, this function will allocate multiple struct dimm_info
 * in such scenarios, as grouping the multiple ranks would require driver
 * changes.
 *
 * Returns:
 *	On failure: NULL
 *	On success: struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
				   unsigned n_layers,
				   struct edac_mc_layer *layers,
				   unsigned sz_pvt)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer *layer;
	struct csrow_info *csi, *csr;
	struct rank_info *chi, *chp, *chan;
	struct dimm_info *dimm;
	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
	unsigned pos[EDAC_MAX_LAYERS];
	void *pvt, *ptr = NULL;
	unsigned size, tot_dimms = 1, count = 1;
	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
	int i, j, err, row, chn;
	bool per_rank = false;

	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
	/*
	 * Calculate the total amount of dimms and csrows/cschannels while
	 * in the old API emulation mode
	 */
	for (i = 0; i < n_layers; i++) {
		tot_dimms *= layers[i].size;
		if (layers[i].is_virt_csrow)
			tot_csrows *= layers[i].size;
		else
			tot_channels *= layers[i].size;

		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
			per_rank = true;
	}

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
	csi = edac_align_ptr(&ptr, sizeof(*csi), tot_csrows);
	chi = edac_align_ptr(&ptr, sizeof(*chi), tot_csrows * tot_channels);
	dimm = edac_align_ptr(&ptr, sizeof(*dimm), tot_dimms);
	for (i = 0; i < n_layers; i++) {
		count *= layers[i].size;
		debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		tot_errcount += 2 * count;
	}

	debugf4("%s: allocating %d error counters\n", __func__, tot_errcount);
	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
	size = ((unsigned long)pvt) + sz_pvt;

	debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
		__func__, size,
		tot_dimms,
		per_rank ? "ranks" : "dimms",
		tot_csrows * tot_channels);
	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
	dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
	for (i = 0; i < n_layers; i++) {
		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
	}
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = mc_num;
	mci->csrows = csi;
	mci->dimms = dimm;
	mci->tot_dimms = tot_dimms;
	mci->pvt_info = pvt;
	mci->n_layers = n_layers;
	mci->layers = layer;
	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
	mci->nr_csrows = tot_csrows;
	mci->num_cschannel = tot_channels;
	mci->mem_is_per_rank = per_rank;

	/*
	 * Fill the csrow struct
	 */
	for (row = 0; row < tot_csrows; row++) {
		csr = &csi[row];
		csr->csrow_idx = row;
		csr->mci = mci;
		csr->nr_channels = tot_channels;
		chp = &chi[row * tot_channels];
		csr->channels = chp;

		for (chn = 0; chn < tot_channels; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csr;
		}
	}

	/*
	 * Fill the dimm struct
	 */
	memset(&pos, 0, sizeof(pos));
	row = 0;
	chn = 0;
	debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
		per_rank ? "ranks" : "dimms");
	for (i = 0; i < tot_dimms; i++) {
		chan = &csi[row].channels[chn];
		dimm = EDAC_DIMM_PTR(layer, mci->dimms, n_layers,
				     pos[0], pos[1], pos[2]);
		dimm->mci = mci;

		debugf2("%s: %d: %s%zd (%d:%d:%d): row %d, chan %d\n", __func__,
			i, per_rank ? "rank" : "dimm", (dimm - mci->dimms),
			pos[0], pos[1], pos[2], row, chn);

		/* Copy DIMM location */
		for (j = 0; j < n_layers; j++)
			dimm->location[j] = pos[j];

		/* Link it to the csrows old API data */
		chan->dimm = dimm;
		dimm->csrow = row;
		dimm->cschannel = chn;

		/* Increment csrow location */
		row++;
		if (row == tot_csrows) {
			row = 0;
			chn++;
		}

		/* Increment dimm location */
		for (j = n_layers - 1; j >= 0; j--) {
			pos[j]++;
			if (pos[j] < layers[j].size)
				break;
			pos[j] = 0;
		}
	}

	mci->op_state = OP_ALLOC;
	INIT_LIST_HEAD(&mci->grp_kobj_list);

	/*
	 * Initialize the 'root' kobj for the edac_mc controller
	 */
	err = edac_mc_register_sysfs_main_kobj(mci);
	if (err) {
		kfree(mci);
		return NULL;
	}

	/* at this point, the root kobj is valid, and in order to
	 * 'free' the object, then the function:
	 *	edac_mc_unregister_sysfs_main_kobj() must be called
	 * which will perform kobj unregistration and the actual free
	 * will occur during the kobject callback operation
	 */
	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);

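/*
 * Editorial example (not in the original file): a hypothetical driver with
 * "nr_csrows" chip-select rows and "nr_channels" channels per row could
 * describe its hierarchy and allocate the mci like this.  All names below
 * are assumptions, not an existing driver:
 *
 *	struct edac_mc_layer layers[2];
 *	struct mem_ctl_info *mci;
 *
 *	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
 *	layers[0].size = nr_csrows;
 *	layers[0].is_virt_csrow = true;
 *	layers[1].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[1].size = nr_channels;
 *	layers[1].is_virt_csrow = false;
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct my_driver_pvt));
 *	if (!mci)
 *		return -ENOMEM;
 */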
/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	edac_mc_unregister_sysfs_main_kobj(mci);

	/* free the mci instance memory here */
	kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);

/**
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	debugf3("%s()\n", __func__);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->dev == dev)
			return mci;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
	int old_state;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		return 1;

	old_state = edac_err_assert;
	edac_err_assert = 0;

	return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
	queue_delayed_work(edac_workqueue, &mci->work,
			msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	debugf0("%s()\n", __func__);

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 *	locking model:
 *
 *		called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	int status;

	if (mci->op_state != OP_RUNNING_POLL)
		return;

	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		debugf0("%s() not canceled, flush the queue\n",
			__func__);

		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
	}
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	/* scan the list and turn off all workq timers, doing so under lock
	 */
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->op_state == OP_RUNNING_POLL)
			cancel_delayed_work(&mci->work);
	}

	mutex_unlock(&mem_ctls_mutex);


	/* re-walk the list, and reset the poll delay */
	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}

	mutex_unlock(&mem_ctls_mutex);
}

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	p = find_mci_by_dev(mci->dev);
	if (unlikely(p != NULL))
		goto fail0;

	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
	atomic_dec(&edac_handlers);
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *	create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;

			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
				edac_mc_dump_channel(&mci->csrows[i].
						channels[j]);
		}
		for (i = 0; i < mci->tot_dimms; i++)
			edac_mc_dump_dimm(&mci->dimms[i]);
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);

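/*
 * Editorial example (not in the original file): the usual registration flow
 * in a driver's probe routine.  "my_edac_check" and "my_driver_pvt" are
 * hypothetical names:
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct my_driver_pvt));
 *	if (!mci)
 *		return -ENOMEM;
 *
 *	mci->dev = &pdev->dev;
 *	mci->mod_name = "my_edac_driver";
 *	mci->ctl_name = "my_memory_controller";
 *	mci->edac_check = my_edac_check;	/- non-NULL means polled mode -/
 *
 *	if (edac_mc_add_mc(mci)) {
 *		edac_mc_free(mci);
 *		return -ENODEV;
 *	}
 */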
/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *	remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	debugf0("%s()\n", __func__);

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	del_mc_from_global_list(mci);
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info *csrows = mci->csrows;
	int row, i, j, n;

	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = &csrows[i];
		n = 0;
		for (j = 0; j < csrow->nr_channels; j++) {
			struct dimm_info *dimm = csrow->channels[j].dimm;
			n += dimm->nr_pages;
		}
		if (n == 0)
			continue;

		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
			"mask(0x%lx)\n", mci->mc_idx, __func__,
			csrow->first_page, page, csrow->last_page,
			csrow->page_mask);

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			"could not look up page error address %lx\n",
			(unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

const char *edac_layer_name[] = {
	[EDAC_MC_LAYER_BRANCH] = "branch",
	[EDAC_MC_LAYER_CHANNEL] = "channel",
	[EDAC_MC_LAYER_SLOT] = "slot",
	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

static void edac_inc_ce_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS])
{
	int i, index = 0;

	mci->ce_count++;

	if (!enable_per_layer_report) {
		mci->ce_noinfo_count++;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ce_per_layer[i][index]++;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

static void edac_inc_ue_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS])
{
	int i, index = 0;

	mci->ue_count++;

	if (!enable_per_layer_report) {
		mci->ue_noinfo_count++;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ue_per_layer[i][index]++;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

static void edac_ce_error(struct mem_ctl_info *mci,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  u32 grain)
{
	unsigned long remapped_page;

	if (edac_mc_get_log_ce()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "CE %s on %s (%s%s - %s)\n",
				       msg, label, location,
				       detail, other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "CE %s on %s (%s%s)\n",
				       msg, label, location,
				       detail);
	}
	edac_inc_ce_error(mci, enable_per_layer_report, pos);

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some memory controllers (called MCs below) can remap
		 * memory so that it is still available at a different
		 * address when PCI devices map into memory.
		 * MC's that can't do this, lose the memory where PCI
		 * devices are mapped. This mapping is MC-dependent
		 * and so we call back into the MC driver for it to
		 * map the MC page to a physical (CPU) page which can
		 * then be mapped to a virtual page - which can then
		 * be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page,
					offset_in_page, grain);
	}
}

static void edac_ue_error(struct mem_ctl_info *mci,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report)
{
	if (edac_mc_get_log_ue()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "UE %s on %s (%s%s - %s)\n",
				       msg, label, location, detail,
				       other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "UE %s on %s (%s%s)\n",
				       msg, label, location, detail);
	}

	if (edac_mc_get_panic_on_ue()) {
		if (other_detail && *other_detail)
			panic("UE %s on %s (%s%s - %s)\n",
			      msg, label, location, detail, other_detail);
		else
			panic("UE %s on %s (%s%s)\n",
			      msg, label, location, detail);
	}

	edac_inc_ue_error(mci, enable_per_layer_report, pos);
}

#define OTHER_LABEL " or "
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
			  struct mem_ctl_info *mci,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  const unsigned long syndrome,
			  const int layer0,
			  const int layer1,
			  const int layer2,
			  const char *msg,
			  const char *other_detail,
			  const void *mcelog)
{
	/* FIXME: too much for stack: move it to some pre-allocated area */
	char detail[80], location[80];
	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
	char *p;
	int row = -1, chan = -1;
	int pos[EDAC_MAX_LAYERS] = { layer0, layer1, layer2 };
	int i;
	u32 grain;
	bool enable_per_layer_report = false;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/*
	 * Check if the event report is consistent and if the memory
	 * location is known. If it is known, enable_per_layer_report will be
	 * true, the DIMM(s) label info will be filled and the per-layer
	 * error counters will be incremented.
	 */
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] >= (int)mci->layers[i].size) {
			if (type == HW_EVENT_ERR_CORRECTED)
				p = "CE";
			else
				p = "UE";

			edac_mc_printk(mci, KERN_ERR,
				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
				       edac_layer_name[mci->layers[i].type],
				       pos[i], mci->layers[i].size);
			/*
			 * Instead of just returning it, let's use what's
			 * known about the error. The increment routines and
			 * the DIMM filter logic will do the right thing by
			 * pointing the likely damaged DIMMs.
			 */
			pos[i] = -1;
		}
		if (pos[i] >= 0)
			enable_per_layer_report = true;
	}

	/*
	 * Get the dimm label/grain that applies to the match criteria.
	 * As the error algorithm may not be able to point to just one memory
	 * stick, the logic here will get all possible labels that could
	 * potentially be affected by the error.
	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
	 * to have only the MC channel and the MC dimm (also called "branch")
	 * but the channel is not known, as the memory is arranged in pairs,
	 * where each memory belongs to a separate channel within the same
	 * branch.
	 */
	grain = 0;
	p = label;
	*p = '\0';
	for (i = 0; i < mci->tot_dimms; i++) {
		struct dimm_info *dimm = &mci->dimms[i];

		if (layer0 >= 0 && layer0 != dimm->location[0])
			continue;
		if (layer1 >= 0 && layer1 != dimm->location[1])
			continue;
		if (layer2 >= 0 && layer2 != dimm->location[2])
			continue;

		/* get the max grain, over the error match range */
		if (dimm->grain > grain)
			grain = dimm->grain;

		/*
		 * If the error is memory-controller wide, there's no need to
		 * seek for the affected DIMMs because the whole
		 * channel/memory controller/... may be affected.
		 * Also, don't show errors for empty DIMM slots.
		 */
		if (enable_per_layer_report && dimm->nr_pages) {
			if (p != label) {
				strcpy(p, OTHER_LABEL);
				p += strlen(OTHER_LABEL);
			}
			strcpy(p, dimm->label);
			p += strlen(p);
			*p = '\0';

			/*
			 * get csrow/channel of the DIMM, in order to allow
			 * incrementing the compat API counters
			 */
			debugf4("%s: %s csrows map: (%d,%d)\n",
				__func__,
				mci->mem_is_per_rank ? "rank" : "dimm",
				dimm->csrow, dimm->cschannel);

			if (row == -1)
				row = dimm->csrow;
			else if (row >= 0 && row != dimm->csrow)
				row = -2;

			if (chan == -1)
				chan = dimm->cschannel;
			else if (chan >= 0 && chan != dimm->cschannel)
				chan = -2;
		}
	}

	if (!enable_per_layer_report) {
		strcpy(label, "any memory");
	} else {
		debugf4("%s: csrow/channel to increment: (%d,%d)\n",
			__func__, row, chan);
		if (p == label)
			strcpy(label, "unknown memory");
		if (type == HW_EVENT_ERR_CORRECTED) {
			if (row >= 0) {
				mci->csrows[row].ce_count++;
				if (chan >= 0)
					mci->csrows[row].channels[chan].ce_count++;
			}
		} else
			if (row >= 0)
				mci->csrows[row].ue_count++;
	}

	/* Fill the RAM location data */
	p = location;
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			continue;

		p += sprintf(p, "%s:%d ",
			     edac_layer_name[mci->layers[i].type],
			     pos[i]);
	}

	/* Memory type dependent details about the error */
	if (type == HW_EVENT_ERR_CORRECTED) {
		snprintf(detail, sizeof(detail),
			"page:0x%lx offset:0x%lx grain:%d syndrome:0x%lx",
			page_frame_number, offset_in_page,
			grain, syndrome);
		edac_ce_error(mci, pos, msg, location, label, detail,
			      other_detail, enable_per_layer_report,
			      page_frame_number, offset_in_page, grain);
	} else {
		snprintf(detail, sizeof(detail),
			"page:0x%lx offset:0x%lx grain:%d",
			page_frame_number, offset_in_page, grain);

		edac_ue_error(mci, pos, msg, location, label, detail,
			      other_detail, enable_per_layer_report);
	}
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
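
/*
 * Editorial example (not in the original file): how a driver that decoded a
 * corrected error down to csrow/channel coordinates might report it.  The
 * variable names are hypothetical:
 *
 *	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
 *			     err_page, err_offset, syndrome,
 *			     csrow, channel, -1,
 *			     "memory read error", "", NULL);
 *
 * Passing -1 for an unused layer tells the core that this coordinate is
 * unknown; the DIMM filter above then matches every position in that layer.
 */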