Commit | Line | Data |
---|---|---|
77c5f5d2 MCC |
1 | /* |
2 | * GHES/EDAC Linux driver | |
3 | * | |
4 | * This file may be distributed under the terms of the GNU General Public | |
5 | * License version 2. | |
6 | * | |
7 | * Copyright (c) 2013 by Mauro Carvalho Chehab <mchehab@redhat.com> | |
8 | * | |
9 | * Red Hat Inc. http://www.redhat.com | |
10 | */ | |
11 | ||
d2a68566 MCC |
12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
13 | ||
77c5f5d2 MCC |
14 | #include <acpi/ghes.h> |
15 | #include <linux/edac.h> | |
32fa1f53 | 16 | #include <linux/dmi.h> |
77c5f5d2 MCC |
17 | #include "edac_core.h" |
18 | ||
77c5f5d2 MCC |
19 | #define GHES_EDAC_REVISION " Ver: 1.0.0" |
20 | ||
21 | struct ghes_edac_pvt { | |
22 | struct list_head list; | |
23 | struct ghes *ghes; | |
24 | struct mem_ctl_info *mci; | |
25 | }; | |
26 | ||
/* All ghes_edac_pvt entries, appended by ghes_edac_register() */
static LIST_HEAD(ghes_reglist);

/* Held by ghes_edac_register() around controller allocation and registration */
static DEFINE_MUTEX(ghes_edac_lock);

/* Index handed to edac_mc_alloc(); bumped after each successful registration */
static int ghes_edac_mc_num;
30 | ||
d2a68566 | 31 | |
32fa1f53 MCC |
32 | /* Memory Device - Type 17 of SMBIOS spec */ |
33 | struct memdev_dmi_entry { | |
34 | u8 type; | |
35 | u8 length; | |
36 | u16 handle; | |
37 | u16 phys_mem_array_handle; | |
38 | u16 mem_err_info_handle; | |
39 | u16 total_width; | |
40 | u16 data_width; | |
41 | u16 size; | |
42 | u8 form_factor; | |
43 | u8 device_set; | |
44 | u8 device_locator; | |
45 | u8 bank_locator; | |
46 | u8 memory_type; | |
47 | u16 type_detail; | |
48 | u16 speed; | |
49 | u8 manufacturer; | |
50 | u8 serial_number; | |
51 | u8 asset_tag; | |
52 | u8 part_number; | |
53 | u8 attributes; | |
54 | u32 extended_size; | |
55 | u16 conf_mem_clk_speed; | |
56 | } __attribute__((__packed__)); | |
57 | ||
/* Cursor handed to ghes_edac_dmidecode() through dmi_walk() */
struct ghes_edac_dimm_fill {
	struct mem_ctl_info	*mci;	/* controller whose DIMM slots get filled */
	unsigned int		count;	/* index of the next DIMM slot to fill */
};
62 | ||
63 | char *memory_type[] = { | |
64 | [MEM_EMPTY] = "EMPTY", | |
65 | [MEM_RESERVED] = "RESERVED", | |
66 | [MEM_UNKNOWN] = "UNKNOWN", | |
67 | [MEM_FPM] = "FPM", | |
68 | [MEM_EDO] = "EDO", | |
69 | [MEM_BEDO] = "BEDO", | |
70 | [MEM_SDR] = "SDR", | |
71 | [MEM_RDR] = "RDR", | |
72 | [MEM_DDR] = "DDR", | |
73 | [MEM_RDDR] = "RDDR", | |
74 | [MEM_RMBS] = "RMBS", | |
75 | [MEM_DDR2] = "DDR2", | |
76 | [MEM_FB_DDR2] = "FB_DDR2", | |
77 | [MEM_RDDR2] = "RDDR2", | |
78 | [MEM_XDR] = "XDR", | |
79 | [MEM_DDR3] = "DDR3", | |
80 | [MEM_RDDR3] = "RDDR3", | |
81 | }; | |
82 | ||
83 | static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg) | |
84 | { | |
85 | int *num_dimm = arg; | |
86 | ||
87 | if (dh->type == DMI_ENTRY_MEM_DEVICE) | |
88 | (*num_dimm)++; | |
89 | } | |
90 | ||
91 | static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) | |
92 | { | |
93 | struct ghes_edac_dimm_fill *dimm_fill = arg; | |
94 | struct mem_ctl_info *mci = dimm_fill->mci; | |
95 | ||
96 | if (dh->type == DMI_ENTRY_MEM_DEVICE) { | |
97 | struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh; | |
98 | struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, | |
99 | mci->n_layers, | |
100 | dimm_fill->count, 0, 0); | |
101 | ||
102 | if (entry->size == 0xffff) { | |
d2a68566 MCC |
103 | pr_info("Can't get DIMM%i size\n", |
104 | dimm_fill->count); | |
32fa1f53 MCC |
105 | dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */ |
106 | } else if (entry->size == 0x7fff) { | |
107 | dimm->nr_pages = MiB_TO_PAGES(entry->extended_size); | |
108 | } else { | |
109 | if (entry->size & 1 << 15) | |
110 | dimm->nr_pages = MiB_TO_PAGES((entry->size & | |
111 | 0x7fff) << 10); | |
112 | else | |
113 | dimm->nr_pages = MiB_TO_PAGES(entry->size); | |
114 | } | |
115 | ||
116 | switch (entry->memory_type) { | |
117 | case 0x12: | |
118 | if (entry->type_detail & 1 << 13) | |
119 | dimm->mtype = MEM_RDDR; | |
120 | else | |
121 | dimm->mtype = MEM_DDR; | |
122 | break; | |
123 | case 0x13: | |
124 | if (entry->type_detail & 1 << 13) | |
125 | dimm->mtype = MEM_RDDR2; | |
126 | else | |
127 | dimm->mtype = MEM_DDR2; | |
128 | break; | |
129 | case 0x14: | |
130 | dimm->mtype = MEM_FB_DDR2; | |
131 | break; | |
132 | case 0x18: | |
133 | if (entry->type_detail & 1 << 13) | |
134 | dimm->mtype = MEM_RDDR3; | |
135 | else | |
136 | dimm->mtype = MEM_DDR3; | |
137 | break; | |
138 | default: | |
139 | if (entry->type_detail & 1 << 6) | |
140 | dimm->mtype = MEM_RMBS; | |
141 | else if ((entry->type_detail & ((1 << 7) | (1 << 13))) | |
142 | == ((1 << 7) | (1 << 13))) | |
143 | dimm->mtype = MEM_RDR; | |
144 | else if (entry->type_detail & 1 << 7) | |
145 | dimm->mtype = MEM_SDR; | |
146 | else if (entry->type_detail & 1 << 9) | |
147 | dimm->mtype = MEM_EDO; | |
148 | else | |
149 | dimm->mtype = MEM_UNKNOWN; | |
150 | } | |
151 | ||
152 | /* | |
153 | * Actually, we can only detect if the memory has bits for | |
154 | * checksum or not | |
155 | */ | |
156 | if (entry->total_width == entry->data_width) | |
157 | dimm->edac_mode = EDAC_NONE; | |
158 | else | |
159 | dimm->edac_mode = EDAC_SECDED; | |
160 | ||
161 | dimm->dtype = DEV_UNKNOWN; | |
162 | dimm->grain = 128; /* Likely, worse case */ | |
163 | ||
164 | /* | |
165 | * FIXME: It shouldn't be hard to also fill the DIMM labels | |
166 | */ | |
167 | ||
168 | if (dimm->nr_pages) { | |
d2a68566 | 169 | edac_dbg(1, "DIMM%i: %s size = %d MB%s\n", |
32fa1f53 MCC |
170 | dimm_fill->count, memory_type[dimm->mtype], |
171 | PAGES_TO_MiB(dimm->nr_pages), | |
172 | (dimm->edac_mode != EDAC_NONE) ? "(ECC)" : ""); | |
d2a68566 | 173 | edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n", |
32fa1f53 MCC |
174 | entry->memory_type, entry->type_detail, |
175 | entry->total_width, entry->data_width); | |
176 | } | |
177 | ||
178 | dimm_fill->count++; | |
179 | } | |
180 | } | |
181 | ||
77c5f5d2 | 182 | void ghes_edac_report_mem_error(struct ghes *ghes, int sev, |
f04c62a7 | 183 | struct cper_sec_mem_err *mem_err) |
77c5f5d2 | 184 | { |
f04c62a7 MCC |
185 | enum hw_event_mc_err_type type; |
186 | struct edac_raw_error_desc *e; | |
187 | struct mem_ctl_info *mci; | |
188 | struct ghes_edac_pvt *pvt = NULL; | |
189 | ||
190 | list_for_each_entry(pvt, &ghes_reglist, list) { | |
191 | if (ghes == pvt->ghes) | |
192 | break; | |
193 | } | |
194 | if (!pvt) { | |
195 | pr_err("Internal error: Can't find EDAC structure\n"); | |
196 | return; | |
197 | } | |
198 | mci = pvt->mci; | |
199 | e = &mci->error_desc; | |
200 | ||
201 | /* Cleans the error report buffer */ | |
202 | memset(e, 0, sizeof (*e)); | |
203 | e->error_count = 1; | |
204 | e->msg = "APEI"; | |
205 | strcpy(e->label, "unknown"); | |
206 | e->other_detail = ""; | |
207 | ||
208 | if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { | |
209 | e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT; | |
210 | e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK; | |
211 | e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK); | |
212 | } | |
213 | ||
214 | switch (sev) { | |
215 | case GHES_SEV_CORRECTED: | |
216 | type = HW_EVENT_ERR_CORRECTED; | |
217 | break; | |
218 | case GHES_SEV_RECOVERABLE: | |
219 | type = HW_EVENT_ERR_UNCORRECTED; | |
220 | break; | |
221 | case GHES_SEV_PANIC: | |
222 | type = HW_EVENT_ERR_FATAL; | |
223 | break; | |
224 | default: | |
225 | case GHES_SEV_NO: | |
226 | type = HW_EVENT_ERR_INFO; | |
227 | } | |
228 | ||
229 | sprintf(e->location, | |
230 | "node:%d card:%d module:%d bank:%d device:%d row: %d column:%d bit_pos:%d", | |
231 | mem_err->node, mem_err->card, mem_err->module, | |
232 | mem_err->bank, mem_err->device, mem_err->row, mem_err->column, | |
233 | mem_err->bit_pos); | |
234 | edac_dbg(3, "error at location %s\n", e->location); | |
235 | ||
236 | edac_raw_mc_handle_error(type, mci, e); | |
77c5f5d2 MCC |
237 | } |
238 | EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error); | |
239 | ||
240 | int ghes_edac_register(struct ghes *ghes, struct device *dev) | |
241 | { | |
32fa1f53 MCC |
242 | bool fake = false; |
243 | int rc, num_dimm = 0; | |
77c5f5d2 MCC |
244 | struct mem_ctl_info *mci; |
245 | struct edac_mc_layer layers[1]; | |
77c5f5d2 | 246 | struct ghes_edac_pvt *pvt; |
32fa1f53 MCC |
247 | struct ghes_edac_dimm_fill dimm_fill; |
248 | ||
249 | /* Get the number of DIMMs */ | |
250 | dmi_walk(ghes_edac_count_dimms, &num_dimm); | |
251 | ||
252 | /* Check if we've got a bogus BIOS */ | |
253 | if (num_dimm == 0) { | |
254 | fake = true; | |
255 | num_dimm = 1; | |
256 | } | |
77c5f5d2 MCC |
257 | |
258 | layers[0].type = EDAC_MC_LAYER_ALL_MEM; | |
32fa1f53 | 259 | layers[0].size = num_dimm; |
77c5f5d2 MCC |
260 | layers[0].is_virt_csrow = true; |
261 | ||
262 | /* | |
263 | * We need to serialize edac_mc_alloc() and edac_mc_add_mc(), | |
264 | * to avoid duplicated memory controller numbers | |
265 | */ | |
266 | mutex_lock(&ghes_edac_lock); | |
267 | mci = edac_mc_alloc(ghes_edac_mc_num, ARRAY_SIZE(layers), layers, | |
268 | sizeof(*pvt)); | |
269 | if (!mci) { | |
d2a68566 | 270 | pr_info("Can't allocate memory for EDAC data\n"); |
77c5f5d2 MCC |
271 | mutex_unlock(&ghes_edac_lock); |
272 | return -ENOMEM; | |
273 | } | |
274 | ||
275 | pvt = mci->pvt_info; | |
276 | memset(pvt, 0, sizeof(*pvt)); | |
f04c62a7 | 277 | list_add_tail(&pvt->list, &ghes_reglist); |
77c5f5d2 MCC |
278 | pvt->ghes = ghes; |
279 | pvt->mci = mci; | |
280 | mci->pdev = dev; | |
281 | ||
282 | mci->mtype_cap = MEM_FLAG_EMPTY; | |
283 | mci->edac_ctl_cap = EDAC_FLAG_NONE; | |
284 | mci->edac_cap = EDAC_FLAG_NONE; | |
285 | mci->mod_name = "ghes_edac.c"; | |
286 | mci->mod_ver = GHES_EDAC_REVISION; | |
287 | mci->ctl_name = "ghes_edac"; | |
288 | mci->dev_name = "ghes"; | |
289 | ||
d2a68566 MCC |
290 | if (!ghes_edac_mc_num) { |
291 | if (!fake) { | |
292 | pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n"); | |
293 | pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n"); | |
294 | pr_info("So, the end result of using this driver varies from vendor to vendor.\n"); | |
295 | pr_info("If you find incorrect reports, please contact your hardware vendor\n"); | |
296 | pr_info("to correct its BIOS.\n"); | |
297 | pr_info("This system has %d DIMM sockets.\n", | |
298 | num_dimm); | |
299 | } else { | |
300 | pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n"); | |
301 | pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n"); | |
302 | pr_info("work on such system. Use this driver with caution\n"); | |
303 | } | |
304 | } | |
305 | ||
32fa1f53 | 306 | if (!fake) { |
5ee726db MCC |
307 | /* |
308 | * Fill DIMM info from DMI for the memory controller #0 | |
309 | * | |
310 | * Keep it in blank for the other memory controllers, as | |
311 | * there's no reliable way to properly credit each DIMM to | |
312 | * the memory controller, as different BIOSes fill the | |
313 | * DMI bank location fields on different ways | |
314 | */ | |
315 | if (!ghes_edac_mc_num) { | |
316 | dimm_fill.count = 0; | |
317 | dimm_fill.mci = mci; | |
318 | dmi_walk(ghes_edac_dmidecode, &dimm_fill); | |
319 | } | |
32fa1f53 MCC |
320 | } else { |
321 | struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, | |
322 | mci->n_layers, 0, 0, 0); | |
77c5f5d2 | 323 | |
d2a68566 | 324 | dimm->nr_pages = 1; |
32fa1f53 MCC |
325 | dimm->grain = 128; |
326 | dimm->mtype = MEM_UNKNOWN; | |
327 | dimm->dtype = DEV_UNKNOWN; | |
328 | dimm->edac_mode = EDAC_SECDED; | |
329 | } | |
77c5f5d2 MCC |
330 | |
331 | rc = edac_mc_add_mc(mci); | |
332 | if (rc < 0) { | |
d2a68566 | 333 | pr_info("Can't register at EDAC core\n"); |
77c5f5d2 MCC |
334 | edac_mc_free(mci); |
335 | mutex_unlock(&ghes_edac_lock); | |
336 | return -ENODEV; | |
337 | } | |
338 | ||
339 | ghes_edac_mc_num++; | |
340 | mutex_unlock(&ghes_edac_lock); | |
341 | return 0; | |
342 | } | |
343 | EXPORT_SYMBOL_GPL(ghes_edac_register); | |
344 | ||
345 | void ghes_edac_unregister(struct ghes *ghes) | |
346 | { | |
347 | struct mem_ctl_info *mci; | |
348 | struct ghes_edac_pvt *pvt; | |
349 | ||
350 | list_for_each_entry(pvt, &ghes_reglist, list) { | |
351 | if (ghes == pvt->ghes) { | |
352 | mci = pvt->mci; | |
353 | edac_mc_del_mc(mci->pdev); | |
354 | edac_mc_free(mci); | |
355 | list_del(&pvt->list); | |
356 | } | |
357 | } | |
358 | } | |
359 | EXPORT_SYMBOL_GPL(ghes_edac_unregister); |