ACPI, APEI, CPER: Add UEFI 2.4 support for memory error
[linux-2.6-block.git] / drivers / acpi / apei / cper.c
CommitLineData
06d65dea
HY
1/*
2 * UEFI Common Platform Error Record (CPER) support
3 *
4 * Copyright (C) 2010, Intel Corp.
5 * Author: Huang Ying <ying.huang@intel.com>
6 *
7 * CPER is the format used to describe platform hardware error by
88f074f4 8 * various tables, such as ERST, BERT and HEST etc.
06d65dea
HY
9 *
10 * For more information about CPER, please refer to Appendix N of UEFI
147de147 11 * Specification version 2.4.
06d65dea
HY
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License version
15 * 2 as published by the Free Software Foundation.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 */
26
27#include <linux/kernel.h>
28#include <linux/module.h>
29#include <linux/time.h>
30#include <linux/cper.h>
31#include <linux/acpi.h>
1d521000 32#include <linux/pci.h>
c413d768 33#include <linux/aer.h>
06d65dea
HY
34
35/*
36 * CPER record ID need to be unique even after reboot, because record
37 * ID is used as index for ERST storage, while CPER records from
38 * multiple boot may co-exist in ERST.
39 */
40u64 cper_next_record_id(void)
41{
42 static atomic64_t seq;
43
44 if (!atomic64_read(&seq))
45 atomic64_set(&seq, ((u64)get_seconds()) << 32);
46
47 return atomic64_inc_return(&seq);
48}
49EXPORT_SYMBOL_GPL(cper_next_record_id);
50
f59c55d0
HY
/* Names of the error severities, indexed by severity value. */
static const char * const cper_severity_strs[] = {
	"recoverable",
	"fatal",
	"corrected",
	"info",
};
57
58static const char *cper_severity_str(unsigned int severity)
59{
60 return severity < ARRAY_SIZE(cper_severity_strs) ?
61 cper_severity_strs[severity] : "unknown";
62}
63
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position
 * @strs_size: size of the string array: @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs.
 * Strings are joined with ", "; when appending another string would make
 * the line longer than 80 characters, the line is printed and a new one
 * is started, with @pfx printed at the beginning of each line.  Bits
 * whose entry in @strs is NULL are skipped.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
		     const char * const strs[], unsigned int strs_size)
{
	unsigned int i;
	int len = 0;
	const char *str;
	char buf[84];

	/*
	 * Walk the mask by shifting it right instead of building 1U << i:
	 * the shift count then never reaches the width of @bits, even when
	 * @strs_size is larger than that, and the loop stops as soon as no
	 * set bits remain.
	 */
	for (i = 0; i < strs_size && bits; i++, bits >>= 1) {
		if (!(bits & 1))
			continue;
		str = strs[i];
		if (!str)
			continue;
		/* Flush the pending line if ", <str>" would not fit in 80. */
		if (len && len + strlen(str) + 2 > 80) {
			printk("%s\n", buf);
			len = 0;
		}
		/* scnprintf() keeps len equal to what is really in buf. */
		if (!len)
			len = scnprintf(buf, sizeof(buf), "%s%s", pfx, str);
		else
			len += scnprintf(buf + len, sizeof(buf) - len,
					 ", %s", str);
	}
	if (len)
		printk("%s\n", buf);
}
100
/* Processor type names, indexed by the processor type value. */
static const char * const cper_proc_type_strs[] = {
	[0] = "IA32/X64",
	[1] = "IA64",
};
105
/* Processor ISA names, indexed by the processor ISA value. */
static const char * const cper_proc_isa_strs[] = {
	[0] = "IA32",
	[1] = "IA64",
	[2] = "X64",
};
111
/* Processor error type names, indexed by bit position in the error type mask. */
static const char * const cper_proc_error_type_strs[] = {
	[0] = "cache error",
	[1] = "TLB error",
	[2] = "bus error",
	[3] = "micro-architectural error",
};
118
/* Processor operation names, indexed by the operation value. */
static const char * const cper_proc_op_strs[] = {
	[0] = "unknown or generic",
	[1] = "data read",
	[2] = "data write",
	[3] = "instruction execution",
};
125
/* Processor error flag names, indexed by bit position in the flags mask. */
static const char * const cper_proc_flag_strs[] = {
	[0] = "restartable",
	[1] = "precise IP",
	[2] = "overflow",
	[3] = "corrected",
};
132
133static void cper_print_proc_generic(const char *pfx,
134 const struct cper_sec_proc_generic *proc)
135{
136 if (proc->validation_bits & CPER_PROC_VALID_TYPE)
137 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
138 proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ?
139 cper_proc_type_strs[proc->proc_type] : "unknown");
140 if (proc->validation_bits & CPER_PROC_VALID_ISA)
141 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
142 proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ?
143 cper_proc_isa_strs[proc->proc_isa] : "unknown");
144 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
145 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
146 cper_print_bits(pfx, proc->proc_error_type,
147 cper_proc_error_type_strs,
148 ARRAY_SIZE(cper_proc_error_type_strs));
149 }
150 if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
151 printk("%s""operation: %d, %s\n", pfx, proc->operation,
152 proc->operation < ARRAY_SIZE(cper_proc_op_strs) ?
153 cper_proc_op_strs[proc->operation] : "unknown");
154 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
155 printk("%s""flags: 0x%02x\n", pfx, proc->flags);
156 cper_print_bits(pfx, proc->flags, cper_proc_flag_strs,
157 ARRAY_SIZE(cper_proc_flag_strs));
158 }
159 if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
160 printk("%s""level: %d\n", pfx, proc->level);
161 if (proc->validation_bits & CPER_PROC_VALID_VERSION)
162 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
163 if (proc->validation_bits & CPER_PROC_VALID_ID)
164 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
165 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
166 printk("%s""target_address: 0x%016llx\n",
167 pfx, proc->target_addr);
168 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
169 printk("%s""requestor_id: 0x%016llx\n",
170 pfx, proc->requestor_id);
171 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
172 printk("%s""responder_id: 0x%016llx\n",
173 pfx, proc->responder_id);
174 if (proc->validation_bits & CPER_PROC_VALID_IP)
175 printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
176}
177
/* Memory error type names, indexed by the memory error type value. */
static const char * const cper_mem_err_type_strs[] = {
	"unknown",
	"no error",
	"single-bit ECC",
	"multi-bit ECC",
	"single-symbol chipkill ECC",
	"multi-symbol chipkill ECC",
	"master abort",
	"target abort",
	"parity error",
	"watchdog timeout",
	"invalid address",
	"mirror Broken",
	"memory sparing",
	"scrub corrected error",
	"scrub uncorrected error",
	"physical memory map-out event",
};
196
197static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
198{
199 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
200 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
147de147 201 if (mem->validation_bits & CPER_MEM_VALID_PA)
f59c55d0
HY
202 printk("%s""physical_address: 0x%016llx\n",
203 pfx, mem->physical_addr);
147de147 204 if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
f59c55d0
HY
205 printk("%s""physical_address_mask: 0x%016llx\n",
206 pfx, mem->physical_addr_mask);
207 if (mem->validation_bits & CPER_MEM_VALID_NODE)
208 printk("%s""node: %d\n", pfx, mem->node);
209 if (mem->validation_bits & CPER_MEM_VALID_CARD)
210 printk("%s""card: %d\n", pfx, mem->card);
211 if (mem->validation_bits & CPER_MEM_VALID_MODULE)
212 printk("%s""module: %d\n", pfx, mem->module);
213 if (mem->validation_bits & CPER_MEM_VALID_BANK)
214 printk("%s""bank: %d\n", pfx, mem->bank);
215 if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
216 printk("%s""device: %d\n", pfx, mem->device);
217 if (mem->validation_bits & CPER_MEM_VALID_ROW)
218 printk("%s""row: %d\n", pfx, mem->row);
219 if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
220 printk("%s""column: %d\n", pfx, mem->column);
221 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
222 printk("%s""bit_position: %d\n", pfx, mem->bit_pos);
223 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
224 printk("%s""requestor_id: 0x%016llx\n", pfx, mem->requestor_id);
225 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
226 printk("%s""responder_id: 0x%016llx\n", pfx, mem->responder_id);
227 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
228 printk("%s""target_id: 0x%016llx\n", pfx, mem->target_id);
229 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
230 u8 etype = mem->error_type;
231 printk("%s""error_type: %d, %s\n", pfx, etype,
232 etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
233 cper_mem_err_type_strs[etype] : "unknown");
234 }
235}
236
/* PCIe port type names, indexed by the port type value. */
static const char * const cper_pcie_port_type_strs[] = {
	"PCIe end point",
	"legacy PCI end point",
	"unknown",
	"unknown",
	"root port",
	"upstream switch port",
	"downstream switch port",
	"PCIe to PCI/PCI-X bridge",
	"PCI/PCI-X to PCIe bridge",
	"root complex integrated endpoint device",
	"root complex event collector",
};
250
c413d768 251static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
88f074f4 252 const struct acpi_generic_data *gdata)
f59c55d0
HY
253{
254 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
255 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
256 pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
257 cper_pcie_port_type_strs[pcie->port_type] : "unknown");
258 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
259 printk("%s""version: %d.%d\n", pfx,
260 pcie->version.major, pcie->version.minor);
261 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
262 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
263 pcie->command, pcie->status);
264 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
265 const __u8 *p;
266 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
267 pcie->device_id.segment, pcie->device_id.bus,
268 pcie->device_id.device, pcie->device_id.function);
269 printk("%s""slot: %d\n", pfx,
270 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
271 printk("%s""secondary_bus: 0x%02x\n", pfx,
272 pcie->device_id.secondary_bus);
273 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
274 pcie->device_id.vendor_id, pcie->device_id.device_id);
275 p = pcie->device_id.class_code;
276 printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
277 }
278 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
279 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
280 pcie->serial_number.lower, pcie->serial_number.upper);
281 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
282 printk(
283 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
284 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
285}
286
/* Error section flag names, indexed by bit position in the section flags. */
static const char * const cper_estatus_section_flag_strs[] = {
	[0] = "primary",
	[1] = "containment warning",
	[2] = "reset",
	[3] = "error threshold exceeded",
	[4] = "resource not accessible",
	[5] = "latent error",
};
295
88f074f4
CG
296static void cper_estatus_print_section(
297 const char *pfx, const struct acpi_generic_data *gdata, int sec_no)
f59c55d0
HY
298{
299 uuid_le *sec_type = (uuid_le *)gdata->section_type;
300 __u16 severity;
301
302 severity = gdata->error_severity;
303 printk("%s""section: %d, severity: %d, %s\n", pfx, sec_no, severity,
304 cper_severity_str(severity));
305 printk("%s""flags: 0x%02x\n", pfx, gdata->flags);
88f074f4
CG
306 cper_print_bits(pfx, gdata->flags, cper_estatus_section_flag_strs,
307 ARRAY_SIZE(cper_estatus_section_flag_strs));
f59c55d0
HY
308 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
309 printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
310 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
311 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
312
313 if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
314 struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
315 printk("%s""section_type: general processor error\n", pfx);
316 if (gdata->error_data_length >= sizeof(*proc_err))
317 cper_print_proc_generic(pfx, proc_err);
318 else
319 goto err_section_too_small;
320 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
321 struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
322 printk("%s""section_type: memory error\n", pfx);
323 if (gdata->error_data_length >= sizeof(*mem_err))
324 cper_print_mem(pfx, mem_err);
325 else
326 goto err_section_too_small;
327 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
328 struct cper_sec_pcie *pcie = (void *)(gdata + 1);
329 printk("%s""section_type: PCIe error\n", pfx);
330 if (gdata->error_data_length >= sizeof(*pcie))
c413d768 331 cper_print_pcie(pfx, pcie, gdata);
f59c55d0
HY
332 else
333 goto err_section_too_small;
334 } else
335 printk("%s""section type: unknown, %pUl\n", pfx, sec_type);
336
337 return;
338
339err_section_too_small:
340 pr_err(FW_WARN "error section length is too small\n");
341}
342
88f074f4
CG
343void cper_estatus_print(const char *pfx,
344 const struct acpi_generic_status *estatus)
f59c55d0 345{
88f074f4 346 struct acpi_generic_data *gdata;
f59c55d0
HY
347 unsigned int data_len, gedata_len;
348 int sec_no = 0;
349 __u16 severity;
350
88f074f4 351 printk("%s""Generic Hardware Error Status\n", pfx);
f59c55d0
HY
352 severity = estatus->error_severity;
353 printk("%s""severity: %d, %s\n", pfx, severity,
354 cper_severity_str(severity));
355 data_len = estatus->data_length;
88f074f4 356 gdata = (struct acpi_generic_data *)(estatus + 1);
833ba4b1 357 while (data_len >= sizeof(*gdata)) {
f59c55d0 358 gedata_len = gdata->error_data_length;
88f074f4 359 cper_estatus_print_section(pfx, gdata, sec_no);
f59c55d0 360 data_len -= gedata_len + sizeof(*gdata);
37d2a362 361 gdata = (void *)(gdata + 1) + gedata_len;
f59c55d0
HY
362 sec_no++;
363 }
364}
88f074f4 365EXPORT_SYMBOL_GPL(cper_estatus_print);
f59c55d0 366
88f074f4 367int cper_estatus_check_header(const struct acpi_generic_status *estatus)
06d65dea
HY
368{
369 if (estatus->data_length &&
88f074f4 370 estatus->data_length < sizeof(struct acpi_generic_data))
06d65dea
HY
371 return -EINVAL;
372 if (estatus->raw_data_length &&
373 estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
374 return -EINVAL;
375
376 return 0;
377}
88f074f4 378EXPORT_SYMBOL_GPL(cper_estatus_check_header);
06d65dea 379
88f074f4 380int cper_estatus_check(const struct acpi_generic_status *estatus)
06d65dea 381{
88f074f4 382 struct acpi_generic_data *gdata;
06d65dea
HY
383 unsigned int data_len, gedata_len;
384 int rc;
385
88f074f4 386 rc = cper_estatus_check_header(estatus);
06d65dea
HY
387 if (rc)
388 return rc;
389 data_len = estatus->data_length;
88f074f4 390 gdata = (struct acpi_generic_data *)(estatus + 1);
aaf9d93b 391 while (data_len >= sizeof(*gdata)) {
06d65dea
HY
392 gedata_len = gdata->error_data_length;
393 if (gedata_len > data_len - sizeof(*gdata))
394 return -EINVAL;
395 data_len -= gedata_len + sizeof(*gdata);
37d2a362 396 gdata = (void *)(gdata + 1) + gedata_len;
06d65dea
HY
397 }
398 if (data_len)
399 return -EINVAL;
400
401 return 0;
402}
88f074f4 403EXPORT_SYMBOL_GPL(cper_estatus_check);