Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
c9c2877d HD |
2 | /* |
3 | * Page Deallocation Table (PDT) support | |
4 | * | |
54ac8fcb HD |
5 | * The Page Deallocation Table (PDT) is maintained by firmware and holds a |
6 | * list of memory addresses in which memory errors were detected. | |
7 | * The list contains both single-bit (correctable) and double-bit | |
8 | * (uncorrectable) errors. | |
c9c2877d HD |
9 | * |
10 | * Copyright 2017 by Helge Deller <deller@gmx.de> | |
11 | * | |
54ac8fcb HD |
12 | * possible future enhancements: |
13 | * - add userspace interface via procfs or sysfs to clear PDT | |
c9c2877d HD |
14 | */ |
15 | ||
16 | #include <linux/memblock.h> | |
17 | #include <linux/seq_file.h> | |
54ac8fcb | 18 | #include <linux/kthread.h> |
ea697648 | 19 | #include <linux/initrd.h> |
c9c2877d HD |
20 | |
21 | #include <asm/pdc.h> | |
22 | #include <asm/pdcpat.h> | |
23 | #include <asm/sections.h> | |
24 | #include <asm/pgtable.h> | |
25 | ||
/* Which firmware interface provides the PDT on this machine. */
enum pdt_access_type {
	PDT_NONE,	/* firmware provides no PDT information */
	PDT_PDC,	/* non-PAT machines: standard PDC call */
	PDT_PAT_NEW,	/* newer PAT machines: info reported for all cells */
	PDT_PAT_CELL	/* older PAT machines: per-cell info only */
};

/* Detected access type; set once during pdc_pdt_init(). */
static enum pdt_access_type pdt_type;

/* PDT poll interval: 1 minute if errors, 5 minutes if everything OK. */
#define PDT_POLL_INTERVAL_DEFAULT	(5*60*HZ)
#define PDT_POLL_INTERVAL_SHORT		(1*60*HZ)
static unsigned long pdt_poll_interval = PDT_POLL_INTERVAL_DEFAULT;
39 | ||
c9c2877d HD |
/* global PDT status information */
static struct pdc_mem_retinfo pdt_status;

/* The whole PDT snapshot must fit into one page. */
#define MAX_PDT_TABLE_SIZE	PAGE_SIZE
#define MAX_PDT_ENTRIES		(MAX_PDT_TABLE_SIZE / sizeof(unsigned long))
/* Static buffer receiving the PDT entries read from firmware. */
static unsigned long pdt_entry[MAX_PDT_ENTRIES] __page_aligned_bss;

/*
 * Constants for the pdt_entry format:
 * A pdt_entry holds the physical address in bits 0-57, bits 58-61 are
 * reserved, bit 62 is the perm bit and bit 63 is the error_type bit.
 * The perm bit indicates whether the error have been verified as a permanent
 * error (value of 1) or has not been verified, and may be transient (value
 * of 0). The error_type bit indicates whether the error is a single bit error
 * (value of 1) or a multiple bit error.
 * On non-PAT machines phys_addr is encoded in bits 0-59 and error_type in bit
 * 63. Those machines don't provide the perm bit.
 */

#define PDT_ADDR_PHYS_MASK	(pdt_type != PDT_PDC ? ~0x3f : ~0x0f)
#define PDT_ADDR_PERM_ERR	(pdt_type != PDT_PDC ? 2UL : 0UL)
#define PDT_ADDR_SINGLE_ERR	1UL
c9c2877d HD |
62 | |
63 | /* report PDT entries via /proc/meminfo */ | |
64 | void arch_report_meminfo(struct seq_file *m) | |
65 | { | |
66 | if (pdt_type == PDT_NONE) | |
67 | return; | |
68 | ||
69 | seq_printf(m, "PDT_max_entries: %7lu\n", | |
70 | pdt_status.pdt_size); | |
71 | seq_printf(m, "PDT_cur_entries: %7lu\n", | |
72 | pdt_status.pdt_entries); | |
73 | } | |
74 | ||
54ac8fcb HD |
75 | static int get_info_pat_new(void) |
76 | { | |
77 | struct pdc_pat_mem_retinfo pat_rinfo; | |
78 | int ret; | |
79 | ||
80 | /* newer PAT machines like C8000 report info for all cells */ | |
81 | if (is_pdc_pat()) | |
82 | ret = pdc_pat_mem_pdt_info(&pat_rinfo); | |
83 | else | |
84 | return PDC_BAD_PROC; | |
85 | ||
86 | pdt_status.pdt_size = pat_rinfo.max_pdt_entries; | |
87 | pdt_status.pdt_entries = pat_rinfo.current_pdt_entries; | |
88 | pdt_status.pdt_status = 0; | |
89 | pdt_status.first_dbe_loc = pat_rinfo.first_dbe_loc; | |
90 | pdt_status.good_mem = pat_rinfo.good_mem; | |
91 | ||
92 | return ret; | |
93 | } | |
94 | ||
95 | static int get_info_pat_cell(void) | |
96 | { | |
97 | struct pdc_pat_mem_cell_pdt_retinfo cell_rinfo; | |
98 | int ret; | |
99 | ||
100 | /* older PAT machines like rp5470 report cell info only */ | |
101 | if (is_pdc_pat()) | |
102 | ret = pdc_pat_mem_pdt_cell_info(&cell_rinfo, parisc_cell_num); | |
103 | else | |
104 | return PDC_BAD_PROC; | |
105 | ||
106 | pdt_status.pdt_size = cell_rinfo.max_pdt_entries; | |
107 | pdt_status.pdt_entries = cell_rinfo.current_pdt_entries; | |
108 | pdt_status.pdt_status = 0; | |
109 | pdt_status.first_dbe_loc = cell_rinfo.first_dbe_loc; | |
110 | pdt_status.good_mem = cell_rinfo.good_mem; | |
111 | ||
112 | return ret; | |
113 | } | |
114 | ||
115 | static void report_mem_err(unsigned long pde) | |
116 | { | |
117 | struct pdc_pat_mem_phys_mem_location loc; | |
118 | unsigned long addr; | |
119 | char dimm_txt[32]; | |
120 | ||
121 | addr = pde & PDT_ADDR_PHYS_MASK; | |
122 | ||
123 | /* show DIMM slot description on PAT machines */ | |
124 | if (is_pdc_pat()) { | |
125 | pdc_pat_mem_get_dimm_phys_location(&loc, addr); | |
126 | sprintf(dimm_txt, "DIMM slot %02x, ", loc.dimm_slot); | |
127 | } else | |
128 | dimm_txt[0] = 0; | |
129 | ||
130 | pr_warn("PDT: BAD MEMORY at 0x%08lx, %s%s%s-bit error.\n", | |
131 | addr, dimm_txt, | |
132 | pde & PDT_ADDR_PERM_ERR ? "permanent ":"", | |
133 | pde & PDT_ADDR_SINGLE_ERR ? "single":"multi"); | |
134 | } | |
135 | ||
136 | ||
c9c2877d HD |
/*
 * pdc_pdt_init()
 *
 * Initialize kernel PDT structures, read initial PDT table from firmware,
 * report all current PDT entries and mark bad memory with memblock_reserve()
 * to avoid that the kernel will use broken memory areas.
 *
 */
void __init pdc_pdt_init(void)
{
	int ret, i;
	unsigned long entries;
	struct pdc_mem_read_pdt pdt_read_ret;

	/* Probe firmware interfaces newest-first: PAT all-cell, then
	 * PAT per-cell, then the standard PDC call. pdt_type records
	 * which one succeeded. */
	pdt_type = PDT_PAT_NEW;
	ret = get_info_pat_new();

	if (ret != PDC_OK) {
		pdt_type = PDT_PAT_CELL;
		ret = get_info_pat_cell();
	}

	if (ret != PDC_OK) {
		pdt_type = PDT_PDC;
		/* non-PAT machines provide the standard PDC call */
		ret = pdc_mem_pdt_info(&pdt_status);
	}

	if (ret != PDC_OK) {
		pdt_type = PDT_NONE;
		pr_info("PDT: Firmware does not provide any page deallocation"
			" information.\n");
		return;
	}

	entries = pdt_status.pdt_entries;
	/* Clamp to the static pdt_entry[] buffer; more would overflow it. */
	if (WARN_ON(entries > MAX_PDT_ENTRIES))
		entries = pdt_status.pdt_entries = MAX_PDT_ENTRIES;

	pr_info("PDT: type %s, size %lu, entries %lu, status %lu, dbe_loc 0x%lx,"
		" good_mem %lu MB\n",
			pdt_type == PDT_PDC ? __stringify(PDT_PDC) :
			pdt_type == PDT_PAT_CELL ? __stringify(PDT_PAT_CELL)
						 : __stringify(PDT_PAT_NEW),
			pdt_status.pdt_size, pdt_status.pdt_entries,
			pdt_status.pdt_status, pdt_status.first_dbe_loc,
			pdt_status.good_mem / 1024 / 1024);

	if (entries == 0) {
		pr_info("PDT: Firmware reports all memory OK.\n");
		return;
	}

	/* Warn early if a double-bit error lies within the kernel image. */
	if (pdt_status.first_dbe_loc &&
		pdt_status.first_dbe_loc <= __pa((unsigned long)&_end))
		pr_crit("CRITICAL: Bad memory inside kernel image memory area!\n");

	pr_warn("PDT: Firmware reports %lu entries of faulty memory:\n",
		entries);

	/* Read the initial PDT snapshot via the detected interface. */
	if (pdt_type == PDT_PDC)
		ret = pdc_mem_pdt_read_entries(&pdt_read_ret, pdt_entry);
	else {
#ifdef CONFIG_64BIT
		struct pdc_pat_mem_read_pd_retinfo pat_pret;

		if (pdt_type == PDT_PAT_CELL)
			ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry,
				MAX_PDT_ENTRIES);
		else
			ret = pdc_pat_mem_read_pd_pdt(&pat_pret, pdt_entry,
				MAX_PDT_TABLE_SIZE, 0);
#else
		/* PAT interfaces are 64-bit only. */
		ret = PDC_BAD_PROC;
#endif
	}

	if (ret != PDC_OK) {
		pdt_type = PDT_NONE;
		pr_warn("PDT: Get PDT entries failed with %d\n", ret);
		return;
	}

	/* Report each bad page and keep the allocator away from it. */
	for (i = 0; i < pdt_status.pdt_entries; i++) {
		unsigned long addr;

		report_mem_err(pdt_entry[i]);

		addr = pdt_entry[i] & PDT_ADDR_PHYS_MASK;
		if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) &&
			addr >= initrd_start && addr < initrd_end)
			pr_crit("CRITICAL: initrd possibly broken "
				"due to bad memory!\n");

		/* mark memory page bad */
		memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE);
	}
}
c46bafc4 | 235 | |
54ac8fcb HD |
236 | |
/*
 * This is the PDT kernel thread main loop.
 *
 * Polls firmware for new PDT entries every pdt_poll_interval jiffies
 * (shortened once errors have been seen), reports newly appeared bad
 * pages and hands them to the memory-failure code when available.
 */

static int pdt_mainloop(void *unused)
{
	struct pdc_mem_read_pdt pdt_read_ret;
	struct pdc_pat_mem_read_pd_retinfo pat_pret __maybe_unused;
	unsigned long old_num_entries;
	unsigned long *bad_mem_ptr;
	int num, ret;

	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);

		old_num_entries = pdt_status.pdt_entries;

		schedule_timeout(pdt_poll_interval);
		if (kthread_should_stop())
			break;

		/* Do we have new PDT entries? */
		switch (pdt_type) {
		case PDT_PAT_NEW:
			ret = get_info_pat_new();
			break;
		case PDT_PAT_CELL:
			ret = get_info_pat_cell();
			break;
		default:
			ret = pdc_mem_pdt_info(&pdt_status);
			break;
		}

		if (ret != PDC_OK) {
			pr_warn("PDT: unexpected failure %d\n", ret);
			return -EINVAL;
		}

		/* if no new PDT entries, just wait again */
		num = pdt_status.pdt_entries - old_num_entries;
		if (num <= 0)
			continue;

		/* decrease poll interval in case we found memory errors */
		if (pdt_status.pdt_entries &&
			pdt_poll_interval == PDT_POLL_INTERVAL_DEFAULT)
			pdt_poll_interval = PDT_POLL_INTERVAL_SHORT;

		/* limit entries to get */
		if (num > MAX_PDT_ENTRIES) {
			num = MAX_PDT_ENTRIES;
			pdt_status.pdt_entries = old_num_entries + num;
		}

		/*
		 * Get new entries. PDT_PAT_NEW reads at an explicit byte
		 * offset, so new entries land at the start of pdt_entry[];
		 * the other interfaces re-read the whole table, so the new
		 * entries start at index old_num_entries.
		 * NOTE(review): ret from these read calls is not checked
		 * before the entries are consumed below - looks intentional
		 * (best effort), but worth confirming.
		 */
		switch (pdt_type) {
#ifdef CONFIG_64BIT
		case PDT_PAT_CELL:
			if (pdt_status.pdt_entries > MAX_PDT_ENTRIES) {
				pr_crit("PDT: too many entries.\n");
				return -ENOMEM;
			}
			ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry,
				MAX_PDT_ENTRIES);
			bad_mem_ptr = &pdt_entry[old_num_entries];
			break;
		case PDT_PAT_NEW:
			ret = pdc_pat_mem_read_pd_pdt(&pat_pret,
				pdt_entry,
				num * sizeof(unsigned long),
				old_num_entries * sizeof(unsigned long));
			bad_mem_ptr = &pdt_entry[0];
			break;
#endif
		default:
			ret = pdc_mem_pdt_read_entries(&pdt_read_ret,
				pdt_entry);
			bad_mem_ptr = &pdt_entry[old_num_entries];
			break;
		}

		/* report and mark memory broken */
		while (num--) {
			unsigned long pde = *bad_mem_ptr++;

			report_mem_err(pde);

#ifdef CONFIG_MEMORY_FAILURE
			/* permanent or multi-bit errors are hard-offlined,
			 * unverified single-bit errors soft-offlined */
			if ((pde & PDT_ADDR_PERM_ERR) ||
			    ((pde & PDT_ADDR_SINGLE_ERR) == 0))
				memory_failure(pde >> PAGE_SHIFT, 0);
			else
				soft_offline_page(
					pfn_to_page(pde >> PAGE_SHIFT), 0);
#else
			pr_crit("PDT: memory error at 0x%lx ignored.\n"
				"Rebuild kernel with CONFIG_MEMORY_FAILURE=y "
				"for real handling.\n",
				pde & PDT_ADDR_PHYS_MASK);
#endif

		}
	}

	return 0;
}
54ac8fcb HD |
344 | |
345 | ||
346 | static int __init pdt_initcall(void) | |
347 | { | |
348 | struct task_struct *kpdtd_task; | |
349 | ||
350 | if (pdt_type == PDT_NONE) | |
351 | return -ENODEV; | |
352 | ||
353 | kpdtd_task = kthread_create(pdt_mainloop, NULL, "kpdtd"); | |
354 | if (IS_ERR(kpdtd_task)) | |
355 | return PTR_ERR(kpdtd_task); | |
356 | ||
357 | wake_up_process(kpdtd_task); | |
358 | ||
359 | return 0; | |
360 | } | |
361 | ||
362 | late_initcall(pdt_initcall); |