Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
c9c2877d HD |
2 | /* |
3 | * Page Deallocation Table (PDT) support | |
4 | * | |
54ac8fcb HD |
5 | * The Page Deallocation Table (PDT) is maintained by firmware and holds a |
6 | * list of memory addresses in which memory errors were detected. | |
7 | * The list contains both single-bit (correctable) and double-bit | |
8 | * (uncorrectable) errors. | |
c9c2877d HD |
9 | * |
10 | * Copyright 2017 by Helge Deller <deller@gmx.de> | |
11 | * | |
54ac8fcb HD |
12 | * possible future enhancements: |
13 | * - add userspace interface via procfs or sysfs to clear PDT | |
c9c2877d HD |
14 | */ |
15 | ||
16 | #include <linux/memblock.h> | |
17 | #include <linux/seq_file.h> | |
54ac8fcb | 18 | #include <linux/kthread.h> |
954b41be | 19 | #include <linux/proc_fs.h> |
ea697648 | 20 | #include <linux/initrd.h> |
65fddcfc | 21 | #include <linux/pgtable.h> |
d027122d | 22 | #include <linux/mm.h> |
c9c2877d HD |
23 | |
24 | #include <asm/pdc.h> | |
25 | #include <asm/pdcpat.h> | |
26 | #include <asm/sections.h> | |
c4551d1b | 27 | #include <asm/pgtable.h> |
c9c2877d HD |
28 | |
/* Firmware interface used to obtain the Page Deallocation Table. */
enum pdt_access_type {
	PDT_NONE,	/* no usable PDT interface */
	PDT_PDC,	/* standard PDC call of non-PAT machines */
	PDT_PAT_NEW,	/* newer PAT machines: info for all cells */
	PDT_PAT_CELL,	/* older PAT machines: per-cell info only */
};

/* Interface currently in use; PDT_NONE until one has been probed. */
static enum pdt_access_type pdt_type;
37 | ||
54ac8fcb HD |
38 | /* PDT poll interval: 1 minute if errors, 5 minutes if everything OK. */ |
39 | #define PDT_POLL_INTERVAL_DEFAULT (5*60*HZ) | |
40 | #define PDT_POLL_INTERVAL_SHORT (1*60*HZ) | |
41 | static unsigned long pdt_poll_interval = PDT_POLL_INTERVAL_DEFAULT; | |
42 | ||
c9c2877d HD |
43 | /* global PDT status information */ |
44 | static struct pdc_mem_retinfo pdt_status; | |
45 | ||
46 | #define MAX_PDT_TABLE_SIZE PAGE_SIZE | |
47 | #define MAX_PDT_ENTRIES (MAX_PDT_TABLE_SIZE / sizeof(unsigned long)) | |
48 | static unsigned long pdt_entry[MAX_PDT_ENTRIES] __page_aligned_bss; | |
49 | ||
54ac8fcb HD |
/*
 * Constants for the pdt_entry format:
 *
 * A pdt_entry holds the physical address in bits 0-57, bits 58-61 are
 * reserved, bit 62 is the perm bit and bit 63 is the error_type bit.
 * In this numbering bit 63 is the least significant bit, which is why the
 * flag masks below are 1UL and 2UL.
 *
 * The perm bit indicates whether the error has been verified as a permanent
 * error (value of 1) or has not been verified and may be transient (value
 * of 0).  The error_type bit indicates whether the error is a single-bit
 * error (value of 1) or a multiple-bit error.
 *
 * On non-PAT machines the physical address is encoded in bits 0-59 and the
 * error_type in bit 63.  Those machines don't provide the perm bit.
 */
#define PDT_ADDR_PHYS_MASK	(pdt_type == PDT_PDC ? ~0x0f : ~0x3f)
#define PDT_ADDR_PERM_ERR	(pdt_type == PDT_PDC ? 0UL : 2UL)
#define PDT_ADDR_SINGLE_ERR	1UL
c9c2877d HD |
65 | |
66 | /* report PDT entries via /proc/meminfo */ | |
67 | void arch_report_meminfo(struct seq_file *m) | |
68 | { | |
69 | if (pdt_type == PDT_NONE) | |
70 | return; | |
71 | ||
72 | seq_printf(m, "PDT_max_entries: %7lu\n", | |
73 | pdt_status.pdt_size); | |
74 | seq_printf(m, "PDT_cur_entries: %7lu\n", | |
75 | pdt_status.pdt_entries); | |
76 | } | |
77 | ||
54ac8fcb HD |
78 | static int get_info_pat_new(void) |
79 | { | |
80 | struct pdc_pat_mem_retinfo pat_rinfo; | |
81 | int ret; | |
82 | ||
83 | /* newer PAT machines like C8000 report info for all cells */ | |
84 | if (is_pdc_pat()) | |
85 | ret = pdc_pat_mem_pdt_info(&pat_rinfo); | |
86 | else | |
87 | return PDC_BAD_PROC; | |
88 | ||
89 | pdt_status.pdt_size = pat_rinfo.max_pdt_entries; | |
90 | pdt_status.pdt_entries = pat_rinfo.current_pdt_entries; | |
91 | pdt_status.pdt_status = 0; | |
92 | pdt_status.first_dbe_loc = pat_rinfo.first_dbe_loc; | |
93 | pdt_status.good_mem = pat_rinfo.good_mem; | |
94 | ||
95 | return ret; | |
96 | } | |
97 | ||
98 | static int get_info_pat_cell(void) | |
99 | { | |
100 | struct pdc_pat_mem_cell_pdt_retinfo cell_rinfo; | |
101 | int ret; | |
102 | ||
103 | /* older PAT machines like rp5470 report cell info only */ | |
104 | if (is_pdc_pat()) | |
105 | ret = pdc_pat_mem_pdt_cell_info(&cell_rinfo, parisc_cell_num); | |
106 | else | |
107 | return PDC_BAD_PROC; | |
108 | ||
109 | pdt_status.pdt_size = cell_rinfo.max_pdt_entries; | |
110 | pdt_status.pdt_entries = cell_rinfo.current_pdt_entries; | |
111 | pdt_status.pdt_status = 0; | |
112 | pdt_status.first_dbe_loc = cell_rinfo.first_dbe_loc; | |
113 | pdt_status.good_mem = cell_rinfo.good_mem; | |
114 | ||
115 | return ret; | |
116 | } | |
117 | ||
118 | static void report_mem_err(unsigned long pde) | |
119 | { | |
120 | struct pdc_pat_mem_phys_mem_location loc; | |
121 | unsigned long addr; | |
122 | char dimm_txt[32]; | |
123 | ||
124 | addr = pde & PDT_ADDR_PHYS_MASK; | |
125 | ||
126 | /* show DIMM slot description on PAT machines */ | |
127 | if (is_pdc_pat()) { | |
128 | pdc_pat_mem_get_dimm_phys_location(&loc, addr); | |
129 | sprintf(dimm_txt, "DIMM slot %02x, ", loc.dimm_slot); | |
130 | } else | |
131 | dimm_txt[0] = 0; | |
132 | ||
133 | pr_warn("PDT: BAD MEMORY at 0x%08lx, %s%s%s-bit error.\n", | |
134 | addr, dimm_txt, | |
135 | pde & PDT_ADDR_PERM_ERR ? "permanent ":"", | |
136 | pde & PDT_ADDR_SINGLE_ERR ? "single":"multi"); | |
137 | } | |
138 | ||
139 | ||
c9c2877d HD |
140 | /* |
141 | * pdc_pdt_init() | |
142 | * | |
143 | * Initialize kernel PDT structures, read initial PDT table from firmware, | |
144 | * report all current PDT entries and mark bad memory with memblock_reserve() | |
145 | * to avoid that the kernel will use broken memory areas. | |
146 | * | |
147 | */ | |
148 | void __init pdc_pdt_init(void) | |
149 | { | |
150 | int ret, i; | |
151 | unsigned long entries; | |
152 | struct pdc_mem_read_pdt pdt_read_ret; | |
153 | ||
54ac8fcb HD |
154 | pdt_type = PDT_PAT_NEW; |
155 | ret = get_info_pat_new(); | |
c9c2877d | 156 | |
54ac8fcb HD |
157 | if (ret != PDC_OK) { |
158 | pdt_type = PDT_PAT_CELL; | |
159 | ret = get_info_pat_cell(); | |
160 | } | |
161 | ||
162 | if (ret != PDC_OK) { | |
c9c2877d | 163 | pdt_type = PDT_PDC; |
54ac8fcb | 164 | /* non-PAT machines provide the standard PDC call */ |
c9c2877d HD |
165 | ret = pdc_mem_pdt_info(&pdt_status); |
166 | } | |
167 | ||
168 | if (ret != PDC_OK) { | |
169 | pdt_type = PDT_NONE; | |
170 | pr_info("PDT: Firmware does not provide any page deallocation" | |
171 | " information.\n"); | |
172 | return; | |
173 | } | |
174 | ||
175 | entries = pdt_status.pdt_entries; | |
54ac8fcb HD |
176 | if (WARN_ON(entries > MAX_PDT_ENTRIES)) |
177 | entries = pdt_status.pdt_entries = MAX_PDT_ENTRIES; | |
c9c2877d | 178 | |
54ac8fcb HD |
179 | pr_info("PDT: type %s, size %lu, entries %lu, status %lu, dbe_loc 0x%lx," |
180 | " good_mem %lu MB\n", | |
181 | pdt_type == PDT_PDC ? __stringify(PDT_PDC) : | |
182 | pdt_type == PDT_PAT_CELL ? __stringify(PDT_PAT_CELL) | |
183 | : __stringify(PDT_PAT_NEW), | |
c9c2877d HD |
184 | pdt_status.pdt_size, pdt_status.pdt_entries, |
185 | pdt_status.pdt_status, pdt_status.first_dbe_loc, | |
54ac8fcb | 186 | pdt_status.good_mem / 1024 / 1024); |
c9c2877d HD |
187 | |
188 | if (entries == 0) { | |
189 | pr_info("PDT: Firmware reports all memory OK.\n"); | |
190 | return; | |
191 | } | |
192 | ||
193 | if (pdt_status.first_dbe_loc && | |
194 | pdt_status.first_dbe_loc <= __pa((unsigned long)&_end)) | |
195 | pr_crit("CRITICAL: Bad memory inside kernel image memory area!\n"); | |
196 | ||
197 | pr_warn("PDT: Firmware reports %lu entries of faulty memory:\n", | |
198 | entries); | |
199 | ||
200 | if (pdt_type == PDT_PDC) | |
201 | ret = pdc_mem_pdt_read_entries(&pdt_read_ret, pdt_entry); | |
202 | else { | |
203 | #ifdef CONFIG_64BIT | |
204 | struct pdc_pat_mem_read_pd_retinfo pat_pret; | |
205 | ||
54ac8fcb HD |
206 | if (pdt_type == PDT_PAT_CELL) |
207 | ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry, | |
208 | MAX_PDT_ENTRIES); | |
209 | else | |
c9c2877d HD |
210 | ret = pdc_pat_mem_read_pd_pdt(&pat_pret, pdt_entry, |
211 | MAX_PDT_TABLE_SIZE, 0); | |
c9c2877d HD |
212 | #else |
213 | ret = PDC_BAD_PROC; | |
214 | #endif | |
215 | } | |
216 | ||
217 | if (ret != PDC_OK) { | |
218 | pdt_type = PDT_NONE; | |
54ac8fcb | 219 | pr_warn("PDT: Get PDT entries failed with %d\n", ret); |
c9c2877d HD |
220 | return; |
221 | } | |
222 | ||
223 | for (i = 0; i < pdt_status.pdt_entries; i++) { | |
ea697648 HD |
224 | unsigned long addr; |
225 | ||
54ac8fcb HD |
226 | report_mem_err(pdt_entry[i]); |
227 | ||
ea697648 HD |
228 | addr = pdt_entry[i] & PDT_ADDR_PHYS_MASK; |
229 | if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && | |
230 | addr >= initrd_start && addr < initrd_end) | |
231 | pr_crit("CRITICAL: initrd possibly broken " | |
232 | "due to bad memory!\n"); | |
233 | ||
54ac8fcb HD |
234 | /* mark memory page bad */ |
235 | memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE); | |
a46c9304 | 236 | num_poisoned_pages_inc(addr >> PAGE_SHIFT); |
54ac8fcb HD |
237 | } |
238 | } | |
c46bafc4 | 239 | |
54ac8fcb HD |
240 | |
241 | /* | |
242 | * This is the PDT kernel thread main loop. | |
243 | */ | |
244 | ||
245 | static int pdt_mainloop(void *unused) | |
246 | { | |
247 | struct pdc_mem_read_pdt pdt_read_ret; | |
248 | struct pdc_pat_mem_read_pd_retinfo pat_pret __maybe_unused; | |
249 | unsigned long old_num_entries; | |
250 | unsigned long *bad_mem_ptr; | |
251 | int num, ret; | |
252 | ||
253 | for (;;) { | |
254 | set_current_state(TASK_INTERRUPTIBLE); | |
255 | ||
256 | old_num_entries = pdt_status.pdt_entries; | |
257 | ||
258 | schedule_timeout(pdt_poll_interval); | |
259 | if (kthread_should_stop()) | |
260 | break; | |
261 | ||
262 | /* Do we have new PDT entries? */ | |
263 | switch (pdt_type) { | |
264 | case PDT_PAT_NEW: | |
265 | ret = get_info_pat_new(); | |
266 | break; | |
267 | case PDT_PAT_CELL: | |
268 | ret = get_info_pat_cell(); | |
269 | break; | |
270 | default: | |
271 | ret = pdc_mem_pdt_info(&pdt_status); | |
272 | break; | |
273 | } | |
274 | ||
275 | if (ret != PDC_OK) { | |
276 | pr_warn("PDT: unexpected failure %d\n", ret); | |
277 | return -EINVAL; | |
278 | } | |
279 | ||
280 | /* if no new PDT entries, just wait again */ | |
281 | num = pdt_status.pdt_entries - old_num_entries; | |
282 | if (num <= 0) | |
283 | continue; | |
284 | ||
285 | /* decrease poll interval in case we found memory errors */ | |
286 | if (pdt_status.pdt_entries && | |
287 | pdt_poll_interval == PDT_POLL_INTERVAL_DEFAULT) | |
288 | pdt_poll_interval = PDT_POLL_INTERVAL_SHORT; | |
289 | ||
290 | /* limit entries to get */ | |
291 | if (num > MAX_PDT_ENTRIES) { | |
292 | num = MAX_PDT_ENTRIES; | |
293 | pdt_status.pdt_entries = old_num_entries + num; | |
294 | } | |
295 | ||
296 | /* get new entries */ | |
297 | switch (pdt_type) { | |
c46bafc4 | 298 | #ifdef CONFIG_64BIT |
54ac8fcb HD |
299 | case PDT_PAT_CELL: |
300 | if (pdt_status.pdt_entries > MAX_PDT_ENTRIES) { | |
301 | pr_crit("PDT: too many entries.\n"); | |
302 | return -ENOMEM; | |
303 | } | |
304 | ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry, | |
305 | MAX_PDT_ENTRIES); | |
306 | bad_mem_ptr = &pdt_entry[old_num_entries]; | |
307 | break; | |
308 | case PDT_PAT_NEW: | |
309 | ret = pdc_pat_mem_read_pd_pdt(&pat_pret, | |
310 | pdt_entry, | |
311 | num * sizeof(unsigned long), | |
312 | old_num_entries * sizeof(unsigned long)); | |
313 | bad_mem_ptr = &pdt_entry[0]; | |
314 | break; | |
c46bafc4 | 315 | #endif |
54ac8fcb HD |
316 | default: |
317 | ret = pdc_mem_pdt_read_entries(&pdt_read_ret, | |
318 | pdt_entry); | |
319 | bad_mem_ptr = &pdt_entry[old_num_entries]; | |
320 | break; | |
321 | } | |
c46bafc4 | 322 | |
54ac8fcb HD |
323 | /* report and mark memory broken */ |
324 | while (num--) { | |
325 | unsigned long pde = *bad_mem_ptr++; | |
c9c2877d | 326 | |
54ac8fcb HD |
327 | report_mem_err(pde); |
328 | ||
329 | #ifdef CONFIG_MEMORY_FAILURE | |
330 | if ((pde & PDT_ADDR_PERM_ERR) || | |
331 | ((pde & PDT_ADDR_SINGLE_ERR) == 0)) | |
83b57531 | 332 | memory_failure(pde >> PAGE_SHIFT, 0); |
54ac8fcb | 333 | else |
36257d55 | 334 | soft_offline_page(pde >> PAGE_SHIFT, 0); |
54ac8fcb HD |
335 | #else |
336 | pr_crit("PDT: memory error at 0x%lx ignored.\n" | |
337 | "Rebuild kernel with CONFIG_MEMORY_FAILURE=y " | |
338 | "for real handling.\n", | |
339 | pde & PDT_ADDR_PHYS_MASK); | |
340 | #endif | |
341 | ||
342 | } | |
c9c2877d | 343 | } |
54ac8fcb HD |
344 | |
345 | return 0; | |
c9c2877d | 346 | } |
54ac8fcb HD |
347 | |
348 | ||
349 | static int __init pdt_initcall(void) | |
350 | { | |
351 | struct task_struct *kpdtd_task; | |
352 | ||
353 | if (pdt_type == PDT_NONE) | |
354 | return -ENODEV; | |
355 | ||
d1fbab7e | 356 | kpdtd_task = kthread_run(pdt_mainloop, NULL, "kpdtd"); |
54ac8fcb | 357 | |
56cf894e | 358 | return PTR_ERR_OR_ZERO(kpdtd_task); |
54ac8fcb HD |
359 | } |
360 | ||
361 | late_initcall(pdt_initcall); |