Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
1da177e4 LT |
2 | /* |
3 | * fs/proc/kcore.c kernel ELF core dumper | |
4 | * | |
5 | * Modelled on fs/exec.c:aout_core_dump() | |
6 | * Jeremy Fitzhardinge <jeremy@sw.oz.au> | |
7 | * ELF version written by David Howells <David.Howells@nexor.co.uk> | |
8 | * Modified and incorporated into 2.3.x by Tigran Aivazian <tigran@veritas.com> | |
9 | * Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <tigran@veritas.com> | |
10 | * Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com> | |
11 | */ | |
12 | ||
443cbaf9 | 13 | #include <linux/vmcore_info.h> |
1da177e4 LT |
14 | #include <linux/mm.h> |
15 | #include <linux/proc_fs.h> | |
2f96b8c1 | 16 | #include <linux/kcore.h> |
1da177e4 | 17 | #include <linux/user.h> |
16f7e0fe | 18 | #include <linux/capability.h> |
1da177e4 LT |
19 | #include <linux/elf.h> |
20 | #include <linux/elfcore.h> | |
21 | #include <linux/vmalloc.h> | |
22 | #include <linux/highmem.h> | |
87ebdc00 | 23 | #include <linux/printk.h> |
57c8a661 | 24 | #include <linux/memblock.h> |
1da177e4 | 25 | #include <linux/init.h> |
5a0e3ad6 | 26 | #include <linux/slab.h> |
46c0d6d0 | 27 | #include <linux/uio.h> |
1da177e4 | 28 | #include <asm/io.h> |
2ef43ec7 | 29 | #include <linux/list.h> |
3089aa1b | 30 | #include <linux/ioport.h> |
3089aa1b | 31 | #include <linux/memory.h> |
29930025 | 32 | #include <linux/sched/task.h> |
02e935bf | 33 | #include <linux/security.h> |
9492587c | 34 | #include <asm/sections.h> |
59d8053f | 35 | #include "internal.h" |
1da177e4 | 36 | |
/* Owner name stamped on the core-style ELF notes emitted for /proc/kcore. */
#define CORE_STR "CORE"

/* Value stored in the ELF header's e_flags unless already defined elsewhere. */
#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

/* procfs entry for /proc/kcore; its ->size is refreshed by kcore_update_ram(). */
static struct proc_dir_entry *proc_root_kcore;

/*
 * Fallback translation between a kernel virtual address and its offset inside
 * the data area of the file, used when no other definition is in scope.
 */
#ifndef kc_vaddr_to_offset
#define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
#endif
#ifndef kc_offset_to_vaddr
#define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
#endif

/*
 * Every region exported through /proc/kcore.  Readers take kclist_lock for
 * reading, kcore_update_ram() takes it for writing; kclist_add() does not
 * take it at all.
 */
static LIST_HEAD(kclist_head);
static DECLARE_RWSEM(kclist_lock);

/* Non-zero while the RAM entries on kclist_head still have to be rebuilt. */
static int kcore_need_update = 1;
1da177e4 | 56 | |
ffc8599a KS |
57 | /* |
58 | * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error | |
59 | * Same as oldmem_pfn_is_ram in vmcore | |
60 | */ | |
61 | static int (*mem_pfn_is_ram)(unsigned long pfn); | |
62 | ||
63 | int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn)) | |
64 | { | |
65 | if (mem_pfn_is_ram) | |
66 | return -EBUSY; | |
67 | mem_pfn_is_ram = fn; | |
68 | return 0; | |
69 | } | |
70 | ||
71 | static int pfn_is_ram(unsigned long pfn) | |
72 | { | |
73 | if (mem_pfn_is_ram) | |
74 | return mem_pfn_is_ram(pfn); | |
75 | else | |
76 | return 1; | |
77 | } | |
78 | ||
a8dd9c4d OS |
79 | /* This doesn't grab kclist_lock, so it should only be used at init time. */ |
80 | void __init kclist_add(struct kcore_list *new, void *addr, size_t size, | |
81 | int type) | |
1da177e4 LT |
82 | { |
83 | new->addr = (unsigned long)addr; | |
84 | new->size = size; | |
c30bb2a2 | 85 | new->type = type; |
1da177e4 | 86 | |
2ef43ec7 | 87 | list_add_tail(&new->list, &kclist_head); |
1da177e4 LT |
88 | } |
89 | ||
37e949bd OS |
90 | static size_t get_kcore_size(int *nphdr, size_t *phdrs_len, size_t *notes_len, |
91 | size_t *data_offset) | |
1da177e4 LT |
92 | { |
93 | size_t try, size; | |
94 | struct kcore_list *m; | |
95 | ||
96 | *nphdr = 1; /* PT_NOTE */ | |
97 | size = 0; | |
98 | ||
2ef43ec7 | 99 | list_for_each_entry(m, &kclist_head, list) { |
1da177e4 LT |
100 | try = kc_vaddr_to_offset((size_t)m->addr + m->size); |
101 | if (try > size) | |
102 | size = try; | |
103 | *nphdr = *nphdr + 1; | |
104 | } | |
37e949bd OS |
105 | |
106 | *phdrs_len = *nphdr * sizeof(struct elf_phdr); | |
23c85094 OS |
107 | *notes_len = (4 * sizeof(struct elf_note) + |
108 | 3 * ALIGN(sizeof(CORE_STR), 4) + | |
109 | VMCOREINFO_NOTE_NAME_BYTES + | |
37e949bd OS |
110 | ALIGN(sizeof(struct elf_prstatus), 4) + |
111 | ALIGN(sizeof(struct elf_prpsinfo), 4) + | |
23c85094 OS |
112 | ALIGN(arch_task_struct_size, 4) + |
113 | ALIGN(vmcoreinfo_size, 4)); | |
37e949bd OS |
114 | *data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + *phdrs_len + |
115 | *notes_len); | |
116 | return *data_offset + size; | |
1da177e4 LT |
117 | } |
118 | ||
3089aa1b KH |
119 | #ifdef CONFIG_HIGHMEM |
120 | /* | |
121 | * If no highmem, we can assume [0...max_low_pfn) continuous range of memory | |
122 | * because memory hole is not as big as !HIGHMEM case. | |
123 | * (HIGHMEM is special because part of memory is _invisible_ from the kernel.) | |
124 | */ | |
b66fb005 | 125 | static int kcore_ram_list(struct list_head *head) |
3089aa1b | 126 | { |
3089aa1b | 127 | struct kcore_list *ent; |
3089aa1b KH |
128 | |
129 | ent = kmalloc(sizeof(*ent), GFP_KERNEL); | |
130 | if (!ent) | |
131 | return -ENOMEM; | |
132 | ent->addr = (unsigned long)__va(0); | |
133 | ent->size = max_low_pfn << PAGE_SHIFT; | |
134 | ent->type = KCORE_RAM; | |
b66fb005 OS |
135 | list_add(&ent->list, head); |
136 | return 0; | |
3089aa1b KH |
137 | } |
138 | ||
139 | #else /* !CONFIG_HIGHMEM */ | |
140 | ||
26562c59 KH |
141 | #ifdef CONFIG_SPARSEMEM_VMEMMAP |
142 | /* calculate vmemmap's address from given system ram pfn and register it */ | |
b908243c DH |
143 | static int |
144 | get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) | |
26562c59 KH |
145 | { |
146 | unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT; | |
147 | unsigned long nr_pages = ent->size >> PAGE_SHIFT; | |
148 | unsigned long start, end; | |
149 | struct kcore_list *vmm, *tmp; | |
150 | ||
151 | ||
152 | start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK; | |
153 | end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1; | |
108a8a11 | 154 | end = PAGE_ALIGN(end); |
26562c59 KH |
155 | /* overlap check (because we have to align page */ |
156 | list_for_each_entry(tmp, head, list) { | |
157 | if (tmp->type != KCORE_VMEMMAP) | |
158 | continue; | |
159 | if (start < tmp->addr + tmp->size) | |
160 | if (end > tmp->addr) | |
161 | end = tmp->addr; | |
162 | } | |
163 | if (start < end) { | |
164 | vmm = kmalloc(sizeof(*vmm), GFP_KERNEL); | |
165 | if (!vmm) | |
166 | return 0; | |
167 | vmm->addr = start; | |
168 | vmm->size = end - start; | |
169 | vmm->type = KCORE_VMEMMAP; | |
170 | list_add_tail(&vmm->list, head); | |
171 | } | |
172 | return 1; | |
173 | ||
174 | } | |
175 | #else | |
/* No vmemmap entry to register in this configuration; always reports success. */
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
	return 1;
}
181 | ||
182 | #endif | |
183 | ||
3089aa1b KH |
184 | static int |
185 | kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg) | |
186 | { | |
187 | struct list_head *head = (struct list_head *)arg; | |
188 | struct kcore_list *ent; | |
3955333d LA |
189 | struct page *p; |
190 | ||
191 | if (!pfn_valid(pfn)) | |
192 | return 1; | |
193 | ||
194 | p = pfn_to_page(pfn); | |
3089aa1b KH |
195 | |
196 | ent = kmalloc(sizeof(*ent), GFP_KERNEL); | |
197 | if (!ent) | |
198 | return -ENOMEM; | |
3955333d | 199 | ent->addr = (unsigned long)page_to_virt(p); |
3089aa1b KH |
200 | ent->size = nr_pages << PAGE_SHIFT; |
201 | ||
9b2d38b4 | 202 | if (!virt_addr_valid((void *)ent->addr)) |
3089aa1b KH |
203 | goto free_out; |
204 | ||
205 | /* cut not-mapped area. ....from ppc-32 code. */ | |
206 | if (ULONG_MAX - ent->addr < ent->size) | |
207 | ent->size = ULONG_MAX - ent->addr; | |
208 | ||
3955333d LA |
209 | /* |
210 | * We've already checked virt_addr_valid so we know this address | |
211 | * is a valid pointer, therefore we can check against it to determine | |
212 | * if we need to trim | |
213 | */ | |
214 | if (VMALLOC_START > ent->addr) { | |
3089aa1b KH |
215 | if (VMALLOC_START - ent->addr < ent->size) |
216 | ent->size = VMALLOC_START - ent->addr; | |
217 | } | |
218 | ||
219 | ent->type = KCORE_RAM; | |
220 | list_add_tail(&ent->list, head); | |
26562c59 KH |
221 | |
222 | if (!get_sparsemem_vmemmap_info(ent, head)) { | |
223 | list_del(&ent->list); | |
224 | goto free_out; | |
225 | } | |
226 | ||
3089aa1b KH |
227 | return 0; |
228 | free_out: | |
229 | kfree(ent); | |
230 | return 1; | |
231 | } | |
232 | ||
b66fb005 | 233 | static int kcore_ram_list(struct list_head *list) |
3089aa1b KH |
234 | { |
235 | int nid, ret; | |
236 | unsigned long end_pfn; | |
3089aa1b | 237 | |
698e7d16 | 238 | /* Not initialized....update now */ |
3089aa1b KH |
239 | /* find out "max pfn" */ |
240 | end_pfn = 0; | |
4ff1b2c2 | 241 | for_each_node_state(nid, N_MEMORY) { |
3089aa1b | 242 | unsigned long node_end; |
83285c72 | 243 | node_end = node_end_pfn(nid); |
3089aa1b KH |
244 | if (end_pfn < node_end) |
245 | end_pfn = node_end; | |
246 | } | |
247 | /* scan 0 to max_pfn */ | |
b66fb005 OS |
248 | ret = walk_system_ram_range(0, end_pfn, list, kclist_add_private); |
249 | if (ret) | |
3089aa1b | 250 | return -ENOMEM; |
b66fb005 OS |
251 | return 0; |
252 | } | |
253 | #endif /* CONFIG_HIGHMEM */ | |
254 | ||
255 | static int kcore_update_ram(void) | |
256 | { | |
257 | LIST_HEAD(list); | |
258 | LIST_HEAD(garbage); | |
259 | int nphdr; | |
37e949bd | 260 | size_t phdrs_len, notes_len, data_offset; |
b66fb005 OS |
261 | struct kcore_list *tmp, *pos; |
262 | int ret = 0; | |
263 | ||
264 | down_write(&kclist_lock); | |
265 | if (!xchg(&kcore_need_update, 0)) | |
266 | goto out; | |
267 | ||
268 | ret = kcore_ram_list(&list); | |
269 | if (ret) { | |
270 | /* Couldn't get the RAM list, try again next time. */ | |
271 | WRITE_ONCE(kcore_need_update, 1); | |
272 | list_splice_tail(&list, &garbage); | |
273 | goto out; | |
274 | } | |
275 | ||
276 | list_for_each_entry_safe(pos, tmp, &kclist_head, list) { | |
277 | if (pos->type == KCORE_RAM || pos->type == KCORE_VMEMMAP) | |
278 | list_move(&pos->list, &garbage); | |
279 | } | |
280 | list_splice_tail(&list, &kclist_head); | |
281 | ||
37e949bd OS |
282 | proc_root_kcore->size = get_kcore_size(&nphdr, &phdrs_len, ¬es_len, |
283 | &data_offset); | |
b66fb005 OS |
284 | |
285 | out: | |
286 | up_write(&kclist_lock); | |
287 | list_for_each_entry_safe(pos, tmp, &garbage, list) { | |
288 | list_del(&pos->list); | |
289 | kfree(pos); | |
3089aa1b | 290 | } |
3089aa1b KH |
291 | return ret; |
292 | } | |
1da177e4 | 293 | |
37e949bd OS |
294 | static void append_kcore_note(char *notes, size_t *i, const char *name, |
295 | unsigned int type, const void *desc, | |
296 | size_t descsz) | |
1da177e4 | 297 | { |
37e949bd OS |
298 | struct elf_note *note = (struct elf_note *)¬es[*i]; |
299 | ||
300 | note->n_namesz = strlen(name) + 1; | |
301 | note->n_descsz = descsz; | |
302 | note->n_type = type; | |
303 | *i += sizeof(*note); | |
304 | memcpy(¬es[*i], name, note->n_namesz); | |
305 | *i = ALIGN(*i + note->n_namesz, 4); | |
306 | memcpy(¬es[*i], desc, descsz); | |
307 | *i = ALIGN(*i + descsz, 4); | |
308 | } | |
1da177e4 | 309 | |
4c91c07c | 310 | static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter) |
1da177e4 | 311 | { |
17457784 LS |
312 | struct file *file = iocb->ki_filp; |
313 | char *buf = file->private_data; | |
46c0d6d0 | 314 | loff_t *fpos = &iocb->ki_pos; |
37e949bd | 315 | size_t phdrs_offset, notes_offset, data_offset; |
c6d9eee2 | 316 | size_t page_offline_frozen = 1; |
37e949bd OS |
317 | size_t phdrs_len, notes_len; |
318 | struct kcore_list *m; | |
319 | size_t tsz; | |
1da177e4 LT |
320 | int nphdr; |
321 | unsigned long start; | |
46c0d6d0 | 322 | size_t buflen = iov_iter_count(iter); |
3673fb08 OS |
323 | size_t orig_buflen = buflen; |
324 | int ret = 0; | |
1da177e4 | 325 | |
0b172f84 | 326 | down_read(&kclist_lock); |
c6d9eee2 DH |
327 | /* |
328 | * Don't race against drivers that set PageOffline() and expect no | |
329 | * further page access. | |
330 | */ | |
331 | page_offline_freeze(); | |
678ad5d8 | 332 | |
37e949bd OS |
333 | get_kcore_size(&nphdr, &phdrs_len, ¬es_len, &data_offset); |
334 | phdrs_offset = sizeof(struct elfhdr); | |
335 | notes_offset = phdrs_offset + phdrs_len; | |
336 | ||
337 | /* ELF file header. */ | |
338 | if (buflen && *fpos < sizeof(struct elfhdr)) { | |
339 | struct elfhdr ehdr = { | |
340 | .e_ident = { | |
341 | [EI_MAG0] = ELFMAG0, | |
342 | [EI_MAG1] = ELFMAG1, | |
343 | [EI_MAG2] = ELFMAG2, | |
344 | [EI_MAG3] = ELFMAG3, | |
345 | [EI_CLASS] = ELF_CLASS, | |
346 | [EI_DATA] = ELF_DATA, | |
347 | [EI_VERSION] = EV_CURRENT, | |
348 | [EI_OSABI] = ELF_OSABI, | |
349 | }, | |
350 | .e_type = ET_CORE, | |
351 | .e_machine = ELF_ARCH, | |
352 | .e_version = EV_CURRENT, | |
353 | .e_phoff = sizeof(struct elfhdr), | |
354 | .e_flags = ELF_CORE_EFLAGS, | |
355 | .e_ehsize = sizeof(struct elfhdr), | |
356 | .e_phentsize = sizeof(struct elf_phdr), | |
357 | .e_phnum = nphdr, | |
358 | }; | |
359 | ||
360 | tsz = min_t(size_t, buflen, sizeof(struct elfhdr) - *fpos); | |
46c0d6d0 | 361 | if (copy_to_iter((char *)&ehdr + *fpos, tsz, iter) != tsz) { |
37e949bd OS |
362 | ret = -EFAULT; |
363 | goto out; | |
364 | } | |
1da177e4 | 365 | |
37e949bd OS |
366 | buflen -= tsz; |
367 | *fpos += tsz; | |
368 | } | |
1da177e4 | 369 | |
37e949bd OS |
370 | /* ELF program headers. */ |
371 | if (buflen && *fpos < phdrs_offset + phdrs_len) { | |
372 | struct elf_phdr *phdrs, *phdr; | |
1da177e4 | 373 | |
37e949bd OS |
374 | phdrs = kzalloc(phdrs_len, GFP_KERNEL); |
375 | if (!phdrs) { | |
3673fb08 OS |
376 | ret = -ENOMEM; |
377 | goto out; | |
1da177e4 | 378 | } |
37e949bd OS |
379 | |
380 | phdrs[0].p_type = PT_NOTE; | |
381 | phdrs[0].p_offset = notes_offset; | |
382 | phdrs[0].p_filesz = notes_len; | |
383 | ||
384 | phdr = &phdrs[1]; | |
385 | list_for_each_entry(m, &kclist_head, list) { | |
386 | phdr->p_type = PT_LOAD; | |
387 | phdr->p_flags = PF_R | PF_W | PF_X; | |
388 | phdr->p_offset = kc_vaddr_to_offset(m->addr) + data_offset; | |
3c36b419 DH |
389 | phdr->p_vaddr = (size_t)m->addr; |
390 | if (m->type == KCORE_RAM) | |
37e949bd OS |
391 | phdr->p_paddr = __pa(m->addr); |
392 | else if (m->type == KCORE_TEXT) | |
393 | phdr->p_paddr = __pa_symbol(m->addr); | |
394 | else | |
395 | phdr->p_paddr = (elf_addr_t)-1; | |
396 | phdr->p_filesz = phdr->p_memsz = m->size; | |
397 | phdr->p_align = PAGE_SIZE; | |
398 | phdr++; | |
399 | } | |
400 | ||
401 | tsz = min_t(size_t, buflen, phdrs_offset + phdrs_len - *fpos); | |
46c0d6d0 LS |
402 | if (copy_to_iter((char *)phdrs + *fpos - phdrs_offset, tsz, |
403 | iter) != tsz) { | |
37e949bd | 404 | kfree(phdrs); |
3673fb08 OS |
405 | ret = -EFAULT; |
406 | goto out; | |
1da177e4 | 407 | } |
37e949bd OS |
408 | kfree(phdrs); |
409 | ||
1da177e4 LT |
410 | buflen -= tsz; |
411 | *fpos += tsz; | |
37e949bd OS |
412 | } |
413 | ||
414 | /* ELF note segment. */ | |
415 | if (buflen && *fpos < notes_offset + notes_len) { | |
416 | struct elf_prstatus prstatus = {}; | |
417 | struct elf_prpsinfo prpsinfo = { | |
418 | .pr_sname = 'R', | |
419 | .pr_fname = "vmlinux", | |
420 | }; | |
421 | char *notes; | |
422 | size_t i = 0; | |
423 | ||
9e627588 | 424 | strscpy(prpsinfo.pr_psargs, saved_command_line, |
37e949bd OS |
425 | sizeof(prpsinfo.pr_psargs)); |
426 | ||
427 | notes = kzalloc(notes_len, GFP_KERNEL); | |
428 | if (!notes) { | |
429 | ret = -ENOMEM; | |
430 | goto out; | |
431 | } | |
432 | ||
433 | append_kcore_note(notes, &i, CORE_STR, NT_PRSTATUS, &prstatus, | |
434 | sizeof(prstatus)); | |
435 | append_kcore_note(notes, &i, CORE_STR, NT_PRPSINFO, &prpsinfo, | |
436 | sizeof(prpsinfo)); | |
437 | append_kcore_note(notes, &i, CORE_STR, NT_TASKSTRUCT, current, | |
438 | arch_task_struct_size); | |
23c85094 OS |
439 | /* |
440 | * vmcoreinfo_size is mostly constant after init time, but it | |
441 | * can be changed by crash_save_vmcoreinfo(). Racing here with a | |
442 | * panic on another CPU before the machine goes down is insanely | |
443 | * unlikely, but it's better to not leave potential buffer | |
444 | * overflows lying around, regardless. | |
445 | */ | |
446 | append_kcore_note(notes, &i, VMCOREINFO_NOTE_NAME, 0, | |
447 | vmcoreinfo_data, | |
448 | min(vmcoreinfo_size, notes_len - i)); | |
1da177e4 | 449 | |
37e949bd | 450 | tsz = min_t(size_t, buflen, notes_offset + notes_len - *fpos); |
46c0d6d0 | 451 | if (copy_to_iter(notes + *fpos - notes_offset, tsz, iter) != tsz) { |
37e949bd OS |
452 | kfree(notes); |
453 | ret = -EFAULT; | |
3673fb08 | 454 | goto out; |
37e949bd OS |
455 | } |
456 | kfree(notes); | |
457 | ||
37e949bd OS |
458 | buflen -= tsz; |
459 | *fpos += tsz; | |
3673fb08 | 460 | } |
1da177e4 LT |
461 | |
462 | /* | |
463 | * Check to see if our file offset matches with any of | |
464 | * the addresses in the elf_phdr on our list. | |
465 | */ | |
37e949bd | 466 | start = kc_offset_to_vaddr(*fpos - data_offset); |
1da177e4 LT |
467 | if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen) |
468 | tsz = buflen; | |
1da177e4 | 469 | |
bf991c22 | 470 | m = NULL; |
37e949bd | 471 | while (buflen) { |
0daa322b DH |
472 | struct page *page; |
473 | unsigned long pfn; | |
474 | ||
bf991c22 OS |
475 | /* |
476 | * If this is the first iteration or the address is not within | |
477 | * the previous entry, search for a matching entry. | |
478 | */ | |
479 | if (!m || start < m->addr || start >= m->addr + m->size) { | |
04d168c6 JK |
480 | struct kcore_list *iter; |
481 | ||
482 | m = NULL; | |
483 | list_for_each_entry(iter, &kclist_head, list) { | |
484 | if (start >= iter->addr && | |
485 | start < iter->addr + iter->size) { | |
486 | m = iter; | |
bf991c22 | 487 | break; |
04d168c6 | 488 | } |
bf991c22 | 489 | } |
1da177e4 | 490 | } |
1da177e4 | 491 | |
c6d9eee2 DH |
492 | if (page_offline_frozen++ % MAX_ORDER_NR_PAGES == 0) { |
493 | page_offline_thaw(); | |
494 | cond_resched(); | |
495 | page_offline_freeze(); | |
496 | } | |
497 | ||
04d168c6 | 498 | if (!m) { |
46c0d6d0 | 499 | if (iov_iter_zero(tsz, iter) != tsz) { |
3673fb08 OS |
500 | ret = -EFAULT; |
501 | goto out; | |
502 | } | |
2711032c DH |
503 | goto skip; |
504 | } | |
505 | ||
506 | switch (m->type) { | |
507 | case KCORE_VMALLOC: | |
4c91c07c LS |
508 | { |
509 | const char *src = (char *)start; | |
510 | size_t read = 0, left = tsz; | |
511 | ||
512 | /* | |
513 | * vmalloc uses spinlocks, so we optimistically try to | |
514 | * read memory. If this fails, fault pages in and try | |
515 | * again until we are done. | |
516 | */ | |
517 | while (true) { | |
518 | read += vread_iter(iter, src, left); | |
519 | if (read == tsz) | |
520 | break; | |
521 | ||
522 | src += read; | |
523 | left -= read; | |
524 | ||
525 | if (fault_in_iov_iter_writeable(iter, left)) { | |
526 | ret = -EFAULT; | |
527 | goto out; | |
528 | } | |
3673fb08 | 529 | } |
2711032c | 530 | break; |
4c91c07c | 531 | } |
2711032c | 532 | case KCORE_USER: |
595dd46e | 533 | /* User page is handled prior to normal kernel page: */ |
46c0d6d0 | 534 | if (copy_to_iter((char *)start, tsz, iter) != tsz) { |
3673fb08 OS |
535 | ret = -EFAULT; |
536 | goto out; | |
537 | } | |
2711032c DH |
538 | break; |
539 | case KCORE_RAM: | |
0daa322b DH |
540 | pfn = __pa(start) >> PAGE_SHIFT; |
541 | page = pfn_to_online_page(pfn); | |
542 | ||
543 | /* | |
544 | * Don't read offline sections, logically offline pages | |
545 | * (e.g., inflated in a balloon), hwpoisoned pages, | |
546 | * and explicitly excluded physical ranges. | |
547 | */ | |
548 | if (!page || PageOffline(page) || | |
e538a582 AH |
549 | is_page_hwpoison(page) || !pfn_is_ram(pfn) || |
550 | pfn_is_unaccepted_memory(pfn)) { | |
46c0d6d0 | 551 | if (iov_iter_zero(tsz, iter) != tsz) { |
2711032c DH |
552 | ret = -EFAULT; |
553 | goto out; | |
554 | } | |
555 | break; | |
556 | } | |
557 | fallthrough; | |
558 | case KCORE_VMEMMAP: | |
559 | case KCORE_TEXT: | |
e025ab84 | 560 | /* |
17457784 LS |
561 | * Sadly we must use a bounce buffer here to be able to |
562 | * make use of copy_from_kernel_nofault(), as these | |
563 | * memory regions might not always be mapped on all | |
564 | * architectures. | |
e025ab84 | 565 | */ |
17457784 LS |
566 | if (copy_from_kernel_nofault(buf, (void *)start, tsz)) { |
567 | if (iov_iter_zero(tsz, iter) != tsz) { | |
568 | ret = -EFAULT; | |
569 | goto out; | |
570 | } | |
571 | /* | |
572 | * We know the bounce buffer is safe to copy from, so | |
573 | * use _copy_to_iter() directly. | |
574 | */ | |
575 | } else if (_copy_to_iter(buf, tsz, iter) != tsz) { | |
2e1c0170 LS |
576 | ret = -EFAULT; |
577 | goto out; | |
1da177e4 | 578 | } |
2711032c DH |
579 | break; |
580 | default: | |
581 | pr_warn_once("Unhandled KCORE type: %d\n", m->type); | |
46c0d6d0 | 582 | if (iov_iter_zero(tsz, iter) != tsz) { |
2711032c DH |
583 | ret = -EFAULT; |
584 | goto out; | |
585 | } | |
1da177e4 | 586 | } |
2711032c | 587 | skip: |
1da177e4 LT |
588 | buflen -= tsz; |
589 | *fpos += tsz; | |
1da177e4 LT |
590 | start += tsz; |
591 | tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen); | |
592 | } | |
593 | ||
3673fb08 | 594 | out: |
c6d9eee2 | 595 | page_offline_thaw(); |
3673fb08 OS |
596 | up_read(&kclist_lock); |
597 | if (ret) | |
598 | return ret; | |
599 | return orig_buflen - buflen; | |
1da177e4 | 600 | } |
97ce5d6d | 601 | |
3089aa1b KH |
602 | static int open_kcore(struct inode *inode, struct file *filp) |
603 | { | |
02e935bf DH |
604 | int ret = security_locked_down(LOCKDOWN_KCORE); |
605 | ||
3089aa1b KH |
606 | if (!capable(CAP_SYS_RAWIO)) |
607 | return -EPERM; | |
f5beeb18 | 608 | |
b602614a MG |
609 | if (ret) |
610 | return ret; | |
611 | ||
17457784 LS |
612 | filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL); |
613 | if (!filp->private_data) | |
614 | return -ENOMEM; | |
615 | ||
3089aa1b KH |
616 | if (kcore_need_update) |
617 | kcore_update_ram(); | |
0d4c36a9 | 618 | if (i_size_read(inode) != proc_root_kcore->size) { |
5955102c | 619 | inode_lock(inode); |
0d4c36a9 | 620 | i_size_write(inode, proc_root_kcore->size); |
5955102c | 621 | inode_unlock(inode); |
0d4c36a9 | 622 | } |
3089aa1b KH |
623 | return 0; |
624 | } | |
625 | ||
17457784 LS |
626 | static int release_kcore(struct inode *inode, struct file *file) |
627 | { | |
628 | kfree(file->private_data); | |
629 | return 0; | |
630 | } | |
631 | ||
97a32539 | 632 | static const struct proc_ops kcore_proc_ops = { |
46c0d6d0 | 633 | .proc_read_iter = read_kcore_iter, |
97a32539 | 634 | .proc_open = open_kcore, |
17457784 | 635 | .proc_release = release_kcore, |
97a32539 | 636 | .proc_lseek = default_llseek, |
3089aa1b KH |
637 | }; |
638 | ||
3089aa1b KH |
639 | /* just remember that we have to update kcore */ |
640 | static int __meminit kcore_callback(struct notifier_block *self, | |
641 | unsigned long action, void *arg) | |
642 | { | |
643 | switch (action) { | |
644 | case MEM_ONLINE: | |
645 | case MEM_OFFLINE: | |
3089aa1b | 646 | kcore_need_update = 1; |
bf531831 | 647 | break; |
3089aa1b KH |
648 | } |
649 | return NOTIFY_OK; | |
650 | } | |
3089aa1b KH |
651 | |
652 | ||
a0614da8 KH |
653 | static struct kcore_list kcore_vmalloc; |
654 | ||
9492587c KH |
655 | #ifdef CONFIG_ARCH_PROC_KCORE_TEXT |
656 | static struct kcore_list kcore_text; | |
657 | /* | |
658 | * If defined, special segment is used for mapping kernel text instead of | |
659 | * direct-map area. We need to create special TEXT section. | |
660 | */ | |
661 | static void __init proc_kcore_text_init(void) | |
662 | { | |
36e15263 | 663 | kclist_add(&kcore_text, _text, _end - _text, KCORE_TEXT); |
9492587c KH |
664 | } |
665 | #else | |
666 | static void __init proc_kcore_text_init(void) | |
667 | { | |
668 | } | |
669 | #endif | |
670 | ||
81ac3ad9 KH |
671 | #if defined(CONFIG_MODULES) && defined(MODULES_VADDR) |
672 | /* | |
673 | * MODULES_VADDR has no intersection with VMALLOC_ADDR. | |
674 | */ | |
eebf3648 | 675 | static struct kcore_list kcore_modules; |
81ac3ad9 KH |
676 | static void __init add_modules_range(void) |
677 | { | |
bf3e2692 BH |
678 | if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) { |
679 | kclist_add(&kcore_modules, (void *)MODULES_VADDR, | |
81ac3ad9 | 680 | MODULES_END - MODULES_VADDR, KCORE_VMALLOC); |
bf3e2692 | 681 | } |
81ac3ad9 KH |
682 | } |
683 | #else | |
684 | static void __init add_modules_range(void) | |
685 | { | |
686 | } | |
687 | #endif | |
688 | ||
97ce5d6d AD |
689 | static int __init proc_kcore_init(void) |
690 | { | |
97a32539 | 691 | proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &kcore_proc_ops); |
90396f96 | 692 | if (!proc_root_kcore) { |
87ebdc00 | 693 | pr_err("couldn't create /proc/kcore\n"); |
90396f96 KH |
694 | return 0; /* Always returns 0. */ |
695 | } | |
3089aa1b | 696 | /* Store text area if it's special */ |
9492587c | 697 | proc_kcore_text_init(); |
3089aa1b | 698 | /* Store vmalloc area */ |
a0614da8 KH |
699 | kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, |
700 | VMALLOC_END - VMALLOC_START, KCORE_VMALLOC); | |
81ac3ad9 | 701 | add_modules_range(); |
3089aa1b KH |
702 | /* Store direct-map area from physical memory map */ |
703 | kcore_update_ram(); | |
1eeaa4fd | 704 | hotplug_memory_notifier(kcore_callback, DEFAULT_CALLBACK_PRI); |
3089aa1b | 705 | |
97ce5d6d AD |
706 | return 0; |
707 | } | |
abaf3787 | 708 | fs_initcall(proc_kcore_init); |